In [1]:
from pydantic import Field
from deepeval.dataset import Golden
from deepeval.test_case import LLMTestCase

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from benchmarq.adapter import Evaluator
from benchmarq.experiment import Experiment


model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# noinspection PyPep8Naming
def generate(prompt: str) -> str:
    return generator(prompt)


class Test(Evaluator):
    def __init__(self):
        super().__init__()

    def evaluate_consumption(self, input: Golden):
        print(f"Input: {input.input}")
        print(f"Output: {generate(input.input)}")

    def evaluate_test_case(self, input: Golden) -> LLMTestCase:
        pass

experiment = Experiment(
    subquestionId="subquestionId",
    name="name",
    description="A very long description",
    settings=Test())

print(experiment.run())


Device set to use mps:0
[codecarbon INFO @ 14:40:01] [setup] RAM Tracking...
[codecarbon INFO @ 14:40:01] [setup] GPU Tracking...
[codecarbon INFO @ 14:40:01] No GPU found.
[codecarbon INFO @ 14:40:01] [setup] CPU Tracking...
[codecarbon INFO @ 14:40:02] CPU Model on constant consumption mode: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
[codecarbon INFO @ 14:40:02] >>> Tracker's metadata:
[codecarbon INFO @ 14:40:02]   Platform system: macOS-15.3.1-x86_64-i386-64bit
[codecarbon INFO @ 14:40:02]   Python version: 3.11.0
[codecarbon INFO @ 14:40:02]   CodeCarbon version: 2.2.2
[codecarbon INFO @ 14:40:02]   Available RAM : 16.000 GB
[codecarbon INFO @ 14:40:02]   CPU count: 16
[codecarbon INFO @ 14:40:02]   CPU model: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
[codecarbon INFO @ 14:40:02]   GPU count: None
[codecarbon INFO @ 14:40:02]   GPU model: None
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something by itself, but the only logical explanation offered is so far from true that even with it, it cannot be considered evidence.\n\n"The second rule of quantum mechanics is very similar to that used by Schrödinger\'s second law,'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something "charming." They are too often not able to think straight. Too often, I fear, they too fail to realize that they are being held up as the ultimate protector of women who are oppressed by men.\n\nThis isn\'t just'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': "something and have that personality about you and your character, what kind of person you are. Do you play detective with Mr. Tiller?\n\nYeah. I have a lot of detective work that I'm doing because I have to figure out a"}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something-by-name definition)\n\nThe same pattern emerges when you add a list of a bunch of names, then set the order of each list to make it more predictable.\n\nThe syntax for dealing with an enumeration is slightly different'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something-that-does-make-you-feel-good" or "I feel positive for my people." We\'re all familiar with "I love them," "I love them for how they are" or "I miss them for having a'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something from our past experiences in the \'19-\'20s. "That\'s really where it started to get interesting."\n\nWhen I met Dave and Jack, they were standing outside our house when we heard the "pop" come out of Jack'}]
Input: something


[codecarbon INFO @ 14:40:28] Energy consumed for RAM : 0.000025 kWh. RAM Power : 6.0 W
[codecarbon INFO @ 14:40:28] Energy consumed for all CPUs : 0.000094 kWh. Total CPU Power : 22.5 W
[codecarbon INFO @ 14:40:28] 0.000119 kWh of electricity used since the beginning.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something a problem."\n\n"Oh… no matter. We have to put them down. I want them to be out there. That\'s why they\'re in the house… so I can see him before and when he dies. No matter what'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something one), and it is always at this time of the year when we get in the summer, and it\'s so much cooler on that cold, rainy, and windy month that you just feel like you\'re in a new place." She told'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something they\'d learned in university. He\'s been here several times, but once he graduated he left and never took this job. He was also homeless as of late. There\'s a real sense of disconnect."\n\nWhile trying to find work in'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': 'something that was really difficult to define."\n\nThat\'s why it was so important to see someone like Jauzmer as a leader of a small band of musicians, which, combined with a knack for playing along to a musical formula, was'}]
Input: something


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output: [{'generated_text': "something. If that wasn't enough, it also included the fact that that's exactly what some of you asked for. If you were feeling nostalgic in particular, that would mean your money was actually coming through, so you might be asking yourself what to"}]
Input: something


[codecarbon INFO @ 14:40:41] Energy consumed for RAM : 0.000048 kWh. RAM Power : 6.0 W
[codecarbon INFO @ 14:40:41] Energy consumed for all CPUs : 0.000181 kWh. Total CPU Power : 22.5 W
[codecarbon INFO @ 14:40:41] 0.000229 kWh of electricity used since the beginning.


Output: [{'generated_text': 'something has to be in there for the long haul."'}]
RunResult(consumption_results=ConsumptionResult(timestamp='2025-02-27T14:40:41', project_name='codecarbon', run_id='ca87554b-4058-43c7-9382-fd2437021dbd', experiment_id='1', duration=28.911799907684326, emissions=7.578995478100329e-05, emissions_rate=2.6214194558277722e-06, cpu_power=22.5, gpu_power=0.0, ram_power=6.0, cpu_energy=0.00018068661242723463, gpu_energy=0.0, ram_energy=4.818027019500732e-05, energy_consumed=0.00022886688262224194, country_name='The Netherlands', country_iso_code='NLD', region='south holland', cloud_provider='', cloud_region='', os='macOS-15.3.1-x86_64-i386-64bit', python_version='3.11.0', codecarbon_version='2.2.2', cpu_count=16.0, cpu_model='Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz', gpu_count=None, gpu_model=None, longitude=4.2706, latitude=52.0492, ram_total_size=16.0, tracking_mode='machine', on_cloud='N', pue=1), timestamp=datetime.datetime(2025, 2, 27, 14, 40, 42, 5964))
