In [None]:
from dotenv import load_dotenv

from documentation.how_tos.example_data import (
    EXAMPLE_1_INPUT,
    DummyAggregationLogic,
    DummyEvaluationLogic,
    DummyExample,
    DummyTask,
)
from intelligence_layer.connectors.studio.studio import StudioClient
from intelligence_layer.evaluation.benchmark.studio_benchmark import (
    StudioBenchmarkRepository,
)
from intelligence_layer.evaluation.dataset.studio_dataset_repository import (
    StudioDatasetRepository,
)

# Read settings from a local `.env` file into the process environment
# (presumably the Studio connection settings used further down -- confirm
# against your deployment).
load_dotenv()

# Two sample examples for the dataset created later in this notebook. Each
# tuple holds (input, expected_output, data) for one `DummyExample`.
examples = [
    DummyExample(input=example_input, expected_output=expected, data=payload)
    for example_input, expected, payload in (
        ("input0", "expected_output0", "data0"),
        (EXAMPLE_1_INPUT, "expected_output1", "data1"),
    )
]

# How to execute Benchmarks
<div class="alert alert-info">  

Make sure your account has permissions to use the Studio application.

For an on-prem or local installation, please contact the corresponding team.
</div>

0. Initialize a `StudioClient` with a project.
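    - Use an existing project or create a new one, either with the `StudioClient.create_project` function or by passing `create_project=True` to the `StudioClient` constructor (as in the example below).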
    
1. Create a `StudioDatasetRepository` and create a new `Dataset` via `StudioDatasetRepository.create_dataset`, which will automatically upload this new `Dataset` to Studio.

2. Create a `StudioBenchmarkRepository` and instantiate a benchmark for your dataset (referenced by its ID) with your `evaluation_logic` and `aggregation_logic` using the `create_benchmark` function.

3. Execute the `Benchmark` with your initialized `Task` using the benchmark's `execute` function.

### Example

In [None]:
# Step 0: create the Studio client for the project used in this walkthrough.
# `create_project=True` is passed so the project does not need to be set up
# beforehand (presumably it is created when missing -- confirm in the
# StudioClient documentation).
studio_client = StudioClient(project="my project_name", create_project=True)

# Step 1: create a dataset from `examples` (defined in the first cell),
# passing "my_dataset" as its name.
studio_dataset_repository = StudioDatasetRepository(studio_client)
dataset = studio_dataset_repository.create_dataset(examples, "my_dataset")

# Step 2: create a benchmark that references the dataset by its ID and
# combines the evaluation and aggregation logic; "my_benchmark" is passed as
# its name.
studio_benchmark_repository = StudioBenchmarkRepository(studio_client)
evaluation_logic = DummyEvaluationLogic()
aggregation_logic = DummyAggregationLogic()
benchmark = studio_benchmark_repository.create_benchmark(
    dataset.id, evaluation_logic, aggregation_logic, "my_benchmark"
)

# Step 3: execute the benchmark with the task under test. "my_task" is the
# string label handed to this execution (presumably its display name --
# confirm against the `execute` signature).
task = DummyTask()
benchmark.execute(task, "my_task")