In [None]:
from uuid import uuid4

from dotenv import load_dotenv

from documentation.how_tos.example_data import (
    ComplexDummyEvaluationLogic,
    ComplexDummyTask,
    DummyAggregationLogic,
    example_data,
)
from intelligence_layer.connectors import StudioClient
from intelligence_layer.evaluation import (
    StudioBenchmarkRepository,
    StudioDatasetRepository,
)

# Load environment variables from a .env file before any client is created
# (presumably this is where the Studio credentials come from — TODO confirm).
load_dotenv()
# Example fixtures for this how-to; the notebook reads `complex_examples`
# here and `studio_project_name` in the example cell below.
my_example_data = example_data()
examples = my_example_data.complex_examples

# How to execute Benchmarks
<div class="alert alert-info">  

Make sure your account has permissions to use the Studio application.

For an on-prem or local installation, please contact the corresponding team.
</div>

0. Initialize a `StudioClient` with a project.
    - Use an existing project or create a new one with the `StudioClient.create_project` function (or pass `create_project=True` to the `StudioClient` constructor, as in the example below).
    
1. Create a `StudioDatasetRepository` and create a new `Dataset` via `StudioDatasetRepository.create_dataset`, which will automatically upload this new `Dataset` to Studio.

2. Create a `StudioBenchmarkRepository` and instantiate a benchmark with your `evaluation_logic` and `aggregation_logic` using the `create_benchmark` function.

3. Execute the `Benchmark` with your initialized `Task` by calling `Benchmark.execute`.

### Example

In [None]:
# Step 0: connect to Studio for the example project; `create_project=True`
# asks the client to create the project for us.
studio_client = StudioClient(
    project=my_example_data.studio_project_name,
    create_project=True,
)

# Step 1: create the dataset from the examples (this uploads it to Studio).
studio_dataset_repository = StudioDatasetRepository(studio_client)
dataset = studio_dataset_repository.create_dataset(examples, "my_dataset")

# Step 2: define a benchmark on that dataset from the evaluation and
# aggregation logic. The random UUID suffix gives every run a distinct
# benchmark name.
studio_benchmark_repository = StudioBenchmarkRepository(studio_client)
evaluation_logic = ComplexDummyEvaluationLogic()
aggregation_logic = DummyAggregationLogic()
benchmark_name = f"my_benchmark-{uuid4()}"
benchmark = studio_benchmark_repository.create_benchmark(
    dataset.id,
    evaluation_logic,
    aggregation_logic,
    benchmark_name,
)

# Step 3: run the task against the benchmark under the run name "my_task".
task = ComplexDummyTask()
benchmark.execute(task, "my_task")