In [None]:
from documentation.how_tos.example_data import (
    DummyAggregationLogic,
    DummyEvaluation,
    DummyExample,
    example_data,
)
from intelligence_layer.evaluation import (
    Aggregator,
    Example,
    IncrementalEvaluationLogic,
    IncrementalEvaluator,
    InMemoryAggregationRepository,
    InMemoryEvaluationRepository,
    SuccessfulExampleOutput,
)

# How to implement incremental evaluation
This notebook outlines how to perform evaluations in an incremental fashion, i.e., adding additional runs to your existing evaluations without the need for recalculation.
    
## Step-by-Step Guide
0. Run your tasks on the datasets on which you want to evaluate them (see [here](./how_to_run_a_task_on_a_dataset.ipynb))
   - When evaluating multiple runs, all of them need the same data types 
1. Initialize all necessary repositories and define your `IncrementalEvaluationLogic`. It is similar to a normal `EvaluationLogic` (see [here](./how_to_implement_a_simple_evaluation_and_aggregation_logic.ipynb)), but you additionally have to implement your own `do_incremental_evaluate` method
2. Initialize an `IncrementalEvaluator` with the repositories and your custom `IncrementalEvaluationLogic`
3. Call the `evaluate_runs` method of the `IncrementalEvaluator`
4. Aggregate your evaluations using the [standard aggregation](./how_to_aggregate_evaluations.ipynb) or using a [custom aggregation logic](./how_to_implement_a_simple_evaluation_and_aggregation_logic.ipynb)

#### Steps for adding new runs
5. Call the `evaluate_additional_runs` method of the `IncrementalEvaluator`:
   - `run_ids`: Runs to be included in the evaluation results, including those that have been evaluated before
   - `previous_evaluation_ids`: IDs of previous evaluations whose runs should **not** be re-evaluated; whether they are actually skipped depends on the specific implementation of the `do_incremental_evaluate` method
6. Aggregate all your `EvaluationOverview`s

In [None]:
# Step 0: obtain the example data; it supplies the dataset repository, the run
# repository and the run overviews that the later steps evaluate.
my_example_data = example_data()
dataset_repository, run_repository = (
    my_example_data.dataset_repository,
    my_example_data.run_repository,
)

# A single hand-written example. NOTE(review): nothing in the visible cells
# refers to `examples` afterwards - confirm whether it is still needed.
examples = [
    DummyExample(
        input="input1",
        expected_output="expected_output1",
        data="data1",
    ),
]

# Step 1: fresh in-memory repositories that will receive the evaluation and
# aggregation results produced below.
evaluation_repository = InMemoryEvaluationRepository()
aggregation_repository = InMemoryAggregationRepository()


class DummyIncrementalEvaluationLogic(
    IncrementalEvaluationLogic[str, str, str, DummyEvaluation]
):
    """Minimal incremental evaluation logic that always yields the same `DummyEvaluation`."""

    def do_incremental_evaluate(
        self,
        example: Example[str, str],
        outputs: list[SuccessfulExampleOutput[str]],
        already_evaluated_outputs: list[list[SuccessfulExampleOutput[str]]],
    ) -> DummyEvaluation:
        """Return a fixed placeholder evaluation.

        Args:
            example: The example under evaluation (not inspected here).
            outputs: The outputs for this example (not inspected here).
            already_evaluated_outputs: Outputs covered by earlier evaluations
                (not inspected here).

        Returns:
            A `DummyEvaluation` whose `eval` field is "DummyEvalResult".
        """
        # This demo ignores its arguments. A real implementation could use
        # `already_evaluated_outputs` to skip outputs that were evaluated before.
        placeholder_evaluation = DummyEvaluation(eval="DummyEvalResult")
        return placeholder_evaluation


# Step 2: combine the repositories and the custom logic into an evaluator.
evaluation_logic = DummyIncrementalEvaluationLogic()
incremental_evaluator = IncrementalEvaluator(
    dataset_repository,
    run_repository,
    evaluation_repository,
    "My incremental evaluation",
    evaluation_logic,
)

# Step 3: evaluate the first run only.
first_run_id = my_example_data.run_overview_1.id
incremental_evaluator.evaluate_runs(first_run_id)

# Step 4: aggregate every evaluation overview currently held by the
# evaluation repository.
aggregation_logic = DummyAggregationLogic()
aggregator = Aggregator(
    evaluation_repository,
    aggregation_repository,
    "MyAggregator",
    aggregation_logic,
)
evaluation_ids = evaluation_repository.evaluation_overview_ids()
aggregation_overview = aggregator.aggregate_evaluation(*evaluation_ids)
print(aggregation_overview)

In [None]:
## Addition of new task/run
# Step 5: record the evaluation overview IDs that exist *before* the additional
# evaluation, then evaluate both runs while passing those IDs as the previous
# evaluations.
previously_evaluated_ids = evaluation_repository.evaluation_overview_ids()
run_ids = [
    my_example_data.run_overview_1.id,
    my_example_data.run_overview_2.id,
]
incremental_evaluator.evaluate_additional_runs(
    *run_ids, previous_evaluation_ids=previously_evaluated_ids
)

# Step 6: re-query the repository so that the aggregation also covers the
# evaluation overview that was just added.
all_evaluation_ids = evaluation_repository.evaluation_overview_ids()
second_aggregation_overview = aggregator.aggregate_evaluation(*all_evaluation_ids)
print(second_aggregation_overview)