In [1]:
import os
import sys
import pandas as pd
import seaborn as sns

# Results grid analysis notebook

Running the trials and experiments via Ray produces a number of files related to the "experiment" (a dataset together with all the strategies and all the repeated runs, each run being a "trial"). Each trial is stored individually, but all trials are read and collected into a `result_grid` object that can be used to analyse results at both the experiment and the trial level.

This notebook is intended to be used for all the experiments; adapt it as necessary to produce the relevant plots.

In [2]:
# Load result_grid
# Adapt `experiment_name` (and `storage_path`, if the results live elsewhere) as needed.
from ray import tune
experiment_name = "diabetes"
# Results are looked up under <current working directory>/benchmark_results/<experiment_name>.
storage_path = os.path.join(os.getcwd(), "benchmark_results")
experiment_path = os.path.join(storage_path, experiment_name)

print(f"Loading results from {experiment_path}...")

# Restore the tuner from the experiment directory and collect its results.
# NOTE(review): trainable="trial" presumably has to match the name the trainable
# was registered under when the experiment was run -- confirm.
restored_tuner = tune.Tuner.restore(experiment_path, trainable="trial")
result_grid = restored_tuner.get_results()


  from .autonotebook import tqdm as notebook_tqdm
2024-09-09 12:49:49,218	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-09-09 12:49:49,571	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


: 

In [2]:
# Housekeeping: confirm that the restored results grid looks the way we expect.

# Report whether any trial raised an error.
print("One of the trials failed!" if result_grid.errors else "No errors!")

num_results = len(result_grid)
print("Number of results:", num_results)

# Walk through every trial and report either its error or its final score.
for trial_idx, trial_result in enumerate(result_grid):
    if trial_result.error:
        print(f"Trial #{trial_idx} had an error:", trial_result.error)
    else:
        print(
            f"Trial #{trial_idx} finished successfully with a test metric of:",
            trial_result.metrics["score"],
        )

In [13]:
# Flatten the result grid into a DataFrame with one row per trial.
results_df = result_grid.get_dataframe()

In [14]:
# Display the per-trial results table for a quick visual check.
results_df

In [15]:
from tqdm import tqdm
def process_results_grid_into_sns_plot_df(results_df: pd.DataFrame) -> pd.DataFrame:
    """Reshape the per-trial results into a long-format frame for seaborn line plots.

    ``results_df`` has one row per trial, with columns for the trial's parameters
    and metrics. Each trial's ``iteration_metrics`` entry is iterated in order and
    one output row is emitted per (trial, iteration) pair, so that the metric can
    be plotted over the iterations.

    Args:
        results_df: Frame with the columns ``trial_id``, ``config/seed``,
            ``config/strategy``, ``score`` and ``iteration_metrics``.

    Returns:
        A data frame with the following columns:

        - trial_id: The trial's ID
        - iteration: The iteration number (0-based position in ``iteration_metrics``)
        - test_metric: The test metric at that iteration
        - strategy: The strategy used in that trial
        - seed: The seed used in that trial
        - score: The score for that trial

        The columns are present even when ``results_df`` has no rows, so
        downstream plotting code can always refer to them by name.

    Raises:
        KeyError: If one of the required columns is missing from ``results_df``.
    """
    # Fixed output schema; also used when there are no records at all.
    columns = ["trial_id", "iteration", "test_metric", "strategy", "seed", "score"]
    records = []

    # iterrows() is a generator without a length, so the total is passed
    # explicitly to let the progress bar show completion.
    for _, row in tqdm(results_df.iterrows(), total=len(results_df)):
        trial_id = row["trial_id"]
        strategy = row["config/strategy"]
        seed = row["config/seed"]
        score = row["score"]

        records.extend(
            {
                "trial_id": trial_id,
                "iteration": iteration,
                "test_metric": metric_value,
                "strategy": strategy,
                "seed": seed,
                "score": score,
            }
            for iteration, metric_value in enumerate(row["iteration_metrics"])
        )

    return pd.DataFrame(records, columns=columns)

In [16]:
# Expand the per-trial results into one row per (trial, iteration) for plotting,
# then display the reshaped frame.
sns_results_df = process_results_grid_into_sns_plot_df(results_df)
sns_results_df

In [17]:
# Test metric over the iterations, one line per strategy.
# NOTE(review): trials sharing a strategy are presumably aggregated per iteration
# by seaborn's default estimator and drawn with an error band -- confirm against
# the installed seaborn version.
sns.lineplot(
    data=sns_results_df,
    x="iteration",
    y="test_metric",
    hue="strategy",
)