This notebook loads all the Optuna studies in the "tuning" folder and arranges them in a pandas DataFrame. It also adds the performance of the best models reported in the paper and the results of the reruns.

It can serve as a starting point for further analysis.

In [None]:
# Load all the studies into a dataframe

import optuna
from collections import Counter
from optuna.trial import TrialState
import pandas as pd
import numpy as np
import datetime
from pathlib import Path

import imitation.util.sacred_file_parsing as sfp


def parse_study_name(study_name):
    """Split a study name into its algorithm and environment parts.

    Study names are expected to look like ``tuning_<algo>_with_<env>``, where
    ``<env>`` may itself contain underscores.

    Args:
        study_name: Name of the optuna study.

    Returns:
        A ``(algo, env)`` tuple of strings.

    Raises:
        ValueError: If the name does not follow the expected pattern.
    """
    segments = study_name.split("_")
    if len(segments) <= 3 or (segments[0], segments[2]) != ("tuning", "with"):
        raise ValueError(
            "Expected a study name like 'tuning_<algo>_with_<env>', "
            f"got {study_name!r}"
        )
    return segments[1], "_".join(segments[3:])


def mean_duration(durations):
    """Return the average of an iterable of timedeltas, or None if it is empty."""
    durations = list(durations)
    if not durations:
        return None
    return sum(durations, datetime.timedelta()) / len(durations)


# Sorted so that the row order of the resulting dataframe does not depend on
# the order in which the file system happens to list the journal files.
experiment_log_files = sorted(Path().glob("*/*.log"))

raw_study_data = []

for log_file in experiment_log_files:
    d = dict()

    d['logfile'] = log_file

    study = optuna.load_study(
        storage=optuna.storages.JournalStorage(
            optuna.storages.JournalFileStorage(str(log_file))
        ),
        # in our case, we have one journal file per study so the study name can be
        # inferred
        study_name=None,
    )
    d['study'] = study
    d['study_name'] = study.study_name

    # Fetch the trials and count their states once instead of once per column.
    trials = study.trials
    trial_state_counter = Counter(t.state for t in trials)
    n_completed_trials = trial_state_counter[TrialState.COMPLETE]
    d['trials'] = n_completed_trials
    d['trials_running'] = trial_state_counter[TrialState.RUNNING]
    d['trials_failed'] = trial_state_counter[TrialState.FAIL]
    d['all_trials'] = len(trials)

    if n_completed_trials > 0:
        d['best_value'] = round(study.best_trial.value, 2)

    algo, env = parse_study_name(study.study_name)
    d['algo'] = algo
    d['env'] = env

    # A study without completed trials has no best trial and no durations to
    # average. The keys are left out (like 'best_value' above) so the
    # dataframe shows missing values instead of the whole cell crashing.
    if n_completed_trials > 0:
        d['best_trial_duration'] = study.best_trial.duration
        d['mean_duration'] = mean_duration(
            t.duration for t in trials if t.state == TrialState.COMPLETE
        )

    # Returns of the reruns stored next to the journal file.
    # NOTE(review): behaviour of find_sacred_runs for a missing "reruns"
    # folder is not visible here - confirm it yields nothing in that case.
    reruns_folder = log_file.parent / "reruns"
    rerun_results = [
        round(run['result']['imit_stats']['monitor_return_mean'], 2)
        for _, run in sfp.find_sacred_runs(reruns_folder, only_completed_runs=True)
    ]
    d['rerun_values'] = rerun_results

    raw_study_data.append(d)

study_data = pd.DataFrame(raw_study_data)

In [None]:
# Add performance of the best model from the paper
import pandas as pd

# Environments covered by the reference tables below.
# NOTE(review): not used in this cell - presumably kept for later analysis.
environments = [
    "seals_ant",
    "seals_half_cheetah",
    "seals_hopper",
    "seals_swimmer",
    "seals_walker",
    "seals_humanoid",
    "seals_cartpole",
    "pendulum",
    "seals_mountain_car",
]

# Reference values per environment. "-" marks an environment for which no
# value is available; such entries are skipped when the rows are built.
# NOTE(review): the 700/1400 suffixes presumably refer to two settings
# reported in the paper - confirm against the paper.
pc_paper_700 = dict(
    seals_ant=200,
    seals_half_cheetah=4700,
    seals_hopper=4500,
    seals_swimmer=170,
    seals_walker=4900,
    seals_humanoid="-",
    seals_cartpole="-",
    pendulum=1300,
    seals_mountain_car="-",
)

pc_paper_1400 = dict(
    seals_ant=100,
    seals_half_cheetah=5600,
    seals_hopper=4500,
    seals_swimmer=175,
    seals_walker=5900,
    seals_humanoid="-",
    seals_cartpole="-",
    pendulum=750,
    seals_mountain_car="-",
)

rl_paper = dict(
    seals_ant=16,
    seals_half_cheetah=420,
    seals_hopper=4210,
    seals_swimmer=175,
    seals_walker=5370,
    seals_humanoid="-",
    seals_cartpole="-",
    pendulum=1300,
    seals_mountain_car="-",
)

rl_ours = dict(
    seals_ant=3034,
    seals_half_cheetah=1675.76,
    seals_hopper=203.45,
    seals_swimmer=292.84,
    seals_walker=2465.56,
    seals_humanoid=3224.12,
    seals_cartpole=500.00,
    pendulum=-189.25,
    seals_mountain_car=-97.00,
)

reference_values = dict(
    pc_paper_700=pc_paper_700,
    pc_paper_1400=pc_paper_1400,
    rl_paper=rl_paper,
    rl_ours=rl_ours,
)

# Make the cell safe to re-run: drop reference rows appended by an earlier
# execution before adding them again, otherwise every re-run would duplicate
# them. The reference names all contain underscores, whereas the algo of a
# loaded study is a single "_"-separated segment of its study name, so this
# filter can never remove a row that came from an optuna study.
raw_study_data[:] = [
    row for row in raw_study_data if row.get("algo") not in reference_values
]

for algo, values_by_env in reference_values.items():
    for env, value in values_by_env.items():
        if value == "-":
            continue
        raw_study_data.append(dict(
            algo=algo,
            env=env,
            best_value=value,
        ))

# Rebuild the dataframe so it contains both the studies and the reference rows.
study_data = pd.DataFrame(raw_study_data)

In [None]:
from IPython.display import display

# Headline value of every row in the dataframe.
benchmark_columns = ["algo", "env", "best_value"]
print("Benchmark Data")
display(study_data[benchmark_columns])

# Only rows whose rerun values have a standard deviation above zero are
# shown; rows for which np.std does not yield a positive number are hidden.
rerun_columns = benchmark_columns + ["rerun_values"]
rerun_spread = study_data["rerun_values"].map(np.std)
has_spread = rerun_spread > 0
print("Rerun Data")
display(study_data[rerun_columns][has_spread])