In [30]:
import os
import json
import pandas as pd
from pathlib import Path
import numpy as np

In [31]:
# Root folder holding the evaluation outputs. The default is the original
# location; set the MUZERO_OUT_DIR environment variable to point the notebook
# at another copy of the results without editing this cell.
root_dir = Path(os.environ.get('MUZERO_OUT_DIR', '/h/sergio/tree-search-planning/muzero-general/out'))

In [32]:
# Collect one [env, run, stats_json] record for every root_dir/<env>/<run>/
# folder that contains a '*.stats.json' file.
# NOTE(review): the rendered tables in this notebook show an extra 'MuZero'
# field between the env and the run name -- confirm that this two-level walk
# matches the directory layout on disk.
evaluations = []
for env_dir in sorted(root_dir.iterdir()):
    if not env_dir.is_dir():
        # Stray files next to the environment folders are not environments.
        continue
    for run_dir in sorted(env_dir.iterdir()):
        if not run_dir.is_dir():
            continue
        # Take the first matching file, if any; runs without a stats file are
        # skipped on purpose, while any other error is allowed to surface.
        stats_json = next(run_dir.glob('*.stats.json'), None)
        if stats_json is None:
            continue
        evaluations.append([env_dir.name, run_dir.name, stats_json])

In [33]:
# Tabulate the discovered runs: one row per collected record.
evaluations_df = pd.DataFrame(
    data=evaluations,
    columns=['env', 'run', 'stats_json'],
)
evaluations_df.head(n=5)

Unnamed: 0,env,run,stats_json,Unnamed: 4
0,roundabout-v0,MuZero,run_20210414-203008_9350,/h/sergio/tree-search-planning/muzero-general/...
1,highway-v0,MuZero,run_20210414-205342_16381,/h/sergio/tree-search-planning/muzero-general/...


In [34]:
def get_stats(stats_json):
    """Load a statistics file and return its parsed JSON content.

    Args:
        stats_json: Location of the JSON file (str or path-like).

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; do not depend on the machine's locale encoding.
    with open(stats_json, encoding='utf-8') as f:
        return json.load(f)

In [35]:
# Parse each run's stats file and keep the result next to its path.
evaluations_df['stats_dict'] = evaluations_df['stats_json'].map(get_stats)
evaluations_df.head(n=5)

Unnamed: 0,env,run,stats_json,stats_dict,Unnamed: 5
0,roundabout-v0,MuZero,run_20210414-203008_9350,/h/sergio/tree-search-planning/muzero-general/...,{'initial_reset_timestamp': 1618446608.8101778...
1,highway-v0,MuZero,run_20210414-205342_16381,/h/sergio/tree-search-planning/muzero-general/...,"{'initial_reset_timestamp': 1618448022.171705,..."


In [36]:
def _episode_mean_speeds(stat_dict):
    """Return the mean of each episode's entries in ``stat_dict['episode_speed']``."""
    return [np.mean(ep) for ep in stat_dict['episode_speed']]


def mean_episode_length(stat_dict):
    """Return the mean episode length for one run.

    NOTE(review): this helper is applied to the frame further down but was
    never defined in the notebook. It presumably reads an 'episode_lengths'
    list from the stats file -- confirm. When that key is absent, the number
    of entries in each per-episode 'episode_speed' list is used instead,
    assuming one entry per step -- TODO confirm.
    """
    lengths = stat_dict.get('episode_lengths')
    if lengths is None:
        lengths = [len(ep) for ep in stat_dict['episode_speed']]
    return np.mean(lengths)


def mean_total_reward(stat_dict):
    """Return the mean of ``stat_dict['episode_rewards']``."""
    return np.mean(stat_dict['episode_rewards'])


def std_total_reward(stat_dict):
    """Return the standard deviation (numpy default, ddof=0) of the episode rewards."""
    return np.std(stat_dict['episode_rewards'])


def p_crashed(stat_dict):
    """Return the fraction of episodes with at least one truthy 'episode_crashed' entry."""
    ep_crashed = [int(any(ep)) for ep in stat_dict['episode_crashed']]
    return np.mean(ep_crashed)


def mean_speed(stat_dict):
    """Return the mean over episodes of each episode's mean speed."""
    return np.mean(_episode_mean_speeds(stat_dict))


def std_speed(stat_dict):
    """Return the standard deviation (ddof=0) over episodes of each episode's mean speed."""
    return np.std(_episode_mean_speeds(stat_dict))

In [37]:
# One derived column per metric, each computed from the parsed stats dict.
metric_functions = {
    'mean_episode_length': mean_episode_length,
    'mean_episode_reward': mean_total_reward,
    'std_episode_reward': std_total_reward,
    'p_crashed': p_crashed,
    'mean_speed': mean_speed,
    'std_speed': std_speed,
}
for column_name, metric in metric_functions.items():
    evaluations_df[column_name] = evaluations_df['stats_dict'].apply(metric)

In [38]:
# Summary table with one row per (env, run).
# The frame is built with the columns 'env', 'run' and 'stats_json' and has no
# 'model' column, so indexing on 'model' raised KeyError; index on 'run'.
summary_columns = ['mean_episode_reward', 'std_episode_reward', 'p_crashed', 'mean_speed', 'std_speed']
evaluations_df.set_index(['env', 'run'])[summary_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_episode_reward,std_episode_reward,p_crashed,mean_speed,std_speed
env,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
roundabout-v0,MuZero,10.9752,0.903182,0.14,9.119461,0.8353
highway-v0,MuZero,22.980087,9.104898,0.52,21.337703,1.396431


In [11]:
# Display the per-run mean episode length.
evaluations_df.loc[:, 'mean_episode_length']

0    10.82
Name: mean_episode_length, dtype: float64

In [12]:
# Display the per-run mean episode reward.
evaluations_df.loc[:, 'mean_episode_reward']

0    10.9752
Name: mean_episode_reward, dtype: float64

In [22]:
# Display the per-run fraction of crashed episodes.
evaluations_df.loc[:, 'p_crashed']

0    0.14
Name: p_crashed, dtype: float64

In [24]:
# Display the per-run mean speed.
evaluations_df.loc[:, 'mean_speed']

0    9.119461
Name: mean_speed, dtype: float64