In [None]:
import os

import pandas as pd
import matplotlib.pyplot as plt

# Data loading

In [None]:
def load_mon(results_dir='final_results'):
    """Load the monitor ``progress.csv`` of every experiment under ``results_dir``.

    The directory layout read is ``<results_dir>/<env>/<exp>/mon/progress.csv``.

    Args:
        results_dir: Root directory to scan. Defaults to ``'final_results'``,
            the location that used to be hard-coded.

    Returns:
        Nested dict ``{env: {exp: DataFrame}}`` where each DataFrame is the
        result of ``pd.read_csv`` on that experiment's ``progress.csv``.

    Raises:
        Whatever ``pd.read_csv`` raises when an experiment directory has no
        ``mon/progress.csv``; a missing file is deliberately not skipped.
    """
    res = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # traversal (and the resulting dict insertion order) deterministic.
    for env in sorted(os.listdir(results_dir)):
        env_dir = os.path.join(results_dir, env)
        if not os.path.isdir(env_dir):
            # Stray files next to the environment directories (e.g. .DS_Store)
            # would otherwise make the inner os.listdir fail.
            continue
        for exp in sorted(os.listdir(env_dir)):
            exp_dir = os.path.join(env_dir, exp)
            if not os.path.isdir(exp_dir):
                continue
            csv_path = os.path.join(exp_dir, 'mon', 'progress.csv')
            res.setdefault(env, {})[exp] = pd.read_csv(csv_path)
    return res
            
def load_stats(results_dir='stats'):
    """Load per-checkpoint evaluation statistics for every experiment.

    Every file found at ``<results_dir>/<env>/<file>`` is read as a header-less
    CSV with six columns: a record tag, the checkpoint path, and four counts.

    Args:
        results_dir: Root directory to scan. Defaults to ``'stats'``, the
            location that used to be hard-coded.

    Returns:
        Nested dict ``{env: {exp: DataFrame}}``. ``exp`` is the file name with
        ``'_checkpoint_stats.log'`` removed. Each DataFrame has the columns
        ``runs``, ``ties``, ``our_loss`` and ``our_win`` and is indexed (and
        sorted) by ``checkpoint``: the last ``/``-separated component of the
        checkpoint path, parsed as an integer and decremented by one.

    Raises:
        AssertionError: if a record tag does not have the expected form, or if
            ``runs != ties + our_loss + our_win`` for some row.
    """
    columns = ['id', 'checkpoint', 'runs', 'ties', 'our_loss', 'our_win']
    res = {}
    # Sorted traversal keeps the insertion order of the result deterministic.
    for env in sorted(os.listdir(results_dir)):
        env_dir = os.path.join(results_dir, env)
        if not os.path.isdir(env_dir):
            # Ignore stray files sitting next to the environment directories.
            continue
        for csv_fname in sorted(os.listdir(env_dir)):
            csv_path = os.path.join(env_dir, csv_fname)
            if not os.path.isfile(csv_path):
                continue
            df = pd.read_csv(csv_path, index_col=False, names=columns)

            # The first field is a space-separated tag; fields 0, 1 and 3 are
            # fixed, field 2 is not checked. By position the tag's
            # Victim_Win / Victim_Loss line up with our_loss / our_win.
            ids = df['id'].str.split(' ')
            assert (ids.str[0] == '[MAGIC').all(), \
                'unexpected record tag in {}'.format(csv_path)
            assert (ids.str[1] == 'NUMBER').all(), \
                'unexpected record tag in {}'.format(csv_path)
            assert (ids.str[3] == 'Name/Samples/Tie/Victim_Win/Victim_Loss').all(), \
                'unexpected record layout in {}'.format(csv_path)

            # Checkpoint number is the last path component, shifted by one.
            checkpoint_idx = df['checkpoint'].str.split('/').str[-1].astype('int') - 1
            df = (
                df.drop(columns=['id'])
                .assign(checkpoint=checkpoint_idx)
                .set_index('checkpoint')
                .sort_index()
            )

            # Sanity check: every run is exactly one of tie / loss / win.
            assert (df['runs'] == df['ties'] + df['our_loss'] + df['our_win']).all(), \
                'run counts do not add up in {}'.format(csv_path)

            exp = csv_fname.replace('_checkpoint_stats.log', '')
            res.setdefault(env, {})[exp] = df
    return res

def combine(mon, stats):
    """Join monitor and statistics frames experiment by experiment.

    Both arguments are nested dicts ``{env: {exp: DataFrame}}``. The result has
    one entry per ``(env, exp)`` present in ``stats``; the matching frame is
    looked up in ``mon`` (a missing key raises ``KeyError``). Entries that
    exist only in ``mon`` are ignored, since key equality is not enforced.

    Each monitor frame must satisfy ``index == nupdates - 1``; otherwise an
    ``AssertionError`` is raised. The two frames are concatenated column-wise.
    """
    merged = {}
    for env, stats_by_exp in stats.items():
        mon_by_exp = mon[env]
        for exp, stats_df in stats_by_exp.items():
            mon_df = mon_by_exp[exp]
            # The monitor frame's row index has to line up with nupdates - 1.
            expected_index = mon_df['nupdates'] - 1
            assert (mon_df.index == expected_index).all()
            joined = pd.concat([mon_df, stats_df], axis=1)
            if env not in merged:
                merged[env] = {}
            merged[env][exp] = joined
    return merged

def by_experiment(dfs):
    """Regroup ``{env: {exp_name: df}}`` into ``{env: {name: {seed: df}}}``.

    ``exp_name`` is split on ``'_'``. The last field is the integer seed and
    the second-to-last must be a prefix of ``env`` (checked with an assert).
    The first two fields are dropped (the original code named them date and
    time), and everything in between is re-joined with ``'_'`` to form ``name``.
    """
    grouped = {}
    for env, runs in dfs.items():
        for exp_name, frame in runs.items():
            parts = exp_name.split('_')
            env_tag = parts[-2]
            assert env.startswith(env_tag)
            seed = int(parts[-1])
            # Skip the two leading fields and the trailing env/seed fields.
            name = '_'.join(parts[2:-2])
            per_env = grouped.setdefault(env, {})
            per_name = per_env.setdefault(name, {})
            per_name[seed] = frame
    return grouped

In [None]:
# Load both data sources, join them per experiment, then regroup the joined
# frames as {env: {experiment name: {seed: DataFrame}}}.
mon = load_mon()
stats = load_stats()
dfs = by_experiment(combine(mon, stats))

# Graphs

In [None]:
def plot_win_rate(dfs, ylim=None):
    """Plot the smoothed win rate, in percent, for each entry of ``dfs``.

    Args:
        dfs: Mapping from legend label to a DataFrame with ``our_win`` and
            ``runs`` columns.
        ylim: Optional y-axis limits passed to ``plt.ylim``; left untouched
            when ``None``.

    The win rate ``our_win / runs * 100`` is smoothed with a 10-row rolling
    mean before plotting, one line per label.
    """
    rate_by_label = {}
    for label, df in dfs.items():
        rate_by_label[label] = df['our_win'] / df['runs'] * 100
    smoothed = pd.DataFrame(rate_by_label).rolling(10).mean()
    smoothed.plot()
    plt.xlabel('Epoch')
    plt.ylabel('Win Rate (%)')
    if ylim is not None:
        plt.ylim(ylim)

def plot_reward(dfs, ylim=None):
    """Plot the smoothed mean episode reward for each entry of ``dfs``.

    Args:
        dfs: Mapping from legend label to a DataFrame with an ``eprewmean``
            column.
        ylim: Optional y-axis limits passed to ``plt.ylim``; left untouched
            when ``None``. Accepted so that this function takes the same
            option as ``plot_win_rate`` and can receive per-environment
            keyword options.

    The ``eprewmean`` column is smoothed with a 10-row rolling mean before
    plotting, one line per label.
    """
    eprewmean = pd.DataFrame({k: df['eprewmean'] for k, df in dfs.items()})
    eprewmean.rolling(10).mean().plot()
    plt.xlabel('Epoch')
    plt.ylabel('Episode Reward')
    if ylim is not None:
        plt.ylim(ylim)

In [None]:
# Plot configuration, keyed by environment name. Each entry holds:
#   'exps'    -- legend label -> (experiment name, seed) selecting one run
#   'win_opt' -- optional keyword arguments for the win-rate plot
graph_config = {}

# TODO: why does seed 2 always do best? This is surprising -- is something
# else actually being selected here (e.g. the opponent)?
graph_config['kick'] = {
    'exps': {
        'Sparse': ('rl_baseline_win_loss', 2),
        'Dense': ('rl_baseline_their_shaped', 2),
        'Heuristic Only': ('rl_new_only_shaped', 2),
        'Heuristic + Sparse': ('rl_new_win_loss_plus_shaped', 2),
        'Heuristic + Dense': ('rl_new_their_shaped_plus_our_shaped', 2),
    },
    'win_opt': {'ylim': (0, 100)},
}

graph_config['sumo'] = {
    'exps': {
        'Sparse': ('rl_baseline_win_loss', 2),
        'Dense': ('rl_baseline_their_shaped', 2),
        'Heuristic Only': ('rl_only_shaped', 2),
        'Heuristic + Sparse': ('rl_win_loss_plus_shaped', 2),
        'Heuristic + Dense': ('rl_their_shaped_plus_our_shaped', 2),
    },
}

In [None]:
# Render and save one reward figure and one win-rate figure per environment.
# savefig does not create missing directories, so make sure the output
# directory exists before the first figure is written.
if not os.path.isdir('figs'):
    os.makedirs('figs')

with plt.style.context(('./graphs/paper.mplstyle', './graphs/onecol.mplstyle')):
    for env, cfg in graph_config.items():
        # One (experiment, seed) run per legend label; experiments that are
        # absent from dfs[env] are silently left out of the plot.
        subset = {k: dfs[env][exp][seed] for k, (exp, seed) in cfg['exps'].items() if exp in dfs[env]}

        plot_reward(subset, **cfg.get('reward_opt', {}))
        plt.savefig(os.path.join('figs', '{}_reward.pdf'.format(env)))

        plot_win_rate(subset, **cfg.get('win_opt', {}))
        plt.savefig(os.path.join('figs', '{}_win.pdf'.format(env)))