In [None]:
%matplotlib inline
import collections
import copy
import html
import os
import re
import shutil
import json
import glob
from functools import partial
from pathlib import Path
from numpy import trapz
import csv
import pandas as pd

from datetime import datetime

from matplotlib import pyplot as plt 
import seaborn as sns 
from math import sqrt 
import pandas as pd 
import math

from IPython.core.display import display, HTML
import numpy as np 
import pandas as pd

# Root directory that is searched for sub-experiment runs; it is passed to
# load_all_subexperiments() further down.
# NOTE(review): this is an absolute, user-specific path, so it must be edited
# before the notebook can run on another machine.
runs_directory = "/userhome/30/xchen3/il-representations/runs"

# Prefix rewrites applied by get_parent_relpath() to paths read from configs.
# The mapping is currently empty, so paths are used exactly as stored.
path_translations = {
    # when loading things like 'encoder_path' and 'policy_path' from configs,
    # replace the thing on the left with the thing on the right
    # NOTE(review): the disabled entries below reference `gfs_mount` and
    # `cluster_subpath`, which are not defined anywhere in this notebook;
    # define them before re-enabling any of these lines.
#     "/data/il-representations/": gfs_mount,
#     "/root/il-rep/runs/": os.path.join(gfs_mount, cluster_subpath),
#     "/home/sam/repos/il-representations/cloud/runs/": os.path.join(gfs_mount, cluster_subpath)
}

## Search and Organize experiments

In [None]:
def get_parent_relpath(sample_parent_file, local_root_dir):
    """Return the directory containing `sample_parent_file`, relative to
    `local_root_dir`.

    Before the path is resolved, every entry of the module-level
    `path_translations` mapping whose key is a prefix of the (possibly
    already rewritten) path is applied in turn, swapping that prefix for the
    mapped replacement. This lets paths recorded on one machine be located
    on another.

    Args:
        sample_parent_file: path to a file or directory inside the parent
            run, e.g. a saved encoder or policy.
        local_root_dir: directory the result is expressed relative to.

    Returns:
        The relative path (str) of the parent directory of the translated,
        absolutised `sample_parent_file`.
    """
    translated = sample_parent_file
    for old_prefix, new_prefix in path_translations.items():
        if not translated.startswith(old_prefix):
            continue
        translated = new_prefix + translated[len(old_prefix):]

    containing_dir = os.path.dirname(os.path.abspath(translated))
    return os.path.relpath(containing_dir, local_root_dir)

class SubexperimentRun:
    """Everything known about one run (single execution) of a Sacred
    sub-experiment, i.e. of the 'repl', 'il_train', or 'il_test' experiment.

    Attributes set by the constructor:
        ident: run directory relative to the experiment root; used as the
            unique identifier (and for hashing/equality).
        subexp_dir: absolute path of the run directory.
        experiment_dir_root: absolute path of the experiment root.
        mode: name of the directory that contains the run directory; must be
            one of 'repl', 'il_train', 'il_test'.
        config: dict loaded from '<subexp_dir>/config.json'. For 'il_train'
            runs an extra boolean key 'use_repl' is injected.
        progress_path: path to '<subexp_dir>/progress.csv', or None if the
            file does not exist.
        eval_json_path: path to '<subexp_dir>/eval.json', or None if the
            file does not exist.
        parent_ident: `ident` of the run this one was built from, or None.
    """
    def __init__(self, subexp_dir, experiment_dir_root):
        """Load the run stored in `subexp_dir`.

        Args:
            subexp_dir: directory of the run (must contain 'config.json').
            experiment_dir_root: root directory; identifiers are expressed
                relative to it.

        Raises:
            AssertionError: if the directory containing `subexp_dir` is not
                named 'repl', 'il_train' or 'il_test'.
            OSError / json.JSONDecodeError: if 'config.json' cannot be read.
            KeyError: if an 'il_test' config has no 'policy_dir' entry.
        """
        # Subexperiment dir is used as a unique identifier.
        # We strip out the leading 'experiment_dir_root' to shorten identifiers.
        subexp_dir = os.path.abspath(subexp_dir)
        experiment_dir_root = os.path.abspath(experiment_dir_root)
        self.ident = os.path.relpath(subexp_dir, experiment_dir_root)
        self.subexp_dir = subexp_dir
        self.experiment_dir_root = experiment_dir_root

        # usually paths are like, e.g., 'chain_runs/repl/42' or
        # 'chain_runs/il_train/13'; if we take the second last component,
        # we should get the mode
        self.mode = os.path.split(os.path.split(subexp_dir)[0])[1]
        # The failure message must use self.mode: there is no local `mode`,
        # so referencing it would raise NameError instead of reporting the
        # offending directory.
        assert self.mode in {'repl', 'il_train', 'il_test'}, \
            (self.mode, subexp_dir)

        # Load experiment config
        config_path = os.path.join(subexp_dir, 'config.json')
        with open(config_path, 'r') as fp:
            self.config = json.load(fp)

        # Store a path to relevant progress.csv file (only for il_train/repl)
        progress_path = os.path.join(subexp_dir, "progress.csv")
        self.progress_path = \
            progress_path if os.path.exists(progress_path) else None

        # Store a path to relevant eval.json file
        eval_json_path = os.path.join(subexp_dir, "eval.json")
        self.eval_json_path = \
            eval_json_path if os.path.exists(eval_json_path) else None

        # Infer the .ident attribute for the parent experiment
        # (if it exists)
        encoder_path = self.config.get('encoder_path')
        if self.mode == 'il_train' and encoder_path is not None:
            encoder_relpath = get_parent_relpath(
                encoder_path, experiment_dir_root)
            # The relpath is going to be something like
            # "chain_runs/10/repl/5/checkpoints/representation_encoder".
            # We heuristically remove the last two parts. os.path.dirname is
            # used (rather than splitting on '/') so this also works where
            # the path separator is not '/'.
            self.parent_ident = os.path.dirname(
                os.path.dirname(encoder_relpath))
        elif self.mode == 'il_test':
            self.parent_ident = get_parent_relpath(
                self.config['policy_dir'], experiment_dir_root)
        else:
            # "repl" runs and "il_train" runs without an encoder_path
            # have no parents
            assert self.mode == 'repl' \
              or (self.mode == 'il_train' and encoder_path is None), \
               (self.mode, encoder_path)
            self.parent_ident = None

        # HACK: adding a use_repl key so that we can see whether il_train runs used repL
        if self.mode == 'il_train':
            self.config['use_repl'] = self.parent_ident is not None

    def get_merged_config(self, index):
        """Get a 'merged' config dictionary for this subexperiment and
        all of its parents. The dict will have a format like this:

        {"env_cfg": {…}, "venv_opts": {…}, "env_data": {…},
         "il_train": {…}, "il_test": {…}, "repl": {…}}

        The 'env_cfg', 'venv_opts' and 'env_data' entries are lifted out of
        the per-mode config (when present) to the top level. Note that some
        keys might not be present (e.g. if this is a `repl` run, it will not
        have the `il_train` key; if this is an `il_train` run with no parent,
        then the `repl` key will be absent).

        Args:
            index: object providing `get_subexp(ident)`; only consulted when
                this run has a parent.

        Returns:
            A new dict. On key collisions the parent's value replaces this
            run's value (plain `dict.update`).
        """
        config = {self.mode: dict(self.config)}
        extract_keys = ('env_cfg', 'venv_opts', 'env_data')
        for extract_key in extract_keys:
            if extract_key in config[self.mode]:
                # hoist keys to the top level because these ingredient names
                # are shared between several experiment modes
                config[extract_key] = config[self.mode].pop(extract_key)
        parent = self.get_parent(index)
        if parent is not None:
            # TODO: merge this properly, erroring on incompatible duplicate
            # keys. I think Cody has code for this.
            config.update(parent.get_merged_config(index))
        return config

    def get_parent(self, index):
        """Return the parent run looked up in `index`, or None if this run
        has no parent."""
        if self.parent_ident is None:
            return None
        return index.get_subexp(self.parent_ident)

    def __hash__(self):
        return hash(self.ident)

    def __eq__(self, other):
        # Two runs are the same run exactly when their identifiers match.
        if not isinstance(other, SubexperimentRun):
            return NotImplemented
        return self.ident == other.ident

class SubexperimentIndex:
    """Registry of subexperiments keyed by their `.ident` attribute.

    Offers exact lookup by identifier and a linear attribute-based search.
    """
    def __init__(self):
        # Maps identifier -> subexperiment object, in insertion order.
        self.subexp_by_ident = {}

    def add_subexp(self, subexp):
        """Register `subexp`; raise ValueError if its ident is already taken."""
        key = subexp.ident
        if key not in self.subexp_by_ident:
            self.subexp_by_ident[key] = subexp
            return
        raise ValueError("duplicate subexperiment:", subexp)

    def get_subexp(self, ident):
        """Return the subexperiment stored under `ident` (KeyError if absent)."""
        return self.subexp_by_ident[ident]

    def search(self, **attrs):
        """Return every registered subexperiment for which each keyword in
        `attrs` names an attribute whose value is not different from the
        requested one. With no keywords, everything is returned."""
        return [
            candidate
            for candidate in self.subexp_by_ident.values()
            if not any(getattr(candidate, attr_name) != wanted
                       for attr_name, wanted in attrs.items())
        ]

def get_experiment_directories(root_dir, skip_skopt=True):
    """Walk `root_dir` (following symlinks) and collect run directories.

    A run directory is one whose absolute path ends in
    '/il_test/<digits>', '/il_train/<digits>' or '/repl/<digits>'.
    Directories named 'grid_search' or '_sources' are never descended into,
    and once a directory yields at least one run directory among its
    children, the walk does not recurse below it.

    Args:
        root_dir: directory to start the walk from.
        skip_skopt: when true, any directory that has a 'grid_search' child
            containing an entry starting with 'search-alg-' is skipped
            entirely (a message is printed for each one).

    Returns:
        Sorted list of absolute paths of the run directories found.
    """
    run_dir_re = re.compile(r'^.*/(il_test|il_train|repl)/\d+$')
    # bookkeeping directories that never contain runs
    pruned_dir_re = re.compile(r'^.*/(grid_search|_sources)$')
    found = set()
    for current, subdirs, _files in os.walk(root_dir, followlinks=True, topdown=True):
        # Emptying `subdirs` in place tells os.walk (topdown mode) not to
        # descend any further from `current`.
        if pruned_dir_re.match(current):
            subdirs.clear()
            continue

        # check whether this is a skopt dir
        if skip_skopt and 'grid_search' in subdirs:
            grid_search_entries = os.listdir(os.path.join(current, 'grid_search'))
            if any(entry.startswith('search-alg-') for entry in grid_search_entries):
                # this is a skopt dir, skip it
                print("skipping skopt directory in", current)
                subdirs.clear()
                continue

        child_paths = (os.path.abspath(os.path.join(current, child))
                       for child in subdirs)
        matched = {path for path in child_paths
                   if run_dir_re.match(path) is not None}
        if matched:
            found |= matched
            subdirs.clear()  # don't recurse
    return sorted(found)

# Find all experiment directories (i.e. directories containing a grid_search
# subdir)
def load_all_subexperiments(root_dir, skip_skopt=True):
    """Discover every run directory below `root_dir` and load each one as a
    SubexperimentRun, returning them collected in a single SubexperimentIndex.

    `skip_skopt` is forwarded to get_experiment_directories(). Progress
    messages are printed before the search and before the loading phase.
    """
    print("Searching for experiment directories (might take a minute or two)")
    run_dirs = get_experiment_directories(root_dir, skip_skopt=skip_skopt)
    print("Loading experiments (might take another minute or two)")
    loaded = SubexperimentIndex()
    for run_dir in run_dirs:
        loaded.add_subexp(SubexperimentRun(run_dir, root_dir))
    return loaded

In [None]:
# Build the index of every run found under `runs_directory`
# (skopt search directories are skipped).
subexp_index = load_all_subexperiments(runs_directory, skip_skopt=True)
print('Discovered', len(subexp_index.subexp_by_ident), 'subexperiments')

# Only the 'il_test' runs are needed for the return curves plotted below.
test_expts = subexp_index.search(mode='il_test')

## Plot return curves

In [None]:
def get_result_dict(eval_files, eval_mode=None):
    """
    Read each JSON file in `eval_files` and collect its update count and mean
    return, ordered by ascending update count.

    The update count is parsed from the file's 'policy_path' entry: it is the
    second-to-last '_'-separated token of the path's final component
    (e.g. 'policy_00100_batches.pt' would give 100 -- example name only).

    Args:
        eval_files: iterable of paths to evaluation JSON files.
        eval_mode: optional key (e.g. 'train_level' / 'test_level' for
            Procgen). When given, 'return_mean' is read from
            `result[eval_mode]` instead of from the top level.

    Returns:
        dict with two parallel lists, 'n_update' and 'return_mean'.

    Raises:
        AssertionError: if `eval_mode` is given but missing from a file.
        KeyError / ValueError: if a file lacks the expected entries or the
            policy file name does not contain an integer update count.
    """
    records = []
    for eval_file in eval_files:
        with open(eval_file) as f:
            test_result = json.load(f)

        policy_name = test_result['policy_path'].split('/')[-1]
        nupdate = int(policy_name.split('_')[-2])

        if eval_mode:
            assert eval_mode in test_result, \
                f"{eval_mode!r} not found in {eval_file}"
            return_mean = test_result[eval_mode]['return_mean']
        else:
            return_mean = test_result['return_mean']

        records.append((nupdate, return_mean))

    # The files might not arrive ordered by update count, so sort once after
    # everything has been read (the sort is stable, so files with equal
    # counts keep their input order).
    records.sort(key=lambda record: record[0])
    return {
        'n_update': [nupdate for nupdate, _ in records],
        'return_mean': [return_mean for _, return_mean in records],
    }


def get_eval_files(test_dir):
    """Return the paths (joined onto `test_dir`) of all entries directly
    inside `test_dir` whose name contains the substring 'eval'."""
    matches = []
    for entry in os.listdir(test_dir):
        if 'eval' in entry:
            matches.append(os.path.join(test_dir, entry))
    return matches

        
def get_result_and_add_to_dict(eval_files, exp_ident, target_dict, algo, task, eval_mode=None):
    """Compute the result dict for `eval_files` (via get_result_dict) and file
    it under `exp_ident` in `target_dict`.

    The first result for an identifier creates an entry
    {'algo': algo, 'task': task, 'result_dict': [result]}; later results for
    the same identifier are appended to that entry's 'result_dict' list.
    `target_dict` is modified in place and also returned.
    """
    run_result = get_result_dict(eval_files, eval_mode=eval_mode)
    if exp_ident in target_dict.keys():
        target_dict[exp_ident]['result_dict'].append(run_result)
    else:
        target_dict[exp_ident] = dict(algo=algo, task=task, result_dict=[run_result])
    return target_dict
    
        
# Organize the results and index using exp_ident.
# exp_dict maps an identifier of the form "<algo>-<task_name>" (plus a
# "-train_level"/"-test_level" suffix for procgen) to
# {'algo', 'task', 'result_dict': [one result dict per test run]}.
# NOTE: the loop variables (e.g. `benchmark_name`) stay defined at notebook
# level after this cell and are read again by a later cell.
exp_dict = dict()
for test_expt in test_expts:
    eval_files = get_eval_files(test_expt.subexp_dir)
    # Skip test runs with fewer than 20 eval files.
    # NOTE(review): 20 is a magic number -- presumably the number of policy
    # checkpoints evaluated in a complete run; confirm and name it.
    if len(eval_files) < 20:
        continue
    # The directory one level above 'policy_dir' is expected to hold the
    # config.json of the run that trained the policy (presumably the
    # il_train run -- TODO confirm).
    # NOTE(review): unlike SubexperimentRun, this does not apply
    # `path_translations` to the stored path.
    parent_config_path = os.path.join(Path(test_expt.config['policy_dir']).parent.absolute(),
                                      'config.json')
    if not os.path.exists(parent_config_path):
        continue

    with open(parent_config_path, 'r') as f:
        parent_config = json.load(f)
    task_name = parent_config['env_cfg']['task_name']


    # Default to the training run's own 'algo'; if the run used a pretrained
    # encoder, report the algo from the config found three levels above
    # 'encoder_path' instead (presumably the repl run directory -- TODO confirm).
    algo = parent_config['algo']
    if 'encoder_path' in parent_config.keys() and parent_config['encoder_path'] is not None:
        # NOTE(review): no existence check here, so a missing repl config
        # raises instead of skipping the run like the check above does.
        repl_config_path = os.path.join(Path(parent_config['encoder_path']).parent.parent.parent.absolute(),
                                      'config.json')
        with open(repl_config_path, 'r') as f:
            repl_config = json.load(f)
        algo = repl_config['algo']
    exp_ident = f"{algo}-{task_name}"

    benchmark_name = parent_config['env_cfg']['benchmark_name']

    # For procgen the same eval files are read twice, once per sub-dict
    # ('train_level' and 'test_level'), and stored under separate identifiers.
    if benchmark_name == 'procgen':
        exp_dict = get_result_and_add_to_dict(eval_files, exp_ident+'-train_level', exp_dict, 
                                              algo, task_name+'-train_level', eval_mode='train_level')
        exp_dict = get_result_and_add_to_dict(eval_files, exp_ident+'-test_level', exp_dict, 
                                              algo, task_name+'-test_level', eval_mode='test_level')
    else:
        exp_dict = get_result_and_add_to_dict(eval_files, exp_ident, exp_dict, algo, task_name)

In [79]:
def lineplot_from_df(df, x, y, title=None, y_label=None, save_path=None):
    """Draw a seaborn line plot of `y` against `x` from `df` on a new figure.

    Args:
        df: data passed to `sns.lineplot` as `data`.
        x: name of the variable plotted on the x axis.
        y: name of the variable plotted on the y axis.
        title: optional axes title.
        y_label: optional replacement for the y-axis label.
        save_path: optional path; when given, the figure is written there
            with `Figure.savefig`.

    Returns:
        None. The figure is left open so the notebook can render it.
    """
    # Apply the theme *before* the axes are created: seaborn themes work by
    # changing matplotlib's global defaults, which only affect axes created
    # afterwards. Setting it after plotting left the first figure unstyled.
    sns.set(style='darkgrid')

    fig, ax = plt.subplots()
    sns.lineplot(x=x, y=y, data=df, ax=ax)

    if title:
        ax.set_title(title)

    if y_label:
        ax.set_ylabel(y_label)

    if save_path:
        fig.savefig(save_path)


# Build one row per experiment identifier (algo, task, mean return over all
# update counts) and draw the seed-averaged return curve for each.
# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append no longer exists in pandas >= 2.0, and growing a frame row
# by row is quadratic anyway.
score_records = []
for exp_ident, exp_info in exp_dict.items():
    # exp_result is a list of dict with keys 'n_update' and 'return_mean'. len(exp_result) is the number of
    # random seeds.
    algo, task, exp_result = exp_info['algo'], exp_info['task'], exp_info['result_dict']
    merged_df = pd.concat([pd.DataFrame(result) for result in exp_result], ignore_index=True)
    # Average over seeds for each update count; the result is indexed by 'n_update'.
    mean_df = merged_df.groupby('n_update').mean()
    # Single summary score: mean of the seed-averaged returns over all update counts.
    mean = np.mean(list(mean_df['return_mean']), axis=0)
    score_records.append(dict(algo=algo, task=task, mean=mean))

    # reset_index() turns 'n_update' back into a regular column so that it
    # can be looked up by name as the x variable.
    lineplot_from_df(mean_df.reset_index(), x='n_update', y='return_mean',
                     title=f'{exp_ident}({len(exp_result)})')

# Explicit columns keep the frame well-formed even when exp_dict is empty.
score_df = pd.DataFrame(score_records, columns=['algo', 'task', 'mean'])


## Print a table of mean return

In [None]:
# One row per task, one column per algo; each cell holds the 'mean' score
# (if a task/algo pair occurs more than once, pivot_table's default
# aggregation, the mean, combines them).
score_df.pivot_table(index=['task'], columns='algo', values='mean')

## Plot loss curves

In [None]:
# Plot the training loss recorded in the progress.csv that sits one level
# above `policy_dir`, and save the figure into `test_dir`.
# NOTE(review): `policy_dir`, `train_exp_ident` and `test_dir` are not
# assigned in any visible cell of this notebook, so this cell raises
# NameError on a fresh kernel (Restart & Run All). `benchmark_name` is only
# available as a leftover from the last iteration of the result-organising
# loop above. Define all four explicitly before relying on this cell.
from pathlib import Path
train_folder = Path(policy_dir).parent.absolute()
progress_path = os.path.join(train_folder, 'progress.csv')
progress_df = pd.read_csv(progress_path)

# Expects 'n_updates' and 'loss' columns in progress.csv.
ax = sns.lineplot(x='n_updates', y='loss', data=progress_df)
ax.set_title(f"{benchmark_name}-{train_exp_ident}")

fig = ax.get_figure()
fig.savefig(f"{test_dir}/loss_curve.png")