In [1]:
%load_ext tensorboard
%load_ext autoreload
%autoreload 2

In [2]:
import time

import numpy as np

from rmrl.experiments.configurations import *
from rmrl.experiments.with_transfer import WithTransferExperiment
from rmrl.experiments.runner import ExperimentsRunner
from rmrl.utils.misc import sha3_hash

import matplotlib.pyplot as plt

# Base random seed: the default `seed` of `get_simple_exp`, and the value that
# is multiplied by 1..NUM_SEEDS to derive one seed per repetition.
SEED = 42
# Number of distinct seeds for which every experiment variant is created.
NUM_SEEDS = 3

In [3]:
def get_simple_exp(seed=SEED, mods=None):
    """Build a `WithTransferExperiment` with this notebook's fixed settings.

    Args:
        seed: seed stored in the experiment configuration (defaults to `SEED`).
        mods: modifications to enable; any falsy value (e.g. ``None``) is
            replaced by an empty list.

    Returns:
        A `WithTransferExperiment` using the SMALL environment, the
        FIXED_ENTITIES context space and PPO, dumping to
        ``'single_experiment_dumps'``.
    """
    enabled_mods = mods if mods else []

    cfg = ExperimentConfiguration(
        env=SupportedEnvironments.SMALL,
        cspace=ContextSpaces.FIXED_ENTITIES,
        alg=Algos.PPO,
        mods=enabled_mods,
        rm_kwargs={'grid_resolution': (2, 2)},
        model_kwargs={'gnn_hidden_dims': [32, 32]},
        alg_kwargs={
            # currently disabled options:
            # 'learning_starts': 0,
            # 'exploration_fraction': 0.5,
            'n_steps': 1024,
        },
        seed=seed,
    )

    return WithTransferExperiment(
        cfg=cfg,
        total_timesteps=1e5,
        dump_dir='single_experiment_dumps',
        verbose=1,
        log_interval=1,
        eval_freq=500,
        min_evals=10,
        n_eval_episodes=10,
    )

In [4]:
# Flat list of experiments, grouped per seed. Within each group the order is
# always: RS only, AS+RS, AS+RS+GECO, AS+RS+GECOUPT. The evaluation loader
# slices this list in groups of four, so the order must not change.
experiments = []
for seed_multiplier in range(1, NUM_SEEDS + 1):
    seed = SEED * seed_multiplier
    rs_only, rm_as, rm_as_geco, rm_as_gecoupt = (
        get_simple_exp(seed, mods=variant_mods)
        for variant_mods in (
            [Mods.RS],
            [Mods.AS, Mods.RS],
            [Mods.AS, Mods.RS, Mods.GECO],
            [Mods.AS, Mods.RS, Mods.GECOUPT],
        )
    )
    experiments += [rs_only, rm_as, rm_as_geco, rm_as_gecoupt]

In [25]:
# Context pairs used by the plots below; each entry is later unpacked as
# `(src, tgt)`. NOTE(review): presumably loads cached contexts when available
# and samples them otherwise - confirm against `ExperimentsRunner`.
pairs = ExperimentsRunner.load_or_sample_contexts(
    exp=experiments[0],
    num_samples=3,
    sample_seed=24,
)

In [101]:
def load_exp_eval_for_single_seed(experiments, seed_idx, src, tgt, exps_per_seed=4):
    """Load the saved evaluation logs of every experiment variant of one seed.

    `experiments` is treated as a flat list made of consecutive groups of
    `exps_per_seed` experiments, one group per seed. The group at position
    `seed_idx` is selected and, for each experiment in it, three
    ``evaluations.npz`` files are opened below ``exp.eval_log_dir``.

    Args:
        experiments: flat sequence of experiments exposing a path-like
            `eval_log_dir` attribute.
        seed_idx: 0-based index of the seed group to load.
        src: source context; hashed with `sha3_hash` to get its directory name.
        tgt: target context; hashed with `sha3_hash` to get its directory name.
        exps_per_seed: number of experiments per seed group (default 4, which
            matches how this notebook builds `experiments`).

    Returns:
        Tuple ``(src_evals, tgt_evals, tsf_evals)`` of lists with one
        `np.load` result per experiment of the selected group:
        runs on the source context, runs on the target context, and runs in
        the ``'<tgt>_transfer_from_<src>'`` directory. The lists are empty
        when `seed_idx` points past the end of `experiments`.
    """
    first = seed_idx * exps_per_seed
    cur_exps = experiments[first:first + exps_per_seed]

    src_name = sha3_hash(src)
    tgt_name = sha3_hash(tgt)

    def load_all(run_dir_name):
        # One evaluations file per experiment, kept in experiment order.
        return [
            np.load(exp.eval_log_dir / run_dir_name / 'evaluations.npz')
            for exp in cur_exps
        ]

    src_evals = load_all(src_name)
    tgt_evals = load_all(tgt_name)
    tsf_evals = load_all(f'{tgt_name}_transfer_from_{src_name}')

    return src_evals, tgt_evals, tsf_evals

def plot_single_context_evals(evals, lbl_prefix=None, with_std=False, ax=None):
    """Draw the learning curves of the four experiment variants on one axes.

    Args:
        evals: exactly four evaluation records, ordered as RS only, RSAS,
            RSAS GECO, RSAS GECO-UPT. Each is forwarded to `plotter`.
        lbl_prefix: optional text prepended (followed by a space) to every
            legend label.
        with_std: forwarded to `plotter`; draws a +/- one std band when true.
        ax: axes to draw on. When None a single new figure is created and
            shared by all four curves.

    Returns:
        The axes the curves were drawn on.

    Raises:
        ValueError: if `evals` does not contain exactly four items.
    """
    lbl_prefix = f'{lbl_prefix} ' if lbl_prefix else ''

    # Unpack first so a wrong number of records fails before any drawing.
    npz_rs_only, npz_rm_as, npz_rm_as_geco, npz_rm_as_gecoupt = evals

    # Create the shared axes here: leaving `ax` as None would make every
    # `plotter` call open its own figure, scattering the curves.
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(10, 7))

    labelled_evals = (
        (npz_rs_only, 'RS only'),
        (npz_rm_as, 'RSAS (camacho)'),
        (npz_rm_as_geco, 'RSAS GECO (ours)'),
        (npz_rm_as_gecoupt, 'RSAS GECO-UPT (ours)'),
    )
    for npz, variant_label in labelled_evals:
        plotter(npz, lbl_prefix + variant_label, with_std, ax)

    return ax

def plotter(npz, label, with_std=False, ax=None):
    """Plot the mean evaluation result against timesteps.

    Args:
        npz: mapping with a ``'timesteps'`` entry (x values) and a
            ``'results'`` entry; results are averaged over their last axis.
        label: legend label of the curve.
        with_std: when true, also shade mean +/- one standard deviation
            (computed over the last axis of the results).
        ax: axes to draw on; a new 10x7 figure is created when None.

    Returns:
        The axes that was drawn on, so callers can add labels or a legend
        even when the axes was created here.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(10, 7))

    timesteps = npz['timesteps']
    results = npz['results']
    means = np.mean(results, axis=-1)

    ax.plot(timesteps, means, label=label)
    if with_std:
        # Only needed for the shaded band, so compute it lazily.
        stds = np.std(results, axis=-1)
        ax.fill_between(timesteps, means - stds, means + stds, alpha=0.2)

    return ax

def plot_compare_single_seed(experiments, seed_idx, src, tgt, axes=None):
    """Plot from-scratch and transfer learning curves of one seed side by side.

    The left axes shows the evaluations recorded on the source context; the
    right axes shows the evaluations of the runs stored as transfers from the
    source context to the target context.

    Args:
        experiments: flat experiment list, forwarded to
            `load_exp_eval_for_single_seed`.
        seed_idx: index of the seed group to plot; also shown in the titles.
        src: source context.
        tgt: target context.
        axes: optional pair ``(ax1, ax2)`` to draw on; a new 1x2 figure is
            created when None.
    """
    # Load before touching matplotlib so that a missing evaluation file does
    # not leave an empty figure behind.
    src_evals, tgt_evals, tsf_evals = load_exp_eval_for_single_seed(experiments,
                                                                    seed_idx,
                                                                    src,
                                                                    tgt)

    if axes is None:
        _, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
    else:
        ax1, ax2 = axes

    ax1.set_title(f'Policies trained from scratch on SRC context (seed {seed_idx})')
    ax1.set_xlabel('timesteps')
    ax1.set_ylabel('mean reward')
    plot_single_context_evals(src_evals, ax=ax1)
    ax1.legend()

    # This panel shows the transfer runs, so its title must not repeat the
    # "from scratch on SRC" wording of the left panel.
    ax2.set_title(f'Policies transferred from SRC to TGT context (seed {seed_idx})')
    ax2.set_xlabel('timesteps')
    ax2.set_ylabel('mean reward')
    # Optional baseline (disabled): target-context runs trained from scratch.
    # plot_single_context_evals(tgt_evals, 'scratch', ax=ax2)
    plot_single_context_evals(tsf_evals, 'transfer', ax=ax2)
    ax2.legend()


def _pool_evals_over_seeds(per_seed_evals):
    """Merge the evaluation records of one experiment variant across seeds.

    The ``'results'`` arrays are joined along their last axis, the axis that
    `plotter` averages over, so the plotted mean covers every seed. Records
    are cut to the shortest number of evaluation points among the seeds.

    NOTE(review): assumes the leading axis of ``'results'`` lines up with
    ``'timesteps'`` and that all seeds share the same evaluation schedule -
    confirm against how the evaluations are written.
    """
    n_points = min(len(record['timesteps']) for record in per_seed_evals)
    return {
        'timesteps': per_seed_evals[0]['timesteps'][:n_points],
        'results': np.concatenate(
            [record['results'][:n_points] for record in per_seed_evals],
            axis=-1,
        ),
    }


def plot_mean_over_all_seeds(experiments, num_seeds, src, tgt):
    """Plot learning curves averaged over the first `num_seeds` seeds.

    Mirrors `plot_compare_single_seed`: the left axes shows the runs on the
    source context and the right axes shows the transfer runs, but every
    curve is the mean over all evaluation episodes of all seeds.

    Args:
        experiments: flat experiment list, forwarded to
            `load_exp_eval_for_single_seed`.
        num_seeds: number of seed groups (indices ``0..num_seeds-1``) to pool.
        src: source context.
        tgt: target context.

    Raises:
        ValueError: if `num_seeds` is smaller than 1.
    """
    if num_seeds < 1:
        raise ValueError('num_seeds must be at least 1')

    # all_seed_evals[s] == (src_evals, tgt_evals, tsf_evals) of seed index s.
    all_seed_evals = [
        load_exp_eval_for_single_seed(experiments, seed_idx, src, tgt)
        for seed_idx in range(num_seeds)
    ]

    def pooled_group(group_idx):
        # One pooled record per experiment variant, in variant order.
        n_variants = len(all_seed_evals[0][group_idx])
        return [
            _pool_evals_over_seeds(
                [seed_evals[group_idx][variant_idx] for seed_evals in all_seed_evals]
            )
            for variant_idx in range(n_variants)
        ]

    src_mean_evals = pooled_group(0)
    tsf_mean_evals = pooled_group(2)

    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))

    ax1.set_title(f'Policies trained from scratch on SRC context (mean over {num_seeds} seeds)')
    ax1.set_xlabel('timesteps')
    ax1.set_ylabel('mean reward')
    plot_single_context_evals(src_mean_evals, ax=ax1)
    ax1.legend()

    ax2.set_title(f'Policies transferred from SRC to TGT context (mean over {num_seeds} seeds)')
    ax2.set_xlabel('timesteps')
    ax2.set_ylabel('mean reward')
    plot_single_context_evals(tsf_mean_evals, 'transfer', ax=ax2)
    ax2.legend()
    
    

In [None]:
# Which sampled context pair to plot (index into `pairs`).
CONTEXT_IDX = 1
# Which seed group to plot (0-based index into the per-seed groups of `experiments`).
SEED_IDX = 1

src, tgt = pairs[CONTEXT_IDX]
plot_compare_single_seed(experiments, seed_idx=SEED_IDX, src=src, tgt=tgt)