In [None]:
%reset

In [None]:
import pandas as pd
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pylab as pl
import matplotlib
import time
from ggplot import *
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
%matplotlib inline

# Global matplotlib / seaborn styling shared by every figure in this notebook.
# NOTE(review): 'bold' is passed as the font *family* here and in the seaborn
# rc below; it looks like a font weight was intended -- confirm.
font = {'family' : 'bold',
        'size'   : 48}

matplotlib.rc('font', **font)
matplotlib.rc('legend', loc='lower right')

# Seaborn "paper" context with enlarged title, label, legend and tick fonts.
sns.set_context("paper", rc={"font.size":48,
                             "font.family": 'bold',
                             "axes.titlesize":42,
                             "axes.labelsize":42, 
                             "legend.fontsize": 24,
                             "legend.loc": 'lower right',
                             "xtick.labelsize": 30,
                             "ytick.labelsize": 30})  
# Default figure size as (width, height).
pl.rcParams['figure.figsize'] = 15, 12

In [None]:
# Per-environment state: `*_dfs` holds the frames parsed so far and `*_runs`
# the run names already parsed. Everything starts empty.
mujoco_dfs, mujoco_runs = [], []
points_dfs, points_runs = [], []
hook_dfs, hook_runs = [], []
hookrandom_dfs, hookrandom_runs = [], []

In [None]:
# Reload previously pickled frames, one environment at a time. Only the
# *_step_df.pkl file is checked for existence; the other two pickles of the
# same environment are then read unconditionally.
# NOTE(review): pd.read_pickle executes pickle data -- only load trusted files.
if os.path.isfile('points_step_df.pkl'):
    points_step_df = pd.read_pickle("points_step_df.pkl")
    points_episode_df = pd.read_pickle("points_episode_df.pkl")
    points_eval_episode_df = pd.read_pickle("points_eval_episode_df.pkl")
    points_dfs = [points_step_df,
                  points_episode_df,
                  points_eval_episode_df]
    # Runs already present in the cache.
    points_runs = set(points_eval_episode_df['run'])

if os.path.isfile('mujoco_step_df.pkl'):
    mujoco_step_df = pd.read_pickle("mujoco_step_df.pkl")
    mujoco_episode_df = pd.read_pickle("mujoco_episode_df.pkl")
    mujoco_eval_episode_df = pd.read_pickle("mujoco_eval_episode_df.pkl")
    mujoco_dfs = [mujoco_step_df,
                  mujoco_episode_df,
                  mujoco_eval_episode_df]
    mujoco_runs = set(mujoco_eval_episode_df['run'])

if os.path.isfile('hook_step_df.pkl'):
    hook_step_df = pd.read_pickle("hook_step_df.pkl")
    hook_episode_df = pd.read_pickle("hook_episode_df.pkl")
    hook_eval_episode_df = pd.read_pickle("hook_eval_episode_df.pkl")
    hook_dfs = [hook_step_df,
                hook_episode_df,
                hook_eval_episode_df]
    hook_runs = set(hook_eval_episode_df['run'])

In [None]:
# Log files read for each seed directory, in this order.
LOG_FILES = ['log.log.1','log.log']
def parse_logs(log_dirs_path, skip_runs=None, current_dfs=None):
    """Parse the training logs of every run directory under ``log_dirs_path``.

    The layout read is ``log_dirs_path/<run>/<seed_dir>/<log file>``, where the
    log files tried for each seed directory are the entries of ``LOG_FILES``.
    Run and seed directories are visited in sorted order so that the integer
    seed ids (1, 2, ...) assigned per run are reproducible; entries that are
    not directories are ignored.

    Args:
        log_dirs_path: Directory whose sub-directories are runs.
        skip_runs: Container of run names to skip (``None`` skips nothing).
        current_dfs: Optional sequence ``[step_df, episode_df, eval_episode_df]``
            to which the newly parsed rows are appended.

    Returns:
        Tuple ``(step_df, episode_df, eval_episode_df)`` of DataFrames.
    """
    skip_runs = () if skip_runs is None else skip_runs
    current_dfs = [] if current_dfs is None else current_dfs

    def parse_log(log_dir):
        """Parse one run directory into (step, episode, eval-episode) row lists."""
        step_data = []
        episode_data = []
        eval_episode_data = []
        cur_log_dir = os.path.join(log_dirs_path,log_dir)
        if log_dir in skip_runs:
            print("Skipping %s"%cur_log_dir)
            return [], [], []
        if not os.path.isdir(cur_log_dir):
            # Stray file next to the run directories: nothing to parse.
            return [], [], []
        print("Parsing %s"%cur_log_dir)
        seed = 0
        for seed_dir in sorted(os.listdir(cur_log_dir)):
            if not os.path.isdir(os.path.join(cur_log_dir, seed_dir)):
                # Do not let stray files consume a seed id.
                continue
            seed+=1
            # Counters start fresh for every seed so that a log missing its
            # 'Starting DDPG training' header neither raises NameError nor
            # inherits state from the previous seed. They deliberately carry
            # over between the files of LOG_FILES within one seed.
            at_episode=0
            at_eval_episode=0
            at_step=0
            at_step_total=0
            added_eval_episode=False
            eval_training_steps=0
            for log_file_name in LOG_FILES:
                log_file_path = os.path.join(cur_log_dir,seed_dir,log_file_name)
                if not os.path.exists(log_file_path):
                    continue
                with open(log_file_path, 'r') as log_file:
                    for line in log_file:
                        if 'Starting DDPG training' in line:
                            at_episode=0
                            at_eval_episode=0
                            at_step_total=0
                            added_eval_episode=False
                            eval_training_steps=0
                        if 'Starting new episode' in line:
                            at_episode+=1
                            at_step = 0
                        if 'from policy' in line:
                            try:
                                policy_choice = int(line.split(' ')[-5])
                            except (ValueError, IndexError):
                                # Not a policy-choice line after all; skip it.
                                continue
                            at_step+=1
                            at_step_total+=1
                            step_data.append([seed, log_dir, at_episode, at_step, policy_choice])
                            added_eval_episode=False
                        if 'Episode finished!' in line:
                            try:
                                reward = float(line.split('=')[-1])
                                # NOTE(review): the 'episode' column receives the
                                # eval-episode counter, not at_episode -- confirm
                                # this is intended.
                                episode_data.append([seed, log_dir, at_eval_episode, at_step_total, reward])
                            except ValueError:
                                logging.error('Issue with parsing line!')
                        if 'Eval episode finished' in line:
                            try:
                                reward = float(line.split('=')[-1])
                            except ValueError:
                                # Same best-effort handling as training episodes.
                                logging.error('Issue with parsing line!')
                                continue
                            if not added_eval_episode:
                                # First eval episode since the last training
                                # step: advance the x-axis by 200 (presumably
                                # the training steps between evaluation
                                # rounds -- TODO confirm).
                                eval_training_steps+=200
                            eval_episode_data.append([seed, log_dir, at_eval_episode, eval_training_steps, reward])
                            at_eval_episode+=1
                            added_eval_episode=True
        return step_data, episode_data, eval_episode_data

    step_datas = []
    episode_datas = []
    eval_episode_datas = []
    for log_dir in sorted(os.listdir(log_dirs_path)):
        step_data, episode_data, eval_episode_data = parse_log(log_dir)
        step_datas+=step_data
        episode_datas+=episode_data
        eval_episode_datas+=eval_episode_data

    step_df = pd.DataFrame(step_datas, columns=['seed', 'run', 'episode', 'step', 'policy_choice'])
    episode_df = pd.DataFrame(episode_datas, columns=['seed', 'run', 'episode', 'step', 'episode reward'])
    eval_episode_df = pd.DataFrame(eval_episode_datas, columns=['seed', 'run', 'episode', 'training steps',
                                                               'episode reward'])
    if current_dfs:
        # ignore_index avoids duplicate index labels, which break the
        # label-aligned assignments done later on these frames.
        step_df = pd.concat([current_dfs[0], step_df], ignore_index=True)
        episode_df = pd.concat([current_dfs[1], episode_df], ignore_index=True)
        eval_episode_df = pd.concat([current_dfs[2], eval_episode_df], ignore_index=True)

    return step_df,episode_df,eval_episode_df

In [None]:
# Parse the point-environment runs that are not cached yet and unpack the
# resulting (step, episode, eval-episode) frames.
points_dfs = parse_logs('SparseGoalInStatePointsPath-v0',
                        skip_runs=points_runs,
                        current_dfs=points_dfs)
(points_step_df,
 points_episode_df,
 points_eval_episode_df) = points_dfs

In [None]:
# Parse the pick-and-place runs that are not cached yet and unpack the frames.
mujoco_dfs = parse_logs('OneGoalPickPlaceDenseEnv-v0',
                        skip_runs=mujoco_runs,
                        current_dfs=mujoco_dfs)
#mujoco_dfs = parse_log('OneGoalPickPlaceEnv-v0', mujoco_runs, mujoco_dfs)
(mujoco_step_df,
 mujoco_episode_df,
 mujoco_eval_episode_df) = mujoco_dfs

In [None]:
# Parse the hook-sweep runs that are not cached yet and unpack the frames.
hook_dfs = parse_logs('FetchHookSweepPushDenseEasyInitEasierTaskCloseInitNoGrasp-v0',
                      skip_runs=hook_runs,
                      current_dfs=hook_dfs)
(hook_step_df,
 hook_episode_df,
 hook_eval_episode_df) = hook_dfs

In [None]:
# Parse the randomized hook-sweep runs and unpack the frames.
hookrandom_dfs = parse_logs('FetchHookSweepPushDenseEasyInitEasierTaskCloseInitNoGraspRandomized-v0',
                            skip_runs=hookrandom_runs,
                            current_dfs=hookrandom_dfs)
(hookrandom_step_df,
 hookrandom_episode_df,
 hookrandom_eval_episode_df) = hookrandom_dfs

In [None]:
# Distinct run names that have evaluation data for the point environment.
points_runs = set(points_eval_episode_df['run'])
points_runs

In [None]:
# Distinct run names that have evaluation data for the pick-and-place environment.
mujoco_runs = set(mujoco_eval_episode_df['run'])
mujoco_runs

In [None]:
# Distinct run names that have evaluation data for the hook environment.
hook_runs = set(hook_eval_episode_df['run'])
hook_runs

In [None]:
# Rename the no-teacher baseline to 'bayesian_ddpg', the run name the
# plotting helpers select the baseline by.
_hook_baseline_rows = hook_eval_episode_df['run'] == 'efficiency_no_teachers_ddpg'
hook_eval_episode_df.loc[_hook_baseline_rows, 'run'] = 'bayesian_ddpg'

In [None]:
# Rename the no-teacher baseline to 'bayesian_ddpg', the run name the
# plotting helpers select the baseline by. The row mask must come from the
# frame being modified (hookrandom), not from hook_eval_episode_df.
_hookrandom_baseline_rows = hookrandom_eval_episode_df['run'] == 'efficiency_no_teachers_ddpg'
hookrandom_eval_episode_df.loc[_hookrandom_baseline_rows, 'run'] = 'bayesian_ddpg'

In [None]:
### Blurb to check that we have 5 seeds for all the runs we care about... ###

# Run-type prefixes expected in the point-environment evaluation frame.
POINTS_RUNS_WE_CARE_ABOUT = [
    # these are figure 3a (suboptimal + partial teachers)
    'efficiency_1full_optimal',   
    'efficiency_1full_suboptimal',
    'efficiency_4partial_optimal',
    'efficiency_4partial_noisy',
    # these are figure 3b (incomplete + contradictory teacher sets)
    'efficiency_3partial_noisy',
    'efficiency_2partial_noisy',
    'efficiency_1partial_noisy',
    'contradiction_halfway',
    # these are figure 3c (larger + lower quality teacher sets)
    'sensitivity_bad_teachers_1random',
    'sensitivity_bad_teachers_2random',
    'sensitivity_bad_teachers_4random',
    'sensitivity_bad_teachers_1adversarial',
]

# Run-type prefixes expected in the pick-and-place evaluation frame.
MUJOCO_RUNS_WE_CARE_ABOUT = [
    # (suboptimal + partial teachers)
    'efficiency_1full_optimal', 
    'efficiency_1full_suboptimal',
    'efficiency_partial_complete_suboptimal',
    'efficiency_partial_complete_optimal',
    # (incomplete + contradictory teacher sets)
    'efficiency_partial_noisy',
    # (TODO: missing place-only teacher sets)
    'efficiency_partial_place_noisy',
    # (TODO: missing contradictory)
    'contradiction_halfway',
    # (larger + lower quality teacher sets)
    'sensitivity_bad_teachers_1random',
    'sensitivity_bad_teachers_2random',
    'sensitivity_bad_teachers_4random',
    # (TODO: missing adversarial teacher)
]

# Choose which environment the seed check runs on: uncomment the points pair
# below and comment out the mujoco pair to switch.
#tmp_df = points_eval_episode_df
#RUNS_WE_CARE_ABOUT = POINTS_RUNS_WE_CARE_ABOUT

tmp_df = mujoco_eval_episode_df
RUNS_WE_CARE_ABOUT = MUJOCO_RUNS_WE_CARE_ABOUT

# Warn about every (run type, policy) pair that has fewer than five seeds of
# evaluation data, or none at all.
tmp_policy_names = ['ours', 'random', 'dqn', 'bayesian_ddpg', 'ddpgcritic']
for tmp_run_type in RUNS_WE_CARE_ABOUT:
    # Rows of this run type (plus the shared 'bayesian_ddpg' baseline when it
    # is among the policies), limited to seeds 1..5. The parentheses matter:
    # `&` binds tighter than `|`, so without them the seed limit would only
    # apply to the baseline. This is the same filter get_plottable_df uses,
    # and it does not depend on the policy, so it is built once per run type.
    tmp_keep = tmp_df['run'].str.startswith(tmp_run_type)
    if 'bayesian_ddpg' in tmp_policy_names:
        tmp_keep = tmp_keep | (tmp_df['run']=='bayesian_ddpg')
    tmp_sub_df = tmp_df[tmp_keep & (tmp_df['seed']<=5)].copy()

    for tmp_policy_name in tmp_policy_names:
        tmp_run = '%s_%s'%(tmp_run_type, tmp_policy_name) if tmp_policy_name!='bayesian_ddpg' else 'bayesian_ddpg'
        tmp_sub_sub_df = tmp_sub_df.loc[tmp_sub_df['run']==tmp_run]
        if tmp_sub_sub_df.empty:
            print('WARNING: Could not get data for run {} and type {}'.format(tmp_policy_name, tmp_run_type))
            continue
        # Count distinct seeds rather than taking the largest seed id, so a
        # missing seed in the middle (e.g. 1, 2, 5) is still reported.
        tmp_seed_count = int(tmp_sub_sub_df['seed'].nunique())
        if tmp_seed_count < 5:
            print("WARNING: Run {} for type {} has {} seeds.".format(tmp_policy_name, tmp_run_type, tmp_seed_count))

In [None]:
# Persist the parsed frames so the cache-loading cell can reuse them in a
# later session instead of re-parsing the logs.
# NOTE(review): the hookrandom_* frames are not pickled here -- confirm that
# this is intended.
points_step_df.to_pickle("points_step_df.pkl")
points_episode_df.to_pickle("points_episode_df.pkl")
points_eval_episode_df.to_pickle("points_eval_episode_df.pkl")
mujoco_step_df.to_pickle("mujoco_step_df.pkl")
mujoco_episode_df.to_pickle("mujoco_episode_df.pkl")
mujoco_eval_episode_df.to_pickle("mujoco_eval_episode_df.pkl")
hook_step_df.to_pickle("hook_step_df.pkl")
hook_episode_df.to_pickle("hook_episode_df.pkl")
hook_eval_episode_df.to_pickle("hook_eval_episode_df.pkl")

In [None]:
def get_bounds(df):
    """Return the mean 'episode reward' of the reference runs found in ``df``.

    The optimal and noisy-optimal references are looked up under their plain
    run name first ('optimal', 'optimal_noisy') and, when that has no rows,
    under the 'run_'-prefixed name. A reference without rows yields NaN.

    Returns:
        Tuple ``(optimal_reward, random_reward, teacher_reward)``.
    """
    def _mean_reward(run_name, fallback_name=None):
        rows = df[df['run'] == run_name]
        if fallback_name is not None and len(rows) == 0:
            rows = df[df['run'] == fallback_name]
        return rows['episode reward'].mean()

    optimal_reward = _mean_reward('optimal', 'run_optimal')
    random_reward = _mean_reward('random')
    teacher_reward = _mean_reward('optimal_noisy', 'run_optimal_noisy')
    return optimal_reward, random_reward, teacher_reward

In [None]:
def get_plottable_df(df, run_type, 
                     name_mapping={'ours': 'B-DDPG + ACT (ours)', 
                                   'dqn': 'B-DDPG + DQN', 
                                   'random': 'B-DDPG + Random', 
                                   'ddpgcritic': 'DDPG + Critic',
                                   'bayesian_ddpg': 'B-DDPG (no teachers)'}, 
                     policy_names=['ours', 'random', 'dqn', 'bayesian_ddpg', 'ddpgcritic'], 
                     smoothing=0.05, print_seed_counts=True):
    """Select one run type from ``df`` and prepare it for plotting.

    Keeps the rows whose 'run' starts with ``run_type`` (plus the shared
    'bayesian_ddpg' baseline when it is listed in ``policy_names``) for seeds
    <= 5, adds a 'smooth rewards' column holding a per-(run, seed) rolling mean
    of 'episode reward', and replaces the run names by their display names.
    ``df`` itself is left untouched.

    Args:
        df: Frame with at least 'run', 'seed' and 'episode reward' columns.
        run_type: Run-name prefix; runs are named ``<run_type>_<policy>``.
        name_mapping: Policy name -> display name written into 'run'.
        policy_names: Policies to process.
        smoothing: Rolling window as a fraction of each series' length (at
            least one row is always used); a falsy value disables smoothing
            and leaves 'smooth rewards' at 0.
        print_seed_counts: Print the largest seed id found for every run.

    Returns:
        The filtered copy with the 'smooth rewards' column.
    """
    keep = df['run'].str.startswith(run_type)
    if 'bayesian_ddpg' in policy_names:
        keep = keep | (df['run']=='bayesian_ddpg')
    sub_df = df[keep & (df['seed']<=5)].copy()

    # Initialise the column on the copy, not on the caller's frame: writing to
    # `df` made the result depend on whether `df` had been through this
    # function before. Float avoids dtype clashes with the rolling means.
    sub_df['smooth rewards'] = 0.0
    for policy_name in policy_names:
        run = '%s_%s'%(run_type, policy_name) if policy_name!='bayesian_ddpg' else 'bayesian_ddpg'
        sub_sub_df = sub_df.loc[sub_df['run']==run]
        if sub_sub_df.empty:
            print('Could not get data for policy %s'%policy_name)
            continue
        max_seed = int(sub_sub_df['seed'].max())
        if print_seed_counts:
            print('Run %s has %d seeds'%(run,max_seed))

        if smoothing:
            for seed in range(1, max_seed+1):
                # NOTE(review): this mean covers every run of this seed in
                # sub_df, not only `run` -- confirm that is intended.
                if sub_df.loc[sub_df['seed']==seed]['episode reward'].mean()==0:
                    sub_df = sub_df.loc[sub_df['seed']!=seed]
                    continue
                seed_rows = sub_sub_df.loc[sub_sub_df['seed']==seed]
                if seed_rows.empty:
                    # This run has no data for the seed: drop the seed from
                    # every run (previously reached via a swallowed exception).
                    sub_df = sub_df.loc[sub_df['seed']!=seed]
                    continue
                # A window of 0 is invalid for rolling(); short series used to
                # raise here and the whole seed was silently discarded.
                window = max(1, int(smoothing*len(seed_rows)))
                try:
                    smoothed = seed_rows['episode reward'].rolling(window, min_periods=1).mean()
                    # Positional assignment: target rows and `seed_rows` are
                    # the same rows in the same order, and this also works
                    # when the index contains duplicate labels.
                    sub_df.loc[(sub_df['run']==run) & (sub_df['seed']==seed), 'smooth rewards'] = \
                        smoothed.to_numpy()
                except Exception as error:
                    print('Dropping seed %d: could not smooth run %s (%s)'%(seed, run, error))
                    sub_df = sub_df.loc[sub_df['seed']!=seed]

        # Policies without a display name keep their raw run name.
        if policy_name in name_mapping:
            sub_df.loc[sub_df['run']==run,'run'] = name_mapping[policy_name]
    return sub_df

In [None]:
def plot_df(df, x='training steps', y='smooth rewards', 
            optimal=None, 
            teacher_optimal=None,
            random=None,
            ordered_labels=['B-DDPG + ACT (ours)',
                            'DDPG + Critic',
                            'B-DDPG + DQN',
                            'B-DDPG + Random',
                            'B-DDPG (no teachers)',
                            'DDPG'],
            palette={'B-DDPG + ACT (ours)': 'royalblue', 
                 'B-DDPG + DQN': 'orange', 
                 'B-DDPG + Random': 'g', 
                 'DDPG + Critic': 'r',
                 'B-DDPG (no teachers)': 'darkmagenta'},
            xlim=100000,
            ylim=None,
            title=None,
            name=None,
            legend=True):
    """Draw one line per run of ``df`` plus optional horizontal reference lines.

    Args:
        df: Frame with the ``x`` and ``y`` columns and a 'run' column (hue).
        x, y: Column names for the two axes.
        optimal, teacher_optimal, random: Optional reward levels drawn as
            horizontal black lines labelled 'Full Teacher', 'Full Teacher +
            Noise' and 'Random Agent'. ``None`` and NaN are skipped.
        ordered_labels: Preferred legend order; labels that were not plotted
            are left out. The list passed in is not modified.
        palette: Run display name -> colour.
        xlim: Upper x limit (the lower limit is 0).
        ylim: Optional ``(low, high)`` y limits.
        title: Optional axes title.
        name: Optional path the figure is saved to.
        legend: Whether to draw a legend.
    """
    # Keyword x/y: current seaborn releases reject them as positional args.
    line_kwargs = dict(x=x, y=y, hue='run', data=df, palette=palette, lw=1)
    if not legend:
        line_kwargs['legend'] = False
    plot = sns.lineplot(**line_kwargs)
    axes = plot.axes
    if title:
        axes.set_title(title)

    handles, labels = axes.get_legend_handles_labels()
    # Build new lists instead of editing `ordered_labels` in place: the list
    # is a default argument / shared global, and removing entries from it
    # (all of them when legend=False) corrupted every later call.
    legend_labels = [label for label in ordered_labels
                     if label in labels and label != 'run']
    legend_handles = [handles[labels.index(label)] for label in legend_labels]

    plt.xlim(0,xlim)
    if ylim:
        plt.ylim(ylim[0],ylim[1])
    plot.set_xlabel("Training Steps",fontsize=28)
    plot.set_ylabel("Discounted Sum of Rewards",fontsize=28)

    reference_lines = [(optimal, '--', 'Full Teacher'),
                       (teacher_optimal, ':', 'Full Teacher + Noise'),
                       (random, ':', 'Random Agent')]
    for level, linestyle, label in reference_lines:
        # `level != level` is the NaN test; a level of 0 is a valid bound.
        if level is None or level != level:
            continue
        h = plt.axhline(y=level, color='black', linestyle=linestyle, linewidth=2.5)
        legend_handles.append(h)
        legend_labels.append(label)

    if legend:
        leg = axes.legend(handles=legend_handles, 
                          labels=legend_labels, 
                          bbox_to_anchor=(0., 1.02, 1., .102), 
                          loc=4,
                          ncol=3,
                         )
        # Thicker legend swatches than the lw=1 plot lines.
        for line in leg.get_lines():
            line.set_linewidth(5.0)

    if name is not None:
        plt.savefig(name, bbox_inches='tight')
    plt.show()
    

In [None]:
# Defaults shared by the plotting helpers below.

# Policy suffix of a run name -> display name used in legends.
name_mapping = {'ours': 'B-DDPG + ACT (ours)', 
                'dqn': 'B-DDPG + DQN', 
                'random': 'B-DDPG + Random', 
                'ddpgcritic': 'DDPG + Critic',
                'bayesian_ddpg': 'B-DDPG (no teachers)'}

# Policies plotted by default.
policy_names = ['ours', 'random', 'dqn', 'bayesian_ddpg', 'ddpgcritic', ]

# Legend order.
ordered_labels = ['B-DDPG + ACT (ours)',
                      'B-DDPG + DQN',
                      'B-DDPG + Random',
                      'DDPG + Critic',
                      'B-DDPG (no teachers)']

# Display name -> line colour.
palette = {'B-DDPG + ACT (ours)': 'royalblue', 
           'B-DDPG + ACT (ours, no commitment)': 'brown',
           'B-DDPG + DQN': 'orange', 
           'B-DDPG + Random': 'g', 
           'DDPG + Critic': 'r',
           'B-DDPG (no teachers)': 'darkmagenta'}

def plot(r, df, 
         bounds_df=None, 
         legend=False, 
         name=None, 
         title=None,
         tmp_name_mapping=name_mapping,
         tmp_policy_names=policy_names,
         tmp_ordered_labels=ordered_labels,
         tmp_palette=palette,
         x='training steps', 
         xlim=None,
         ylim=None):
    """Plot the smoothed reward curves of run type ``r`` taken from ``df``.

    Args:
        r: Run-type prefix handed to ``get_plottable_df``.
        df: Frame holding the runs to plot.
        bounds_df: Optional frame from which ``get_bounds`` derives the
            reference reward levels; only the optimal and noisy-teacher
            levels are drawn.
        legend: Whether to draw a legend.
        name: Optional figure name; the figure is saved as ``figs/<name>.pdf``.
        title: Optional axes title.
        tmp_name_mapping, tmp_policy_names, tmp_ordered_labels, tmp_palette:
            Per-call overrides of the notebook-level plot configuration.
        x: Column used for the x axis.
        xlim, ylim: Axis limits forwarded to ``plot_df``.
    """
    optimal_reward = None
    random_reward = None
    teacher_reward = None
    if bounds_df is not None:
        # random_reward is unpacked but not forwarded to plot_df.
        optimal_reward, random_reward, teacher_reward = get_bounds(bounds_df)

    plottable = get_plottable_df(df, r, name_mapping=tmp_name_mapping, policy_names=tmp_policy_names)

    if name is not None:
        name = 'figs/{}.pdf'.format(name)
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(name), exist_ok=True)

    plot_df(plottable, x=x, optimal=optimal_reward, 
            teacher_optimal=teacher_reward, 
            name=name, 
            title=title,
            legend=legend, 
            # Hand over a copy so the shared default list can never be
            # altered by the callee.
            ordered_labels=list(tmp_ordered_labels), 
            palette=tmp_palette,
            xlim=xlim,
            ylim=ylim)
    
def plot_path(run_name, plot_legend=False, title=None):
    """Plot ``run_name`` for the path-following environment.

    Uses the points evaluation frame for the curves, the points training
    episodes for the reference levels, and saves under the name
    'path_<run_name>'.
    """
    plot(run_name,
         points_eval_episode_df,
         bounds_df=points_episode_df,
         legend=plot_legend,
         name='path_'+run_name,
         title=title,
         xlim=100000,
         ylim=(-0.1,3.4))
    
def plot_mujoco(run_name, plot_legend=False, title=None):
    """Plot ``run_name`` for the pick-and-place environment.

    Uses the mujoco evaluation frame for the curves, the mujoco training
    episodes for the reference levels, and saves under the name
    'mujoco_<run_name>'.
    """
    plot(run_name,
         mujoco_eval_episode_df,
         bounds_df=mujoco_episode_df,
         legend=plot_legend,
         name='mujoco_'+run_name,
         title=title,
         xlim=500000,
         ylim=(-16,-4))
    
def plot_hook(run_name, plot_legend=False, title=None):
    """Plot ``run_name`` for the hook environment.

    Uses the hook evaluation frame for the curves, the hook training episodes
    for the reference levels, and saves under the name 'hook_<run_name>'.
    """
    plot(run_name,
         hook_eval_episode_df,
         bounds_df=hook_episode_df,
         legend=plot_legend,
         name='hook_'+run_name,
         title=title,
         xlim=250000,
         ylim=(-30,-5))
    
def plot_hookrandom(run_name, plot_legend=False, title=None):
    """Plot ``run_name`` for the randomized hook environment.

    Uses the hookrandom evaluation frame for the curves and the hookrandom
    training episodes for the reference levels. The figure is saved under
    'hookrandom_<run_name>'; it used to share the 'hook_' prefix with
    ``plot_hook``, so each helper overwrote the other's PDF.
    """
    plot(run_name, 
         hookrandom_eval_episode_df, 
         hookrandom_episode_df, 
         plot_legend, 
         'hookrandom_'+run_name,
         title=title,
         xlim=250000,
         ylim=(-30,-5))

In [None]:
# (run type, figure title) pairs for the path-following figures.
path_plots = [
             ('efficiency_1full_optimal','1 Full Optimal Teacher'),
             ('efficiency_1full_suboptimal','1 Full Suboptimal Teacher'),
             ('efficiency_4partial_optimal','4 Sufficient Partial Optimal Teachers'),
             ('efficiency_4partial_noisy','4 Sufficient Partial Suboptimal Teachers'),
             ('efficiency_3partial_noisy','3 Insufficient Partial Suboptimal Teachers'),
             ('efficiency_2partial_noisy','2 Insufficient Partial Suboptimal Teachers'),
             ('efficiency_1partial_noisy','1 Insufficient Partial Suboptimal Teacher'),
             ('contradiction_halfway','2 Sufficient Partial Contradictory Teachers'),
             ('sensitivity_bad_teachers_1random','Sufficient Teacher Set + 1 Random Teacher'),
             ('sensitivity_bad_teachers_2random','Sufficient Teacher Set + 2 Random Teachers'),
             ('sensitivity_bad_teachers_4random','Sufficient Teacher Set + 4 Random Teachers'),
             ('sensitivity_bad_teachers_1adversarial','Sufficient Teacher Set + 1 Adversarial Teacher')
             ]

# One figure per entry, drawn without a legend (plot_path's default).
for run_name,title in path_plots:
    plot_path(run_name, title=title)

In [None]:
# (run type, figure title) pairs for the pick-and-place figures; the
# commented-out entries are currently not plotted.
mujoco_plots = [
             #('efficiency_1full_optimal','1 Full Teacher'),
             #('efficiency_1full_suboptimal','1 Full Noisy Teacher'),
             ('efficiency_partial_complete_optimal','Sufficient Partial Teachers'),
             ('efficiency_partial_complete_suboptimal','Sufficient Partial Noisy Teachers'),
             ('efficiency_partial_incomplete_optimal','Insufficient Partial Teacher'),
             ('efficiency_partial_incomplete_suboptimal','Insufficient Partial Noisy Teacher'),
             #('contradiction_halfway','Sufficient Partial Contradictory Teachers'),
             #('sensitivity_bad_teachers_1random','1 Added Random Teacher'),
             #('sensitivity_bad_teachers_2random','2 Added Random Teachers'),
             #('sensitivity_bad_teachers_4random','4 Added Random Teachers')
               ]

# NOTE(review): the titles above are not used -- title=None is passed, unlike
# the other environments' loops. Confirm that untitled figures are intended.
for run_name,title in mujoco_plots:
    plot_mujoco(run_name, title=None)

In [None]:
# (run type, figure title) pairs for the hook figures.
hook_plots = [
             ('efficiency_full_optimal','1 Full Teacher'),
             ('efficiency_full_suboptimal','1 Full Noisy Teacher'),
             ('efficiency_partial_sufficient_optimal','Sufficient Partial Teachers'),
             ('efficiency_partial_sufficient_suboptimal','Sufficient Partial Noisy Teachers'),
             ('efficiency_partial_insufficient_optimal','Insufficient Partial Teacher'),
             ('efficiency_partial_insufficient_suboptimal','Insufficient Partial Noisy Teacher'),
            ]

# One figure per entry, drawn without a legend (plot_hook's default).
for run_name,title in hook_plots:
    plot_hook(run_name, title=title)

In [None]:
# Same (run type, figure title) pairs as the hook cell, re-declared so this
# cell can run on its own; plotted here for the randomized hook environment.
hook_plots = [
             ('efficiency_full_optimal','1 Full Teacher'),
             ('efficiency_full_suboptimal','1 Full Noisy Teacher'),
             ('efficiency_partial_sufficient_optimal','Sufficient Partial Teachers'),
             ('efficiency_partial_sufficient_suboptimal','Sufficient Partial Noisy Teachers'),
             ('efficiency_partial_insufficient_optimal','Insufficient Partial Teacher'),
             ('efficiency_partial_insufficient_suboptimal','Insufficient Partial Noisy Teacher'),
            ]

for run_name,title in hook_plots:
    plot_hookrandom(run_name, title=title)

In [None]:
# Single pick-and-place figure for a run type that is commented out of mujoco_plots.
plot_mujoco('efficiency_1full_suboptimal', title='1 Full Suboptimal Teacher')

In [None]:
def plot_sensitivity_tau(df, legend=False, name=None, x='training steps', xlim=100000):
    """Plot the 'sensitivity_tau' runs of ``df``, one curve per tau value.

    Args:
        df: Frame holding runs named 'sensitivity_tau_<tau>'.
        legend: Whether to draw a legend.
        name: Optional figure name forwarded to ``plot``.
        x: Column used for the x axis.
        xlim: Upper x limit.
    """
    # Run-name suffix -> legend label, listed in legend order.
    tau_labels = {'1': 'ACT (tau = 1)',
                  '10': 'ACT (tau = 10) - ours',
                  '100': 'ACT (tau = 100)',
                  '1000': 'ACT (tau = 1000)',
                  '10000': 'ACT (tau = 10000)'}
    suffixes = list(tau_labels)
    labels_in_order = [tau_labels[suffix] for suffix in suffixes]

    # One colour per label, plus black for the baseline run name.
    colours = ['royalblue', 'orange', 'g', 'r', 'darkmagenta']
    colour_by_label = dict(zip(labels_in_order, colours))
    colour_by_label['bayesian_ddpg'] = 'black'

    plot('sensitivity_tau', df,
         legend=legend,
         name=name,
         tmp_name_mapping=tau_labels,
         tmp_policy_names=suffixes,
         tmp_ordered_labels=labels_in_order,
         tmp_palette=colour_by_label,
         x=x,
         xlim=xlim)

In [None]:
# Tau sensitivity on the path-following evaluation data, with legend.
plot_sensitivity_tau(points_eval_episode_df, legend=True)

In [None]:
def plot_sensitivity_thresh(df, legend=False, name=None, x='training steps', xlim=100000):
    """Plot the 'sensitivity_commit_thresh' runs of ``df``, one curve per beta.

    Args:
        df: Frame holding runs named 'sensitivity_commit_thresh_<suffix>'.
        legend: Whether to draw a legend.
        name: Optional figure name forwarded to ``plot``.
        x: Column used for the x axis.
        xlim: Upper x limit.
    """
    # Run-name suffix -> legend label, listed in legend order.
    beta_labels = {'02': 'ACT (beta = 0.2)',
                   '04': 'ACT (beta = 0.4)',
                   '05': 'ACT (beta = 0.5)',
                   '06': 'ACT (beta = 0.6) - ours',
                   '08': 'ACT (beta = 0.8)'}
    suffixes = list(beta_labels)
    labels_in_order = [beta_labels[suffix] for suffix in suffixes]

    # One colour per label, plus black for the baseline run name.
    colours = ['royalblue', 'orange', 'g', 'r', 'darkmagenta']
    colour_by_label = dict(zip(labels_in_order, colours))
    colour_by_label['bayesian_ddpg'] = 'black'

    plot('sensitivity_commit_thresh', df,
         legend=legend,
         name=name,
         tmp_name_mapping=beta_labels,
         tmp_policy_names=suffixes,
         tmp_ordered_labels=labels_in_order,
         tmp_palette=colour_by_label,
         x=x,
         xlim=xlim)

In [None]:
# Commit-threshold sensitivity on the path-following evaluation data, with legend.
plot_sensitivity_thresh(points_eval_episode_df, legend=True)

In [None]:
def plot_sensitivity_mc(df, legend=False, name=None, x='training steps', xlim=100000):
    """Plot the 'sensitivity_mc' runs of ``df``, one curve per MC# value.

    Args:
        df: Frame holding runs named 'sensitivity_mc_<count>'.
        legend: Whether to draw a legend.
        name: Optional figure name forwarded to ``plot``.
        x: Column used for the x axis.
        xlim: Upper x limit.
    """
    # Run-name suffix -> legend label, listed in legend order.
    mc_labels = {'10': 'ACT (MC# = 10)',
                 '20': 'ACT (MC# = 20)',
                 '30': 'ACT (MC# = 30)',
                 '40': 'ACT (MC# = 40)',
                 '50': 'ACT (MC# = 50) - ours'}
    suffixes = list(mc_labels)
    labels_in_order = [mc_labels[suffix] for suffix in suffixes]

    # One colour per label, plus black for the baseline run name.
    colours = ['royalblue', 'orange', 'g', 'r', 'darkmagenta']
    colour_by_label = dict(zip(labels_in_order, colours))
    colour_by_label['bayesian_ddpg'] = 'black'

    plot('sensitivity_mc', df,
         legend=legend,
         name=name,
         tmp_name_mapping=mc_labels,
         tmp_policy_names=suffixes,
         tmp_ordered_labels=labels_in_order,
         tmp_palette=colour_by_label,
         x=x,
         xlim=xlim)

In [None]:
# MC# sensitivity on the path-following evaluation data, with legend.
plot_sensitivity_mc(points_eval_episode_df, legend=True)

In [None]:
def plot_sensitivity_decay(df, legend=False, name=None, x='training steps', xlim=100000):
    """Plot the 'sensitivity_decay' runs of ``df``, one curve per psi value.

    Args:
        df: Frame holding runs named 'sensitivity_decay_<suffix>'.
        legend: Whether to draw a legend.
        name: Optional figure name forwarded to ``plot``.
        x: Column used for the x axis.
        xlim: Upper x limit.
    """
    # Run-name suffix -> legend label, listed in legend order.
    psi_labels = {'9': 'ACT (psi = 0.9)',
                  '95': 'ACT (psi = 0.95)',
                  '98': 'ACT (psi = 0.98)',
                  '99': 'ACT (psi = 0.99) - ours',
                  '995': 'ACT (psi = 0.995)'}
    suffixes = list(psi_labels)
    labels_in_order = [psi_labels[suffix] for suffix in suffixes]

    # One colour per label, plus black for the baseline run name.
    colours = ['royalblue', 'orange', 'g', 'r', 'darkmagenta']
    colour_by_label = dict(zip(labels_in_order, colours))
    colour_by_label['bayesian_ddpg'] = 'black'

    plot('sensitivity_decay', df,
         legend=legend,
         name=name,
         tmp_name_mapping=psi_labels,
         tmp_policy_names=suffixes,
         tmp_ordered_labels=labels_in_order,
         tmp_palette=colour_by_label,
         x=x,
         xlim=xlim)

In [None]:
# Decay (psi) sensitivity on the path-following evaluation data, with legend.
plot_sensitivity_decay(points_eval_episode_df, legend=True)

In [None]:
def plot_ablation(r, df, bounds_df=None, legend=False, name=None, title=None, x='training steps', xlim=100000):
    """Plot the ablation variants of run type ``r`` and return the plotted frame.

    Args:
        r: Run-type prefix handed to ``get_plottable_df``. Unless it contains
            'incomplete', runs whose name contains 'incomplete' are removed.
        df: Frame holding the runs to plot.
        bounds_df: Optional frame from which ``get_bounds`` derives the
            reference reward levels; only the optimal and noisy-teacher
            levels are drawn.
        legend: Whether to draw a legend.
        name: Optional figure name; the figure is saved as ``figs/<name>.pdf``.
        title: Optional axes title.
        x: Column used for the x axis.
        xlim: Upper x limit.

    Returns:
        The frame that was plotted.
    """
    tmp_name_mapping = {'full': 'ACT (full)', 
                        'no_commitdecay': 'ACT (no commit decay)',
                        'no_commit': 'ACT (no commit)',
                        'no_meta': 'ACT (no behavioral target)',
                        'no_commit_no_meta': 'ACT (no commit & no behavioral target)',
                        'no_commitdecay_no_meta':  'ACT (no commit decay & no behavioral target)'}
    # 'no_commitdecay_no_meta' has a display name above but is not plotted.
    tmp_policy_names = ['full', 'no_commitdecay', 'no_commit', 'no_meta', 
                        'no_commit_no_meta']
    tmp_ordered_labels = ['ACT (full)',
                          'ACT (no commit decay)',
                          'ACT (no commit)',
                          'ACT (no behavioral target)',
                          'ACT (no commit & no behavioral target)']
    tmp_palette = {'ACT (full)': 'royalblue', 
                   'ACT (no commit decay)': 'orange', 
                   'ACT (no commit)': 'g', 
                   'ACT (no behavioral target)': 'r',
                   'ACT (no commit & no behavioral target)': 'darkmagenta',
                   'bayesian_ddpg': 'black'}

    df = get_plottable_df(df, r, 
                          name_mapping=tmp_name_mapping, policy_names=tmp_policy_names)
    if 'incomplete' not in r:
        # The prefix match also picks up '<r>_incomplete...' runs; drop them.
        df = df[(~df['run'].str.contains('incomplete'))].copy()

    optimal_reward = None
    random_reward = None
    teacher_reward = None
    if bounds_df is not None:
        # random_reward is unpacked but not forwarded to plot_df.
        optimal_reward, random_reward, teacher_reward = get_bounds(bounds_df)

    if name is not None:
        name = 'figs/{}.pdf'.format(name)
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(name), exist_ok=True)

    plot_df(df, x=x, optimal=optimal_reward, 
            teacher_optimal=teacher_reward, 
            title=title,
            name=name, 
            legend=legend, 
            ordered_labels=tmp_ordered_labels, 
            palette=tmp_palette,
            xlim=xlim)
    return df


In [None]:
# Ablation figure for the pick-and-place data, saved as figs/mujoco_ablation.pdf.
plot_ablation('ablation',mujoco_eval_episode_df,xlim=500000,name='mujoco_ablation',title="Ablation for pick and place task")

In [None]:
# Ablation figure for the path-following data, saved as figs/path_ablation.pdf.
plot_ablation('ablation',points_eval_episode_df,xlim=100000,name='path_ablation',title="Ablation for path following task")

In [None]:
# Ablation figure for the 'ablation_incomplete' runs, saved as figs/points_ablation_incomplete.pdf.
plot_ablation('ablation_incomplete',points_eval_episode_df,xlim=100000,name='points_ablation_incomplete')

In [None]:
# Ablation curves from the training-episode frame, plotted against 'step';
# the plotted frame is kept in `df` for further inspection.
df = plot_ablation('path_ablation',points_episode_df,x='step')
#(ggplot(df, aes('step', 'smooth rewards', colour='run')) + geom_point())

In [None]:
# Raw (unsmoothed) evaluation rewards of every run whose name starts with 'hyperparam'.
sub_df = mujoco_eval_episode_df[mujoco_eval_episode_df['run'].str.startswith('hyperparam')]
(ggplot(sub_df, aes('training steps', 'episode reward', colour='run')) + geom_line())

In [None]:
# Ad-hoc figure for a single path-following run type, with the reference
# reward levels taken from the training episodes.
# Uses the notebook-level plot configuration: the tmp_* names referenced here
# before exist only as function parameters / locals, so the cell raised
# NameError on a fresh kernel.
r = 'efficiency_4partial_noisy'
optimal_reward, random_reward, teacher_reward = get_bounds(points_episode_df)
df = get_plottable_df(points_eval_episode_df, r, name_mapping=name_mapping, policy_names=policy_names)
plot_df(df,legend=False, 
        # Pass a copy so the shared label list cannot be altered by the callee.
        ordered_labels=list(ordered_labels), 
        palette=palette,
        optimal=optimal_reward, 
        teacher_optimal=teacher_reward,
        xlim=100000)

In [None]:
# optimal_reward, random_reward, teacher_reward = get_bounds(mujoco_episode_df)

In [None]:
# Scatter of smoothed rewards per run.
# NOTE(review): `df2` is not defined in any visible cell of this notebook, so
# this cell fails on a fresh kernel -- confirm which frame was meant.
(ggplot(df2, aes('training steps', 'smooth rewards', colour='run')) + geom_point())

In [None]:
# step_df[step_df['run']=='train_ours_nocommit'].reset_index().hist('policy_choice')

In [None]:
# @interact(ep_num=widgets.IntSlider(min=1,max=step_df['episode'].max(),step=1))
# def plot(ep_num):
#     step_df[step_df['episode']==ep_num].hist('policy_choice')

In [None]:
# @interact(ep_num=widgets.IntSlider(min=1,max=df['episode'].max(),step=1))
# def plot(ep_num):
#     step_df[df['episode']==ep_num].plot('step', 'policy_choice')
#     matplotlib.pyplot.ylim(bottom=0,top=5)

In [None]:
# pd.pivot_table(df[df['episode']>110][df['episode']<=115].reset_index(),
#               index='step', columns='episode', values='policy_choice'
#              ).plot(subplots=True, sharex=True, sharey=True)