In [7]:
%load_ext autoreload
%autoreload 2
from utils.plotting import get_colors, load_config, plot
from utils.data_handling import load_dqn_data
import numpy as np

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Name explanations
* DQN -> standard DQN
* DAR_min^max -> Dynamic action repetition with small repetition and long repetition values
* tqn -> TempoRL DQN with separate skip-DQN that expects the behaviour action to be concatenated to the state
* t-dqn -> TempoRL DQN with separate skip-DQN that expects the behaviour action as contextual input
* tdqn -> TempoRL DQN with shared state representation between the behavoiur and skip action outputs.

In [8]:
import json
import glob
import os
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sb

from scipy.signal import savgol_filter
    

# Somehow the plotting functionallity I ended up with was already covered for the tabular case.
# I should have just used the plot function from that.
def plotMultiple(data, ylim=None, title='', logStepY=False, max_steps=200, xlim=None, figsize=None,
                 alphas=None, smooth=5, savename=None, rewyticks=None, lenyticks=None,
                 skip_stdevs=[], dont_label=[], dont_plot=[]):
    """
    Simple plotting method that shows the test reward on the y-axis and the number of performed training steps
    on the x-axis.
    
    data -> (dict[agent name] -> list([rewards, lens, decs, train_steps, train_episodes])) the data to plot
    ylim -> (list) y-axis limit
    title -> (str) title on top of plot
    logStepY -> (bool) flag that indicates if the y-axis should be on log scale.
    max_steps -> (int) maximal episode length
    xlim -> (list) x-axis limits
    figsize -> (list) dimensions of the figure
    alphas -> (dict[agent name] -> float) the alpha value to use for plotting of specific agents
    smooth -> (int) the window size for smoothing (has to be odd if used. < 0 deactivates this option)
    savename -> (str) filename to save the figure
    rewyticks -> (list) yticks for the reward plot
    lenyticks -> (list) yticks for the decisions plot
    skip_sdevs -> (list) list of names to not plot standard deviations for.
    dont_label -> (list) list of names to not label.
    dont_plot -> (list) list of names to not plot.
    """
    
    if smooth and smooth > 0:
        degree = 2
        for agent in data:
            data[agent] = list(data[agent])  # we have to convert the tuple to lists
            data[agent][0] = list(data[agent][0])
            data[agent][0][0] = savgol_filter(data[agent][0][0], smooth, degree)  # smooth the mean reward
            data[agent][0][1] = savgol_filter(data[agent][0][1], smooth, degree)  # smooth the stdev reward
            data[agent][1] = list(data[agent][1])
            data[agent][1][0] = savgol_filter(data[agent][1][0], smooth, degree)  # smooth mean num steps
            data[agent][1][1] = savgol_filter(data[agent][1][1], smooth, degree)
            data[agent][2] = list(data[agent][2])
            data[agent][2][0] = savgol_filter(data[agent][2][0], smooth, degree)  # smooth mean decisions
            data[agent][2][1] = savgol_filter(data[agent][2][1], smooth, degree)

    colors, color_map = get_colors()
    

    cfg = load_config()
    sb.set_style(cfg['plotting']['seaborn']['style'])
    sb.set_context(cfg['plotting']['seaborn']['context']['context'],
                   font_scale=cfg['plotting']['seaborn']['context']['font scale'],
                   rc=cfg['plotting']['seaborn']['context']['rc2'])

    if figsize:
        fig, ax = plt.subplots(2, figsize=figsize, dpi=100, sharex=True)
    else:
        fig, ax = plt.subplots(2, figsize=(20, 10), dpi=100,sharex=True)
    ax[0].set_title(title)

    for agent in list(data.keys())[::-1]:
        if agent in dont_plot:
            continue
        try:
            alph = alphas[agent]
        except:
            alph = 1.
        color_name = None
        if 'dar' in agent:
            color_name = color_map['dar']
        elif agent.startswith('t'):
            color_name = color_map['t-dqn']
        else:
            color_name = color_map[agent]
        rew, lens, decs, train_steps, train_eps = data[agent]
        
        label = agent.upper()
        if agent.startswith('t'):
            label = 't-DQN'
        elif agent in dont_label:
            label = None

        #### Plot rewards
        ax[0].step(train_steps[0][::5], rew[0][::5], where='post', c=colors[color_name], label=label,
                   alpha=alph)
        if agent not in skip_stdevs:
            ax[0].fill_between(train_steps[0][::5], rew[0][::5]-rew[1][::5], rew[0][::5]+rew[1][::5], alpha=0.25 * alph, step='post',
                               color=colors[color_name])
        #### Plot lens
        ax[1].step(train_steps[0], decs[0], where='post', c=np.array(colors[color_name]), ls='-',
                   alpha=alph)
        if agent not in skip_stdevs:
            ax[1].fill_between(train_steps[0][::5], decs[0][::5]-decs[1][::5], decs[0][::5]+decs[1][::5], alpha=0.125 * alph, step='post',
                               color=np.array(colors[color_name]))
        ax[1].step(train_steps[0][::5], lens[0][::5], where='post',
                   c=np.array(colors[color_name]) * .75, alpha=alph, ls=':')
        
        if agent not in skip_stdevs:
            ax[1].fill_between(train_steps[0][::5], lens[0][::5]-lens[1][::5], lens[0][::5]+lens[1][::5], alpha=0.25 * alph, step='post',
                               color=np.array(colors[color_name]) * .75)
    ax[0].semilogx()
    if rewyticks is not None:
        ax[0].set_yticks(rewyticks)
    if ylim:
        ax[0].set_ylim(ylim)
    if xlim:
        ax[0].set_xlim(xlim)
    ax[0].set_ylabel('Reward')
    if len(data) - len(dont_label) < 5:
        ax[0].legend(ncol=1, loc='best', handlelength=.75)
    ax[1].semilogx()
    if logStepY:
        ax[1].semilogy()
        
    ax[1].plot([-999, -999], [-999, -999], ls=':', c='k', label='all')
    ax[1].plot([-999, -999], [-999, -999], ls='-', c='k', label='dec')
    ax[1].legend(loc='best', ncol=1, handlelength=.75)
    ax[1].set_ylim([1, max_steps])
    if xlim:
        ax[1].set_xlim(xlim)
    ax[1].set_ylabel('#Actions')
    ax[1].set_xlabel('#Train Steps')
    if lenyticks is not None:
        ax[1].set_yticks(lenyticks)
    plt.tight_layout()
    if savename:
        plt.savefig(savename)

    plt.show()


def get_best_to_plot(data, aucs, tempoRL=None):
    """
    Simple method to filter which lines to plot.
    """
    to_plot = dict()

    if tempoRL is None:
        aucs = list(sorted(aucs, key=lambda x: x[1], reverse=True))
        for idx, auc in enumerate(aucs):
            if 't' in auc[0]:
                break
        to_plot[aucs[idx][0]] = data[aucs[idx][0]]  # the absolute best
    else:
        to_plot[tempoRL] = data[tempoRL]

    bv = -np.inf
    b = None
    for elem in aucs:
        if 'dar' not in elem[0]:
            continue
        elif elem[1] > bv:
            b, bv = elem[0], elem[1]
    to_plot[b] = data[b]
    
    
    to_plot['dqn'] = data['dqn']
    print('Only plotting:', list(to_plot.keys()))
    return to_plot

<BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR>

In [12]:
mountain_sparse_data = {}
mountain_sparse_alphas = {}
mountain_sparse_aucs = []
max_steps=10**6
thresh = -110

for pairs in [(0, 1), (0, 3), (0, 5), (0, 7), (0, 9)]:
    dar_fm_str = r'$dar_{' + '{}'.format(pairs[0] + 1) + '}^{' + '{}'.format(pairs[1] + 1) + '}$'
    mountain_sparse_alphas[dar_fm_str] = 1/5
    mountain_sparse_data[dar_fm_str] = load_dqn_data(
        '*', 'experiments/featurized_results/sparsemountain/dar_orig_%d_%d' % (pairs[0], pairs[1]), max_steps=max_steps,
        )
    try:
        mountain_sparse_aucs.append([dar_fm_str, np.trapz((mountain_sparse_data[dar_fm_str][0][0] + 200)/110,
                                          x=(mountain_sparse_data[dar_fm_str][3][0]/max(mountain_sparse_data[dar_fm_str][3][0])))])
    except:
        del mountain_sparse_data[dar_fm_str]



mountain_sparse_data['dqn'] = load_dqn_data('*', 'experiments/featurized_results/sparsemountain/dqn', max_steps=max_steps,
                                            )
mountain_sparse_aucs.append(['dqn', np.trapz((mountain_sparse_data['dqn'][0][0] + 200)/110,
                                  x=(mountain_sparse_data['dqn'][3][0]/max(
                                      mountain_sparse_data['dqn'][3][0])))])

for i in [2, 4, 6, 8, 10]:
    mountain_sparse_data['tqn_%d' % i] = load_dqn_data('*', 'experiments/featurized_results/sparsemountain/tqn_%d' % i, max_steps=max_steps,
                                                )
    mountain_sparse_aucs.append(['tqn_%d' % i, np.trapz((mountain_sparse_data['tqn_%d' % i][0][0] + 200)/110,
                                      x=(mountain_sparse_data['tqn_%d' % i][3][0]/max(
                                          mountain_sparse_data['tqn_%d' % i][3][0])))])

    mountain_sparse_data['t-dqn_%d' % i] = load_dqn_data('*', 'experiments/featurized_results/sparsemountain/t-dqn_%d' % i,
                                                  max_steps=max_steps,
                                                  )
    mountain_sparse_aucs.append(['t-dqn_%d' % i, np.trapz((mountain_sparse_data['t-dqn_%d' % i][0][0] + 200)/110,
                                   x=(mountain_sparse_data['t-dqn_%d' % i][3][0]/max(
                                       mountain_sparse_data['t-dqn_%d' % i][3][0])))])

    mountain_sparse_data['tdqn_%d' % i] = load_dqn_data('*', 'experiments/featurized_results/sparsemountain/tdqn_%d' % i, max_steps=max_steps,
                                                 )
    mountain_sparse_aucs.append(['tdqn_%d' % i, np.trapz((mountain_sparse_data['tdqn_%d' % i][0][0] + 200)/110,
                                   x=(mountain_sparse_data['tdqn_%d' % i][3][0]/max(
                                       mountain_sparse_data['tdqn_%d' % i][3][0])))])

C:\Users\mqr12\Desktop\TempoRL-master\experiments\featurized_results\sparsemountain\dar_orig_0_1\*\eval_scores.json
C:\Users\mqr12\Desktop\TempoRL-master\experiments\featurized_results\sparsemountain\dar_orig_0_3\*\eval_scores.json
C:\Users\mqr12\Desktop\TempoRL-master\experiments\featurized_results\sparsemountain\dar_orig_0_5\*\eval_scores.json
C:\Users\mqr12\Desktop\TempoRL-master\experiments\featurized_results\sparsemountain\dar_orig_0_7\*\eval_scores.json
C:\Users\mqr12\Desktop\TempoRL-master\experiments\featurized_results\sparsemountain\dar_orig_0_9\*\eval_scores.json
C:\Users\mqr12\Desktop\TempoRL-master\experiments\featurized_results\sparsemountain\dqn\*\eval_scores.json


TypeError: 'numpy.float64' object is not iterable

In [10]:
mountain_sparse_plot = get_best_to_plot(mountain_sparse_data, mountain_sparse_aucs)

plotMultiple(mountain_sparse_plot, title='MountainCar-v0',
             ylim=[-200, -100], max_steps=200, xlim=[10**3, 10**6], smooth=11,
             savename='mcv0-sparse.pdf', rewyticks=[-190, -150, -110], lenyticks=[0, 75, 150])

UnboundLocalError: local variable 'idx' referenced before assignment

In [None]:
list(sorted(mountain_sparse_aucs, key=lambda x: x[1], reverse=True))

<BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR>

In [4]:
moon_dense_data = {}
moon_dense_alphas = {}
moon_dense_aucs = []
max_steps=10**6
thresh=200

for pairs in [(0, 1), (0, 3), (0, 5), (0, 7), (0, 9)]:
    dar_fm_str = r'$dar_{' + '{}'.format(pairs[0] + 1) + '}^{' + '{}'.format(pairs[1] + 1) + '}$'
    moon_dense_alphas[dar_fm_str] = 1/5
    moon_dense_data[dar_fm_str] = load_dqn_data(
        '*', 'experiments/featurized_results/moon/dar_orig_%d_%d' % (pairs[0], pairs[1]), max_steps=max_steps,
        )
    moon_dense_aucs.append([dar_fm_str, np.trapz((moon_dense_data[dar_fm_str][0][0] + 250) / 500,
                                      x=(moon_dense_data[dar_fm_str][3][0]/max(
                                          moon_dense_data[dar_fm_str][3][0])))])

    
moon_dense_data['dqn'] = load_dqn_data('*', 'experiments/featurized_results/moon/dqn', max_steps=max_steps,
        )
moon_dense_aucs.append(['dqn', np.trapz((moon_dense_data['dqn'][0][0] + 250) / 500,
                                  x=(moon_dense_data['dqn'][3][0]/max(moon_dense_data['dqn'][3][0])))])


for i in [2, 4, 6, 8, 10]:
    moon_dense_data['tqn_%d' % i] = load_dqn_data('*', 'experiments/featurized_results/moon/tqn_%d' % i, max_steps=max_steps,
            )
    # compute normalized AUC
    moon_dense_aucs.append(['tqn_%d' % i, np.trapz((moon_dense_data['tqn_%d' % i][0][0] + 250) / 500,
                                      x=(moon_dense_data['tqn_%d' % i][3][0]/max(moon_dense_data['tqn_%d' % i][3][0])))])

    moon_dense_data['t-dqn_%d' % i] = load_dqn_data('*', 'experiments/featurized_results/moon/t-dqn_%d' % i, max_steps=max_steps,
            )
    moon_dense_aucs.append(['t-dqn_%d' % i, np.trapz((moon_dense_data['t-dqn_%d' % i][0][0] + 250)/500,
                                   x=(moon_dense_data['t-dqn_%d' % i][3][0]/max(moon_dense_data['t-dqn_%d' % i][3][0])))])

    moon_dense_data['tdqn_%d' % i] = load_dqn_data('*', 'experiments/featurized_results/moon/tdqn_%d' % i, max_steps=max_steps,
            )
    moon_dense_aucs.append(['tdqn_%d' % i, np.trapz((moon_dense_data['tdqn_%d' % i][0][0] + 250)/500,
                                   x=(moon_dense_data['tdqn_%d' % i][3][0]/max(moon_dense_data['tdqn_%d' % i][3][0])))])

C:\Users\mqr12\Desktop\TempoRL-master\experiments\featurized_results\moon\dar_orig_0_1\*\eval_scores.json


TypeError: 'numpy.float64' object is not iterable

In [5]:
moon_plot = get_best_to_plot(moon_dense_data, moon_dense_aucs)

plotMultiple(moon_plot, title='LunarLander-v2', ylim=[-250, 200], max_steps=1000, xlim=[10**3, 10**6],
             smooth=11, savename='llv2-dense.pdf', rewyticks=[-250, 0, 200], lenyticks=[200, 500, 800])

UnboundLocalError: local variable 'idx' referenced before assignment

In [6]:
list(sorted(moon_dense_aucs, key=lambda x: x[1], reverse=True))

[]