## Repo setup


In [None]:
import os

# Forked repositories (under the AnejSvete GitHub account) that the rest of
# the notebook installs and imports.
repo_names = ['gym', 'spinningup']

for repo_name in repo_names:
    cmd_string = f'git clone https://github.com/AnejSvete/{repo_name}.git'
    # os.system returns the command's exit status; report a failed clone here
    # instead of letting it surface later as a confusing install/import error.
    # A failure is only reported (not raised) so re-running the cell when the
    # checkout already exists keeps working as before.
    exit_status = os.system(cmd_string)
    if exit_status != 0:
        print(f'Command failed with status {exit_status}: {cmd_string}')

In [None]:
# Install the spinningup checkout in editable mode (pip install -e).
# NOTE(review): assumes the clone cell ran with /content as the working directory - confirm.
!cd /content/spinningup; pip install -e .

In [None]:
# Install the gym checkout in editable mode (pip install -e).
# NOTE(review): assumes the clone cell ran with /content as the working directory - confirm.
!cd /content/gym; pip install -e .

## Init

In [None]:
# Refresh the apt package index, then install the libopenmpi-dev package.
# Presumably needed to build mpi4py, which is pip-installed below - confirm.
!sudo apt-get update && sudo apt-get install libopenmpi-dev

In [None]:
# Install ffmpeg, the FreeGLUT development package and Xvfb via apt.
# Xvfb is started as the fake display in a later cell.
!apt-get install ffmpeg freeglut3-dev xvfb  # For visualization

In [None]:
# Extra Python packages installed from PyPI: mpi4py and pynput.
!pip install mpi4py
!pip install pynput

In [None]:
# Set up fake display; otherwise rendering will fail
import os
# Start Xvfb on display :1 with screen 0 at 1024x768, 24-bit depth. The
# trailing '&' runs it in the background so this call returns immediately.
os.system("Xvfb :1 -screen 0 1024x768x24 &")
# Point this process (and any child processes it starts) at that display.
os.environ['DISPLAY'] = ':1'

## Imports

In [None]:
import gym
import numpy as np

In [None]:
import os

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from sklearn.model_selection import ParameterGrid

In [None]:
from spinup import ppo_pytorch as ppo

In [None]:
import tensorflow as tf
import torch

In [None]:
from spinup.utils.test_policy import load_policy_and_env, run_policy

## Experiment params

#### Parameters

In [None]:
# Prefix used when building experiment and legend names.
model_name = 'ppo'

# Number of epochs passed to ppo() for every run.
epochs = 100

# Passed to ppo() as ac_kwargs for every run.
ac_kwargs = {'hidden_sizes': [12, 12]}

# Each key is forwarded to ppo() as a keyword argument (via **params); each
# value is the list of settings to sweep. Only `seed` has more than one
# entry, so the grid below expands to one run per seed.
params_dict = {
    'steps_per_epoch': [8000],
    'gamma': [0.99],
    'clip_ratio': [0.2],
    'pi_lr': [0.002],
    'vf_lr': [0.005],
    'train_pi_iters': [80],
    'train_v_iters': [80],
    'lam': [0.97],
    'max_ep_len': [250],
    'target_kl': [0.01],
    'seed': [
        164789, 215965, 433506, 521601, 55231,
        589871, 64030, 759786, 811127, 970144,
    ],
}

# Every combination of the values above, as an iterable of dicts.
params_grid = ParameterGrid(params_dict)

In [None]:
# Settings handed to make_plots() after each training run.

# Columns of progress.txt used for the x and y axes.
xaxis = 'TotalEnvInteracts'
value = 'Performance'

# When true, curves are grouped by 'Condition2' (one per run) rather than
# by 'Condition1'.
count = True

# Width of the moving-average window; values <= 1 disable smoothing.
smooth = 10

# Name of the numpy function (looked up with getattr) used as the estimator.
est = 'mean'

# Optional lists of substrings a log directory must / must not contain.
select = None
exclude = None

# Default legend; the training loop passes its own per-run legend instead.
legend = None

## Train the agent and evaluate it

In [None]:
# Train one PPO agent per parameter combination and plot its learning curve.
#
# NOTE: make_plots() is defined in the "Visualization" cell further down in
# this notebook; that cell has to be executed before this one, otherwise the
# first iteration ends with a NameError after training has finished.
for ii, params in enumerate(params_grid):

    # Short label shown in the plot legend.
    legend_name = (
        f"{model_name}"
        f"_plr{params['pi_lr']}"
        f"_vlr{params['vf_lr']}"
        f"_g{params['gamma']}"
        f"_l{params['lam']}"
    )

    # Full name encoding every swept hyper-parameter; also the name of the
    # output directory for this run.
    experiment_name = (
        f"{model_name}"
        f"_plr{params['pi_lr']}"
        f"_vlr{params['vf_lr']}"
        f"_spe{params['steps_per_epoch']}"
        f"_gam{params['gamma']}"
        f"_lam{params['lam']}"
        f"_tkl{params['target_kl']}"
        f"_tpi{params['train_pi_iters']}"
        f"_tvi{params['train_v_iters']}"
        f"_cr{params['clip_ratio']}"
        f"_seed{params['seed']}"
    )

    model_logdir = os.path.join('/content/spinningup/data/mountain', experiment_name)
    logger_kwargs = dict(output_dir=model_logdir, exp_name=experiment_name)

    # Bind the seed as a default argument so the factory keeps this
    # iteration's value even if it is called after `params` has moved on
    # (a plain closure would look `params` up at call time).
    env_fn = lambda seed=params['seed']: gym.make('CartPole-v3', seed=seed)

    print(f'On experiment {ii + 1} / {len(params_grid)}.')

    ppo(env_fn=env_fn, epochs=epochs, ac_kwargs=ac_kwargs, **params,
        eval_episodes=50, required_quality=0.75, logger_kwargs=logger_kwargs)

    logdir = [model_logdir]

    make_plots(logdir, legend=[legend_name], xaxis=xaxis, values=value, count=count,
               smooth=smooth, select=select, exclude=exclude,
               estimator=est)

In [None]:
# Load the saved policy of every run in the grid and roll it out.
#
# NOTE(review): policies are loaded from .../data/obstacle, while the training
# cell above writes to .../data/mountain - confirm this is intentional.
# NOTE(review): legend_name, logger_kwargs and env_fn are assigned here but
# not used by this cell.
for ii, params in enumerate(params_grid):

    legend_parts = [
        model_name,
        'plr' + str(params['pi_lr']),
        'vlr' + str(params['vf_lr']),
        'g' + str(params['gamma']),
        'l' + str(params['lam']),
    ]
    legend_name = '_'.join(legend_parts)

    # Must match the naming scheme used when the run was trained.
    experiment_parts = [
        model_name,
        'plr' + str(params['pi_lr']),
        'vlr' + str(params['vf_lr']),
        'spe' + str(params['steps_per_epoch']),
        'gam' + str(params['gamma']),
        'lam' + str(params['lam']),
        'tkl' + str(params['target_kl']),
        'tpi' + str(params['train_pi_iters']),
        'tvi' + str(params['train_v_iters']),
        'cr' + str(params['clip_ratio']),
        'seed' + str(params['seed']),
    ]
    experiment_name = '_'.join(experiment_parts)

    model_logdir = os.path.join('/content/spinningup/data/obstacle', experiment_name)
    logger_kwargs = {'output_dir': model_logdir, 'exp_name': experiment_name}

    env_fn = lambda : gym.make('CartPole-v2', seed=params['seed'])

    print(f'On evaluation {ii + 1} / {len(params_grid)}.')

    # Positional arguments are kept exactly as given; their meaning is set by
    # the spinup fork's load_policy_and_env / run_policy signatures.
    loaded = load_policy_and_env(model_logdir, 'CartPole-v2',
                                 False, 'last', True)
    env, get_action = loaded
    run_policy(env, get_action, 0, 100, False)

In [None]:
# Recursively archive the 'mountain' results directory. The archive name is
# the same path as the directory (zip normally appends '.zip' when the name
# has no extension - confirm).
!zip -r /content/spinningup/data/mountain /content/spinningup/data/mountain

## Visualization

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
import os.path as osp
import numpy as np

# Number of '=' characters in the divider lines printed by get_all_datasets().
DIV_LINE_WIDTH = 50

# Shared state updated by get_datasets():
#   exp_idx - running count of experiment directories seen so far, used as
#             the numeric suffix of 'Condition2'.
#   units   - maps each 'Condition1' label to how many runs have been loaded
#             under it; the count at load time becomes the run's 'Unit' id.
exp_idx = 0
units = {}

def plot_data(data, xaxis='Epoch', value="AverageEpRet", condition="Condition2", smooth=1, **kwargs):
    """Draw one time-series plot of `value` against `xaxis` on the current figure.

    Args:
        data: A list of DataFrames (one per run) or a single DataFrame.
            Smoothing iterates over `data` and indexes each item by `value`,
            so with ``smooth > 1`` a list of DataFrames is expected.
        xaxis: Column used for the x axis.
        value: Column used for the y axis.
        condition: Column whose values distinguish the plotted curves.
        smooth: Moving-average window length; applied only when greater
            than 1. The smoothed values overwrite the `value` column of the
            passed-in DataFrames in place.
        **kwargs: Forwarded to ``sns.tsplot``.

    Besides plotting, this prints summary information about the combined data
    and its 'Condition1' / 'Condition2' columns.
    """
    if smooth > 1:
        # Moving average: divide the windowed sum by the number of samples
        # the window actually covers, so the edges are not biased toward zero.
        kernel = np.ones(smooth)
        for frame in data:
            raw = np.asarray(frame[value])
            window_sum = np.convolve(raw, kernel, 'same')
            window_count = np.convolve(np.ones(len(raw)), kernel, 'same')
            frame[value] = window_sum / window_count

    if isinstance(data, list):
        data = pd.concat(data, ignore_index=True)

    sns.set(style="darkgrid", font_scale=1.5)
    # NOTE(review): tsplot is only available in older seaborn releases -
    # confirm the pinned seaborn version still provides it.
    sns.tsplot(data=data, time=xaxis, value=value, unit="Unit", condition=condition, ci='sd', **kwargs)

    # Diagnostic output describing the frame that was plotted.
    print(data.describe(include='all'))
    print(data.info())
    for column in ('Condition1', 'Condition2'):
        print(data[column])

    legend_box = plt.legend(loc='best')
    legend_box.set_draggable(True)

    # Use scientific notation on the x axis once values exceed 5e3.
    largest_x = np.max(np.asarray(data[xaxis]))
    if largest_x > 5e3:
        plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))

    plt.tight_layout(pad=0.5)

def get_datasets(logdir, condition=None):
    """Load every run found under `logdir` into a list of DataFrames.

    A run is any directory (at any depth) that contains a 'progress.txt'
    file. Four columns are appended to each loaded table:

    * 'Unit'        - index of this run among runs sharing its 'Condition1'.
    * 'Condition1'  - `condition` if given, else the 'exp_name' from the
                      run's config.json, else 'exp'.
    * 'Condition2'  - 'Condition1' plus '-' and the global experiment index.
    * 'Performance' - copy of 'AverageTestEpRet' if present, otherwise of
                      'AverageEpRet'.

    The module-level counters `exp_idx` and `units` are updated for every run
    directory encountered, including ones whose progress.txt cannot be read.

    Args:
        logdir: Directory to search recursively.
        condition: Optional label overriding the experiment name.

    Returns:
        A list with one DataFrame per successfully loaded run.
    """
    global exp_idx
    global units
    datasets = []
    for root, _, files in os.walk(logdir):
        if 'progress.txt' not in files:
            continue

        exp_name = None
        try:
            # The context manager closes the file handle even if parsing fails.
            with open(os.path.join(root, 'config.json')) as config_file:
                config = json.load(config_file)
        except (OSError, ValueError):
            # Missing/unreadable file, or invalid JSON (JSONDecodeError is a
            # ValueError); fall back to the default name.
            print('No file named config.json')
        else:
            if isinstance(config, dict) and 'exp_name' in config:
                exp_name = config['exp_name']

        condition1 = condition or exp_name or 'exp'
        condition2 = condition1 + '-' + str(exp_idx)
        exp_idx += 1
        if condition1 not in units:
            units[condition1] = 0
        unit = units[condition1]
        units[condition1] += 1

        progress_path = os.path.join(root, 'progress.txt')
        try:
            exp_data = pd.read_table(progress_path)
        except Exception:
            # Best effort: skip runs whose log cannot be parsed, but (unlike
            # a bare `except`) let KeyboardInterrupt/SystemExit propagate.
            print('Could not read from %s' % progress_path)
            continue

        performance = 'AverageTestEpRet' if 'AverageTestEpRet' in exp_data else 'AverageEpRet'
        exp_data.insert(len(exp_data.columns), 'Unit', unit)
        exp_data.insert(len(exp_data.columns), 'Condition1', condition1)
        exp_data.insert(len(exp_data.columns), 'Condition2', condition2)
        exp_data.insert(len(exp_data.columns), 'Performance', exp_data[performance])
        datasets.append(exp_data)
    return datasets


def get_all_datasets(all_logdirs, legend=None, select=None, exclude=None):
    """Resolve log directories and load all their runs via get_datasets().

    Each entry of `all_logdirs` is handled in one of two ways:

    1. If it is an existing directory written with a trailing path separator,
       it is used as is.
    2. Otherwise its last path component is treated as a pattern: every entry
       of the parent directory whose name contains that component is used
       (in sorted order).

    Args:
        all_logdirs: Iterable of directory paths / path patterns.
        legend: Optional list of labels, one per resolved log directory.
        select: Optional list of substrings; a directory is kept only if its
            path contains all of them.
        exclude: Optional list of substrings; a directory is dropped if its
            path contains any of them.

    Returns:
        A list of DataFrames, as produced by get_datasets().

    Raises:
        AssertionError: If `legend` is given and its length differs from the
            number of resolved log directories.
    """
    logdirs = []
    for logdir in all_logdirs:
        if osp.isdir(logdir) and logdir[-1] == os.sep:
            logdirs.append(logdir)
        else:
            basedir = osp.dirname(logdir)
            prefix = logdir.split(os.sep)[-1]
            # dirname() is '' for a bare relative name such as 'exp', and
            # os.listdir('') raises FileNotFoundError, so list the current
            # directory in that case. Joining with the original (possibly
            # empty) basedir keeps the resulting paths unchanged.
            entries = os.listdir(basedir or os.curdir)
            logdirs.extend(sorted(osp.join(basedir, x) for x in entries if prefix in x))

    if select is not None:
        logdirs = [log for log in logdirs if all(x in log for x in select)]
    if exclude is not None:
        logdirs = [log for log in logdirs if all(x not in log for x in exclude)]

    # Verify logdirs
    print('Plotting from...\n' + '='*DIV_LINE_WIDTH + '\n')
    for logdir in logdirs:
        print(logdir)
    print('\n' + '='*DIV_LINE_WIDTH)

    # Make sure the legend is compatible with the logdirs
    assert not(legend) or (len(legend) == len(logdirs)), \
        "Must give a legend title for each set of experiments."

    # Load data from logdirs
    data = []
    if legend:
        for log, leg in zip(logdirs, legend):
            data += get_datasets(log, leg)
    else:
        for log in logdirs:
            data += get_datasets(log)
    return data


def make_plots(all_logdirs, legend=None, xaxis=None, values=None, count=False,  
               font_scale=1.5, smooth=1, select=None, exclude=None, estimator='mean'):
    """Load the requested runs and show one figure per plotted value.

    Args:
        all_logdirs: Log directories / patterns, resolved by get_all_datasets().
        legend: Optional list of labels, one per resolved log directory.
        xaxis: Column used for the x axis.
        values: Column name, or list of column names, to plot; one figure is
            created for each.
        count: If true, curves are grouped by 'Condition2'; otherwise by
            'Condition1'.
        font_scale: Accepted but not used by this function.
        smooth: Moving-average window length passed on to plot_data().
        select: Substrings every plotted log directory must contain.
        exclude: Substrings no plotted log directory may contain.
        estimator: Name of a numpy function (e.g. 'mean', 'max', 'min') used
            for the main curve.
    """
    datasets = get_all_datasets(all_logdirs, legend, select, exclude)

    if not isinstance(values, list):
        values = [values]

    if count:
        group_column = 'Condition2'
    else:
        group_column = 'Condition1'

    # Turn the estimator's name into the numpy function itself.
    estimator_fn = getattr(np, estimator)

    for metric in values:
        plt.figure()
        plot_data(datasets, xaxis=xaxis, value=metric, condition=group_column,
                  smooth=smooth, estimator=estimator_fn)
    plt.show()