# Functions and imports

In [2]:
import pandas as pd
import numpy as np
from random import randint
from matplotlib import pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

import os
from scipy.ndimage import convolve1d

In [3]:
# Performance CSV file names, grouped per experiment as (eval, pad) pairs.

# Dependent change, hard colour distraction
eval_dep, pad_dep = (
    "performance_dep_color_hard_eval.csv",
    "performance_dep_color_hard_pad.csv",
)
# Dependent change over a longer run
eval_sust, pad_sust = (
    "performance_dep-longer_color_hard_eval.csv",
    "performance_dep-longer_color_hard_pad.csv",
)
# "pos" experiment, steady setting
eval_pos, pad_pos = (
    "performance_pos_steady_eval.csv",
    "performance_pos_steady_pad.csv",
)
# Single-run variants
eval_pos_run, pad_pos_run = (
    "performance_one-run-pos_steady_eval.csv",
    "performance_one-run-pos_steady_pad.csv",
)
eval_avg_run, pad_avg_run = (
    "performance_one-run-avg_color_hard_eval.csv",
    "performance_one-run-avg_color_hard_pad.csv",
)
# Colour variants
eval_pos_color, pad_pos_color = (
    "performance_pos-color-03_color_hard_eval.csv",
    "performance_pos-color-03_color_hard_pad.csv",
)
eval_avg_color, pad_avg_color = (
    "performance_dep-color_color_hard_eval.csv",
    "performance_dep-color_color_hard_pad.csv",
)

In [4]:
def moving_average_reward(rewards, current_ep=None, wind_lgth=3):
    """Smooth a reward sequence with a non-causal moving average.

    The smoothed value at step ``i`` is the mean of ``wind_lgth + 2`` samples:
    the ``wind_lgth`` rewards from step ``i - wind_lgth + 1`` up to ``i``, plus
    the two rewards that follow step ``i``.  Samples requested outside the
    sequence are replaced by the nearest edge value (``mode='nearest'``).

    Args:
        rewards: array-like of rewards (any numeric dtype); it is converted
            to float64 before filtering.
        current_ep: if ``None``, the whole smoothed array is returned;
            otherwise the non-negative index whose smoothed value is returned.
        wind_lgth: number of current-and-past samples in the window (integer,
            at least 1).

    Returns:
        The float64 smoothed array, or ``avg[current_ep]`` when ``current_ep``
        is given.

    Raises:
        ValueError: if ``wind_lgth`` is smaller than 1.
    """
    if wind_lgth < 1:
        raise ValueError("wind_lgth must be >= 1, got {}".format(wind_lgth))

    # convolve1d returns an array of the *input* dtype, so integer rewards
    # would have their averages truncated to integers; filter floats instead.
    rewards = np.asarray(rewards, dtype=np.float64)

    # convolve1d centres the kernel on its middle tap, so zeros are padded on
    # whichever side is shorter to get a window reaching (wind_lgth - 1) steps
    # back and 2 steps ahead.  Leading zeros trim the look-ahead side, trailing
    # zeros trim the look-back side (the kernel is flipped by the convolution).
    n_taps = wind_lgth + 2
    w = np.concatenate((
        np.zeros(max(wind_lgth - 3, 0)),
        np.ones(n_taps),
        np.zeros(max(3 - wind_lgth, 0)),
    )) / n_taps

    avg = convolve1d(rewards, w, mode='nearest')
    if current_ep is None:
        # Return full averaged array
        return avg

    # Return average at current time step; a negative index would silently
    # wrap around to the end of the array, so reject it.
    assert current_ep >= 0, "current_ep must be non-negative"
    return avg[current_ep]

In [5]:
def compute_speed(avg_reward, max_speed, coef = 1/10, max_reward=8):
    """Map an average reward to an integer speed.

    The speed is ``max_speed * exp(coef * (avg_reward - max_reward))`` cast to
    ``int``; it equals ``max_speed`` when ``avg_reward == max_reward``.

    Args:
        avg_reward: scalar or numpy array of average rewards.
        max_speed: scale factor applied to the exponential term.
        coef: multiplier of the reward gap inside the exponential.
        max_reward: reward at which the exponential term equals 1.

    Returns:
        The speed(s) converted with ``astype(int)`` (numpy integer or array).
    """
    reward_gap = avg_reward - max_reward
    scale = np.exp(coef * reward_gap)
    speed = max_speed * scale
    return speed.astype(int)

In [6]:
def plot_single(reward, chg, change_type, threshold) :
    """Draw a three-panel figure for a single run.

    Panels, from top to bottom: the raw reward, the smoothed reward returned
    by ``moving_average_reward`` (with a green horizontal line at
    ``threshold``), and the change sequence.

    Args:
        reward: reward sequence exposing ``.values`` (e.g. a pandas Series).
        chg: change sequence plotted in the bottom panel.
        change_type: str used as the y-label of the bottom panel.
        threshold: y-position of the horizontal line on the smoothed panel.
    """
    smoothed = moving_average_reward(reward.values)

    fig, axes = plt.subplots(3, 1, figsize=(20, 15))

    # One (data, y-label) pair per panel, top to bottom.
    panels = (
        (reward, "Reward"),
        (smoothed, "Average reward"),
        (chg, change_type),
    )
    for ax, (series, label) in zip(axes, panels):
        ax.plot(series)
        ax.set_ylabel(label)

    # Mark the threshold on the smoothed-reward panel only.
    axes[1].axhline(y=threshold, color='green')

    fig.suptitle("Episode analysis")
    fig.supxlabel("Timesteps")
    plt.show()

In [7]:
def _plot_mean_with_band(ax, means, half_width, ylabel):
    """Plot ``means`` on ``ax`` with a gray band of ``+/- half_width`` around it."""
    ax.plot(means)
    ax.fill_between(range(len(means)),
                    means - half_width,
                    means + half_width,
                    color='gray',
                    alpha=0.2)
    ax.set_ylabel(ylabel)


def make_statistics(df, name, change, threshold) :
    """Print and plot statistics across several episodes.

    Prints the mean and standard deviation (across episodes) of the cumulative
    reward and of the last row of the change columns, then shows a two-panel
    figure: the per-step mean of the smoothed reward and the per-step mean of
    the change, each with a 95% confidence band (1.96 x standard error).

    Args:
        df: full data set with columns like ['episode_0_reward',
            'episode_1_reward', ... 'episode_0_<change>',
            'episode_1_<change>', ...]; rows are time steps.
        name: str used in the printed header and the figure title.
        change: column suffix selecting the change columns, either
            'color_hard', 'video_i' (i in 0-9), 'color_easy', 'steady'.
        threshold: reward level at which a change was introduced; drawn as a
            green horizontal line on the reward panel.
    """
    print("-"*60)
    print("For {}".format(name))

    df_change = df[[col for col in df.columns if col.endswith(change)]]
    df_reward = df[[col for col in df.columns if col.endswith('reward')]]
    # Smooth in floating point: the scipy filter used for smoothing returns
    # the input dtype, so integer reward columns would otherwise be truncated.
    avg_reward = df_reward.astype(np.float64).apply(moving_average_reward, axis = 0)

    # Cumulative reward per episode, summarised across episodes
    cumulative = df_reward.sum(axis = 0)
    print("Mean cumulative reward : {} +/- {}".format(cumulative.mean(), cumulative.std()))

    # Value of the change at the last time step, summarised across episodes
    final_change = df_change.iloc[-1,:]
    print("Mean {} : {} +/- {}".format(change, final_change.mean(), final_change.std()))

    # Per-step mean across episodes and its 95% confidence half-width.
    # The standard error divides by the number of episodes contributing to
    # each step (non-NaN columns), not by the number of time steps.
    means_avg = avg_reward.mean(axis = 1)
    diff_avg = 1.96 * avg_reward.std(axis = 1) / np.sqrt(avg_reward.count(axis = 1))
    means_ch = df_change.mean(axis = 1)
    diff_ch = 1.96 * df_change.std(axis = 1) / np.sqrt(df_change.count(axis = 1))

    fig, axes = plt.subplots(2,1, figsize = (20,15))

    # Plot avg smoothed reward
    _plot_mean_with_band(axes[0], means_avg, diff_avg, "Average reward")
    axes[0].axhline(y = threshold, color = 'green')

    # Plot avg change
    _plot_mean_with_band(axes[1], means_ch, diff_ch, "Average {}".format(change))

    fig.supxlabel("Time steps")
    fig.suptitle("Analysis for {}".format(name))
    plt.show()
    