In [None]:
"""This notebook plots evals stats saved by EvalCallback"""
import matplotlib
import matplotlib.pyplot as plt
import os
import numpy as np

In [None]:
# Log directory to scan; only entries whose name contains 'dqn' are kept,
# in sorted order.
base_path = '../../logs/pedagogic_human_tf1_10M/'

filenames = sorted(entry for entry in os.listdir(base_path) if 'dqn' in entry)
filenames

In [None]:
def moving_average(values, window):
    """Smooth values with a centred moving average.

    Each output point is the mean of the samples that actually fall inside
    the window, so points near either end are averaged over fewer samples
    instead of being dragged towards zero by implicit zero padding.

    Parameters
    ----------
    values : array-like
        Samples to smooth; flattened to 1-D before averaging.
    window : int
        Number of neighbouring samples averaged per output point. Values
        larger than the number of samples are clamped to it.

    Returns
    -------
    numpy.ndarray
        1-D float array with one element per input sample.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    values = np.asarray(values).reshape(-1)
    if window < 1:
        raise ValueError(f'window must be at least 1, got {window}')
    if values.size == 0:
        return values.astype(float)

    # With mode 'same' np.convolve returns max(len(values), window) elements,
    # so an oversized window would change the output length.
    window = min(window, values.size)
    kernel = np.ones(window)
    window_sums = np.convolve(values, kernel, 'same')
    # Number of real samples covered by the window at each position
    # (always >= 1 because the window always contains the point itself).
    window_counts = np.convolve(np.ones(values.size), kernel, 'same')
    return window_sums / window_counts


def plot_evals(filenames, min_x=-10, max_x=9_850_000, averaging_window_size=4, hline_y=None):
    """Plot the smoothed mean evaluation reward of each run on one figure.

    For every name in ``filenames`` this loads
    ``<base_path>/<name>/evaluations.npz`` (``base_path`` is the module-level
    log directory), averages its ``results`` array over axis 1, smooths the
    curve with ``moving_average`` and draws it against ``timesteps``.

    Parameters
    ----------
    filenames : iterable of str
        Run directory names under ``base_path``. The last character of each
        name is shown as the seed in the legend label.
    min_x, max_x : number
        Limits of the x axis; also the extent of the optional horizontal line.
    averaging_window_size : int
        Window passed to ``moving_average``.
    hline_y : float, optional
        If given, a dashed black line labelled 'Optimal performance' is drawn
        at this height.
    """
    plt.figure(num=None, figsize=(15, 6), dpi=80, facecolor='w', edgecolor='k')
    # Start from +/-inf so the first run always defines the range,
    # whatever the reward scale is.
    miny, maxy = np.inf, -np.inf
    for f in filenames:
        # np.load returns an NpzFile holding an open file handle; the context
        # manager closes it once the arrays have been read.
        with np.load(os.path.join(base_path, f, 'evaluations.npz')) as eval_data:
            results = eval_data['results'].mean(axis=1)
            timesteps = eval_data['timesteps']
        # The y range is taken from the raw (unsmoothed) means.
        miny = min(results.min(), miny)
        maxy = max(results.max(), maxy)

        # NOTE(review): only the last character is used, so a multi-digit
        # seed suffix would be truncated — confirm the naming scheme.
        label = f'DQN seed {f[-1]}'
        plt.plot(timesteps, moving_average(results, averaging_window_size), label=label)

    if hline_y is not None:
        plt.hlines(hline_y, min_x, max_x,
                   colors=['black'], linestyles=['dashed'], label='Optimal performance')
    if miny <= maxy:  # skipped when no run was loaded (range still at its sentinels)
        plt.ylim(miny - 0.4, maxy + 0.4)
        # NOTE(review): these tick locations are replaced by the fixed ticks
        # set at the end of the function; the call is kept because setting
        # ticks can also widen the view limits — confirm whether it can go.
        plt.yticks(np.arange(miny, maxy + 0.5, 0.5))
    plt.xlim(min_x, max_x)

    plt.legend(loc=4)
    plt.grid(axis='y')
    plt.xlabel('Timesteps')
    plt.ylabel('Reward')
    # Final tick locations: -1.5 to 1.5 in steps of 0.5.
    plt.yticks(ticks=np.arange(-1.5, 1.61, 0.5))
    

# Font settings applied to all figure text.
font = dict(family='Times New Roman', weight='normal', size=20)
matplotlib.rc('font', **font)

# Height of the dashed reference line: 2 minus the mean of two ramp sums,
# 0.01 * (0 + 1 + ... + 8) and 0.01 * (0 + 1 + ... + 14).
# NOTE(review): the meaning of the lengths 9 and 15 is not visible here — confirm.
ramp_sum_9 = sum(np.arange(9) * 0.01)
ramp_sum_15 = sum(np.arange(15) * 0.01)
optimal_reward = 2 - (ramp_sum_9 + ramp_sum_15) / 2
print(optimal_reward)
plot_evals(filenames, hline_y=optimal_reward)