## Import libraries and data

In [None]:
from textwrap import wrap

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

Load the rewards per episode of the DQN and Double DQN agents for all 3 seeds.

In [None]:
# Number of training runs (one per random seed) recorded for each agent.
num_seeds = 3

# Map each seed number to the column name its rewards get in the frames below.
seed_columns = {seed: f"seed_{seed}" for seed in range(1, num_seeds + 1)}

# Read one CSV per seed. Passing `names` makes pandas treat the file as
# header-less, so every line of the file becomes a data row.
dqn_frames = [
    pd.read_csv(f"dqn_reward_per_episode_seed{seed}.csv", names=[column])
    for seed, column in seed_columns.items()
]
# Place the seeds side by side: one column per seed, rows aligned on the index.
df_dqn = pd.concat(dqn_frames, axis="columns")

# Same layout for the Double DQN runs.
double_dqn_frames = [
    pd.read_csv(f"double_dqn_reward_per_episode_seed{seed}.csv", names=[column])
    for seed, column in seed_columns.items()
]
df_double_dqn = pd.concat(double_dqn_frames, axis="columns")



## Plot reward curves

For each agent, first smooth each seed's rewards with a rolling mean over a
window of 10 episodes, then plot the mean and standard deviation across seeds.

In [None]:
# Plot the smoothed mean score per episode of each agent, with a shaded band of
# +/- one standard deviation across seeds, and save the figure as a PNG.
#
# The figure is created *inside* the style context on purpose: seaborn's
# `axes_style` only affects axes created while the context is active, so
# calling `plt.subplots()` beforehand would silently ignore "darkgrid".
with sns.axes_style("darkgrid"):
    fig, ax = plt.subplots()
    clrs = sns.color_palette("husl", 2)  # one colour per agent

    # Dashed horizontal reference line at a score of 13.
    # NOTE(review): presumably the target score of the task - confirm.
    ax.axhline(y=13, linestyle="--", color="black", linewidth=1)

    for idx, (name, df) in enumerate([("DQN", df_dqn), ("Double DQN", df_double_dqn)]):
        episodes = np.arange(1, len(df) + 1)  # x axis, 1-based episode number

        # Smooth every seed with a rolling mean over 10 episodes;
        # `min_periods=1` keeps the first 9 episodes instead of yielding NaN.
        df_smoothed = df.rolling(10, min_periods=1).mean()

        # Aggregate across seeds (columns) for each episode (row).
        mean = df_smoothed.mean(axis="columns").to_numpy(np.float64)
        std = df_smoothed.std(axis="columns").to_numpy(np.float64)

        ax.plot(episodes, mean, label=name, c=clrs[idx])
        ax.fill_between(
            episodes, mean - std, mean + std, alpha=0.3, facecolor=clrs[idx]
        )

    ax.legend()
    # Wrap the long title at 60 characters so it fits above the axes.
    # The seed count comes from `num_seeds` so the title stays correct if the
    # number of runs changes.
    ax.set_title(
        "\n".join(
            wrap(
                "Mean and standard deviation of the score per episode during the "
                f"training of DQN and Double DQN agents across {num_seeds} seeds.",
                60,
            )
        )
    )
    ax.set_ylabel("Score")
    ax.set_xlabel("Episode")

fig.savefig("reward_per_episode.png")