# Setup

In [None]:
# Experiment configuration: edit these values to change the run.
# Root folder under which this run's output directory is created.
exp_directory = 'results'
# Key used to select the agent class from the `agents` registry.
agent_name = 'DeepQAgent'
# Game passed to `build_atari_environment`.
game_name = 'Breakout'
# Forwarded to the agent constructor as `render_mode`.
render_mode = 'rgb_array'

### Global Modules

In [None]:
import os
import datetime
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
from gym.wrappers import Monitor

### Local Modules

In [None]:
import base
from src.agents import DeepQAgent, A3CAgent
from src.util import BaseCallback, JupyterCallback
from src.environment.atari import build_atari_environment

## Constants

In [None]:
# Registry of the available agent classes keyed by their class name, so the
# `agent_name` setting can be used to pick one.
agents = {
    agent_class.__name__: agent_class
    for agent_class in (DeepQAgent, A3CAgent)
}

### Output Directory

In [None]:
# Build the timestamped output directory for this run:
#   <exp_directory>/<game_name>/<agent_name>/<YYYY-MM-DD_HH-MM>
# NOTE: this rebinds `exp_directory` from its own previous value, so
# re-running this cell without first re-running the configuration cell nests
# a new path inside the previous one.
now = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M')
exp_directory = '{}/{}/{}/{}'.format(exp_directory, game_name, agent_name, now)
# exist_ok=True creates any missing parents and tolerates an existing
# directory, avoiding the race-prone "check, then create" pattern.
os.makedirs(exp_directory, exist_ok=True)
exp_directory

In [None]:
# Directory inside the experiment directory where figures are saved.
plot_dir = '{}/plots'.format(exp_directory)
# exist_ok=True tolerates an existing directory, avoiding the race-prone
# "check, then create" pattern.
os.makedirs(plot_dir, exist_ok=True)
plot_dir

# Environment

In [None]:
# Build the Atari environment for the configured game.
env = build_atari_environment(game_name)
# Optional: uncomment to wrap the environment in gym's Monitor, which is
# pointed at '<exp_directory>/monitor'.
# env = Monitor(env, '{}/monitor'.format(exp_directory), force=True)

In [None]:
# Display the environment's observation space.
env.observation_space

In [None]:
# Display the environment's action space.
env.action_space

# Agent

In [None]:
# Look up the configured agent class by name and instantiate it on the
# environment; the trailing expression displays the agent in the notebook.
agent_class = agents[agent_name]
agent = agent_class(env, render_mode=render_mode)
agent

In [None]:
# Record the agent's repr in the experiment directory so the run's setup is
# stored next to its results.
agent_path = '{}/agent.py'.format(exp_directory)
with open(agent_path, 'w') as agent_file:
    agent_file.write(repr(agent))

## Initial

In [None]:
# Let the agent play before any training and keep the results as a Series.
initial = pd.Series(agent.play())
initial

In [None]:
# Save the pre-training play results to the experiment directory.
initial.to_csv('{}/initial.csv'.format(exp_directory))

In [None]:
# Summary statistics of the pre-training play results.
initial.describe()

In [None]:
# Histogram of the pre-training play results, saved as a PDF in the plot
# directory.
initial.hist()
plt.savefig('{}/initial.pdf'.format(plot_dir))

## Training

In [None]:
# Run the agent's observe step before training.
# NOTE(review): presumably this pre-fills the agent's replay memory; confirm
# against the agent implementation.
agent.observe()

In [None]:
# Train the agent, passing a callback object; its `scores` and `losses`
# attributes are read after training to save and plot the results.
callback = BaseCallback()
agent.train(callback=callback)

In [None]:
# Turn the metrics gathered by the callback into Series and write each one
# to its own CSV file in the experiment directory.
scores, losses = pd.Series(callback.scores), pd.Series(callback.losses)
scores.to_csv('{}/scores.csv'.format(exp_directory))
losses.to_csv('{}/losses.csv'.format(exp_directory))

In [None]:
# Combine scores and losses side by side into one frame (columns 'Reward'
# and 'Loss', index named 'Episode'), plot each column in its own subplot,
# and save the figure to the plot directory.
train = pd.concat([scores, losses], axis=1, keys=['Reward', 'Loss'])
train.index.name = 'Episode'
_ = train.plot(subplots=True, figsize=(12, 5))
plt.savefig('{}/training.pdf'.format(plot_dir))

## Final

In [None]:
# Let the agent play again after training and keep the results as a Series.
final = pd.Series(agent.play())
final

In [None]:
# Save the post-training play results to the experiment directory.
final.to_csv('{}/final.csv'.format(exp_directory))

In [None]:
# Summary statistics of the post-training play results.
final.describe()

In [None]:
# Histogram of the post-training play results, saved as a PDF in the plot
# directory.
final.hist()
plt.savefig('{}/final.pdf'.format(plot_dir))

## Saving Weights

In [None]:
# Save the trained model's weights into the experiment directory
# (overwrite=True is passed to save_weights).
agent.model.save_weights('{}/weights.h5'.format(exp_directory), overwrite=True)