In [1]:
# Optional: uncomment to append a local static ffmpeg build directory to PATH.
# NOTE(review): presumably needed so the gym Monitor can encode videos — confirm.
# import os
# os.environ['PATH'] += ':ffmpeg-3.4.2-64bit-static'

# Setup

In [2]:
# --- experiment configuration -------------------------------------------
# identifier of the game/level handed to the environment builder
game_name = 'SuperMarioBros-1-1'
# key used to select the agent class from the `agents` lookup table
agent_name = 'DeepQAgent'
# render mode forwarded to the agent constructor
render_mode = 'rgb_array'
# root directory under which the experiment output is written
exp_directory = 'results'

## Global Modules

In [3]:
import os
import datetime
from multiprocessing import Lock
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
from gym.wrappers import Monitor


In [4]:
import logging

# Quiet the 'gym' logger so that only CRITICAL records are emitted.
# logging.CRITICAL is the named constant for the numeric level 50.
logger = logging.getLogger('gym')
logger.setLevel(logging.CRITICAL)

## Local Modules

In [5]:
import base
from src.agents import DeepQAgent, A3CAgent
from src.util import BaseCallback, JupyterCallback
from src.environment.nes import build_nes_environment

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Constants

In [6]:
# lookup table from an agent class's name to the class itself, so the
# agent to run can be chosen with the `agent_name` string
agents = {
    agent_class.__name__: agent_class
    for agent_class in (DeepQAgent, A3CAgent)
}

### Output Directory

In [7]:
# Set up the timestamped output directory for this run:
#   <root>/<game>/<agent>/<YYYY-MM-DD_HH-MM>
# NOTE: `exp_directory` is rebound from the root to the full run path here,
# so re-running this cell without first re-running the configuration cell
# nests the path one level deeper each time.
now = datetime.datetime.today().strftime('%Y-%m-%d_%H-%M')
exp_directory = '{}/{}/{}/{}'.format(exp_directory, game_name, agent_name, now)
# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test
os.makedirs(exp_directory, exist_ok=True)
exp_directory

'results/SuperMarioBros-1-1/DeepQAgent/2018-04-20_21-47'

In [8]:
# directory inside the experiment directory where figures are saved
plot_dir = '{}/plots'.format(exp_directory)
# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test
os.makedirs(plot_dir, exist_ok=True)
plot_dir

'results/SuperMarioBros-1-1/DeepQAgent/2018-04-20_21-47/plots'

# Environment

In [9]:
from multiprocessing import Lock

In [10]:
# build the NES environment for the configured game
env = build_nes_environment(game_name)
# give the environment a fresh multiprocessing lock before it is wrapped
env.configure(lock=Lock())
# wrap the environment in the gym Monitor, writing to <exp_directory>/monitor
env = Monitor(
    env,
    '{}/monitor'.format(exp_directory),
    force=True,
)

In [11]:
# display the observation space reported by the wrapped environment
env.observation_space

Box(100, 100, 4)

In [12]:
# display the action space reported by the wrapped environment
env.action_space

Discrete(14)

# Agent

In [13]:
# pick the agent class by its configured name, then build it around the
# environment with the configured render mode
agent_class = agents[agent_name]
agent = agent_class(env, render_mode=render_mode)
agent

DeepQAgent(
    env=<Monitor<FrameStackEnv<ClipRewardEnv<PenalizeDeathEnv<DownsampleEnv<ToDiscreteWrapper<TimeLimit<SuperMarioBrosEnv instance>>>>>>>>,
    render_mode='rgb_array'
    replay_memory_size=1000000,
    discount_factor=0.99,
    update_frequency=4,
    optimizer=<keras.optimizers.Adam object at 0x7f426d3ecb70>,
    exploration_rate=AnnealingVariable(initial_value=1.0, final_value=0.1, steps=1000000),
    loss=huber_loss,
    target_update_freq=10000,
    dueling_network=True
)

In [None]:
# persist the agent's repr (its configuration) next to the other results
agent_path = '{}/agent.py'.format(exp_directory)
with open(agent_path, 'w') as agent_file:
    agent_file.write(repr(agent))

## Initial

In [None]:
# play 5 games before any training and collect what agent.play returns
# into a Series for later inspection
initial = pd.Series(agent.play(games=5))
initial

 60%|██████    | 3/5 [04:14<02:49, 84.88s/game]

In [None]:
# save the pre-training results to <exp_directory>/initial.csv
initial.to_csv('{}/initial.csv'.format(exp_directory))

In [None]:
# summary statistics of the pre-training results
initial.describe()

In [None]:
# histogram of the pre-training results, saved as a PDF in the plot directory
initial_plot_path = '{}/initial.pdf'.format(plot_dir)
initial.hist()
plt.savefig(initial_plot_path)

## Training

In [None]:
# NOTE(review): presumably lets the agent gather experience before training
# starts — confirm against the agent's observe() implementation
agent.observe()

In [None]:
# callback handed to training; its `scores` and `losses` are read back afterwards
callback = BaseCallback()

In [None]:
# train the agent, reporting to `callback`; frames_to_play sets the length of the run
agent.train(callback=callback, frames_to_play=2500000)

In [None]:
# wrap the histories recorded by the callback during training in Series
scores = pd.Series(callback.scores)
losses = pd.Series(callback.losses)
# write each history to its own CSV file in the experiment directory
scores.to_csv('{}/scores.csv'.format(exp_directory))
losses.to_csv('{}/losses.csv'.format(exp_directory))

In [None]:
# line up scores and losses side by side; `keys` names the two columns
train = pd.concat([scores, losses], axis=1, keys=['Reward', 'Loss'])
train.index.name = 'Episode'
# one subplot per column; the return value is bound to `_` and not used
_ = train.plot(subplots=True, figsize=(12, 5))
plt.savefig('{}/training.pdf'.format(plot_dir))

## Final

In [None]:
# play 5 games after training and collect what agent.play returns
# into a Series for later inspection
final = pd.Series(agent.play(games=5))
final

In [None]:
# save the post-training results to <exp_directory>/final.csv
final.to_csv('{}/final.csv'.format(exp_directory))

In [None]:
# summary statistics of the post-training results
final.describe()

In [None]:
# histogram of the post-training results, saved as a PDF in the plot directory
final_plot_path = '{}/final.pdf'.format(plot_dir)
final.hist()
plt.savefig(final_plot_path)

## Saving Weights

In [None]:
# save the model's weights to <exp_directory>/weights.h5; overwrite=True is passed
# so an existing file at that path is replaced
agent.model.save_weights('{}/weights.h5'.format(exp_directory), overwrite=True)