In [1]:
import os
import gym
from keras import backend as K
from keras import optimizers
from _base import build_env, experiment_dir
from _top_level import top_level
# NOTE(review): presumably prepares the working directory / import path so the
# `src` package imported by the next cell resolves — confirm in _top_level
top_level()

Using TensorFlow backend.


In [2]:
from src.agents import DeepQAgent
from src.base import AnnealingVariable
from src.wrappers import wrap
from src.models.losses import huber_loss
from src.callbacks import BaseCallback
from src.callbacks import JupyterCallback
from src.utils import seed

In [3]:
# set the random number seed before anything stochastic is created
# NOTE(review): which generators `seed` covers is defined in src.utils — confirm
seed(1)

In [4]:
def make_environment(env_id='PongNoFrameskip-v4', random_seed=1, results_dir='../results'):
    """
    Build the wrapped training environment and its experiment directory.

    Args:
        env_id: the ID of the gym environment to make
        random_seed: the seed passed to the unwrapped environment
        results_dir: the root directory handed to `experiment_dir`

    Returns:
        a tuple of:
        - the environment returned by `wrap`
        - the output directory returned by `experiment_dir` for the given
          results root, environment ID, and agent class name

    """
    # the preprocessing options are fixed for this experiment; only the game,
    # the seed, and the results root are configurable
    env = wrap(gym.make(env_id),
        cache_rewards=True,
        noop_max=30,
        frame_skip=4,
        max_frame_skip=False,
        image_size=(84, 84),
        death_penalty=-1,
        clip_rewards=True,
        agent_history_length=4
    )
    # seed the innermost environment rather than the outermost wrapper
    env.unwrapped.seed(random_seed)
    output_dir = experiment_dir(results_dir, env_id, DeepQAgent.__name__)
    return env, output_dir

In [5]:
# make the wrapped environment and the output directory that the weights,
# training metrics, and validation results are written under
env, output_dir = make_environment()

In [6]:
# display the stack of wrappers applied to the environment
env

<FrameStackEnv<ClipRewardEnv<PenalizeDeathEnv<DownsampleEnv<FrameSkipEnv<FireResetEnv<NoopResetEnv<RewardCacheEnv<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>>>>>>>>

In [7]:
# display the output directory chosen for this run
output_dir

'../results/PongNoFrameskip-v4/DeepQAgent/2019-01-20_04-02'

In [8]:
# build the path of the weights file inside the output directory (this only
# computes the path; the file itself is written when the weights are saved)
weights_file = os.path.join(output_dir, 'weights.h5')

In [None]:
# create the training agent on the wrapped environment
# NOTE(review): render_mode='human' presumably renders the game while the
# agent runs, which may slow a 5e6-step training run — confirm this is intended
# NOTE(review): the exploration rate presumably anneals from 1.0 to 0.01 over
# `steps`; steps is passed as the float 1e6 — confirm AnnealingVariable
# accepts a float here
agent = DeepQAgent(env,
    render_mode='human',
    replay_memory_size=750000,
    prioritized_experience_replay=False,
    discount_factor=0.99,
    update_frequency=4,
    optimizer=optimizers.Adam(lr=2e-5),
    exploration_rate=AnnealingVariable(initial_value=1.0, final_value=0.01, steps=1e6),
    loss=huber_loss,
    target_update_freq=10000,
    dueling_network=False,
)
# display the agent's repr as the cell output
agent

In [None]:
# pre-fill the experience replay queue with memories by observing random
# movement in the environment before training starts
agent.observe()

In [None]:
# create a callback for the training procedure to log metrics
# NOTE(review): BaseCallback is given the weights path, so it presumably
# checkpoints weights during training — confirm in src.callbacks
callback = BaseCallback(weights_file)
# second callback; presumably draws live progress in the notebook — confirm
plotter = JupyterCallback()
# train the agent with given parameters and both callbacks
# NOTE(review): the training length is passed as the float 5e6 — confirm that
# `train` accepts a float
agent.train(5e6, callback=[callback, plotter])
# save the weights to disk after the training procedure, replacing any
# existing file at that path
agent.model.save_weights(weights_file, overwrite=True)

In [None]:
# export what the training callback collected, using `<output_dir>/training`
# as the destination passed to export
callback.export(os.path.join(output_dir, 'training'))

In [None]:
# clear the keras session to remove the training model from memory
K.clear_session()
# create a validation agent on a freshly made copy of the training game; the
# environment ID is read from the training environment's registered spec so
# this cell does not depend on a name that is never defined in the notebook
# NOTE(review): only death_penalty is passed to wrap here, so every other
# option falls back to wrap's defaults — confirm they match the settings used
# in make_environment, otherwise the loaded weights may not fit the model
agent = DeepQAgent(wrap(gym.make(env.unwrapped.spec.id), death_penalty=-1))
# load the trained weights into the validation agent
agent.model.load_weights(weights_file)

In [None]:
# run the agent through validation episodes and keep the returned results
# NOTE(review): `df` is presumably a pandas DataFrame, given the head()/tail()
# calls in the following cells — confirm
df = agent.play()

In [None]:
# preview the first rows of the validation results
df.head()

In [None]:
# preview the last rows of the validation results
df.tail()

In [None]:
# plot the validation results and save the tables and figures to disk, using
# `<output_dir>/play` as the destination passed to the agent
agent.plot_episode_rewards(os.path.join(output_dir, 'play'))