In [None]:
# standard library and third-party dependencies
import os
import gym
from keras import backend as K
from keras import optimizers
# helpers used below to build environments and to name the results directory
from _base import build_env, experiment_dir
from _top_level import top_level
# NOTE(review): presumably moves the notebook to the project's top level so the
# `src` package imported in the next cell resolves — confirm in _top_level
top_level()

In [None]:
from src.agents import DeepQAgent
from src.base import AnnealingVariable
from src.models.losses import huber_loss
from src.utils import BaseCallback
from src.utils import JupyterCallback
from src.utils import seed

In [None]:
# id of the environment the agent is trained on; it is also passed to
# experiment_dir when the output directory is derived
ENV_ID = 'SuperMarioBrosRandomStages-v0'

In [None]:
# set the random number seed
seed(1)
# make the output directory for this environment / agent-class combination
# NOTE(review): presumably experiment_dir also creates the directory on disk — confirm
OUTPUT_DIR = experiment_dir('../results', ENV_ID, DeepQAgent.__name__)
# display the resulting path
OUTPUT_DIR

In [None]:
# path of the file the model weights are saved to and later loaded from
# (only the path is built here; nothing is written to disk by this cell)
WEIGHTS_FILE = os.path.join(OUTPUT_DIR, 'weights.h5')

In [None]:
# create the environment
env = build_env(ENV_ID)
# seed the underlying (unwrapped) environment with the same value that was
# passed to seed() above
env.unwrapped.seed(1)
# display the environment
env

In [None]:
# exploration-rate schedule handed to the agent: initial value 1.0,
# final value 0.01, spread over 40e6 steps
exploration_schedule = AnnealingVariable(initial_value=1.0, final_value=0.01, steps=40e6)
# keyword arguments for the training agent, gathered in one place
agent_settings = dict(
    render_mode='human',
    replay_memory_size=750000,
    prioritized_experience_replay=False,
    discount_factor=0.99,
    update_frequency=4,
    optimizer='adam',
    exploration_rate=exploration_schedule,
    loss=huber_loss,
    target_update_freq=10000,
    dueling_network=False,
)
# build the training agent around the environment and display it
agent = DeepQAgent(env, **agent_settings)
agent

In [None]:
# observe random movement in the environment to pre-fill the experience
# replay queue with memories
# NOTE(review): called without arguments, so how much is observed is decided
# by observe() itself — confirm its default
agent.observe()

In [None]:
# callbacks handed to the training procedure: one built from the weights file
# path (used to log metrics) and one Jupyter-specific callback
callback = BaseCallback(WEIGHTS_FILE)
plotter = JupyterCallback()
training_callbacks = [callback, plotter]
# first positional argument of agent.train
# (presumably the length of the training run — confirm the unit)
training_length = 50e6
# run the training procedure with both callbacks attached
agent.train(training_length, callback=training_callbacks)
# once training has returned, write the final weights to disk, replacing any
# existing file at that path
agent.model.save_weights(WEIGHTS_FILE, overwrite=True)

In [None]:
# export what the training callback recorded, using the 'training' path
# inside the output directory
callback.export(os.path.join(OUTPUT_DIR, 'training'))

In [None]:
# clear the keras session to remove the training model from memory
K.clear_session()
# build the validation environment with a monitor attached; the monitor is
# pointed at the 'play' path inside the output directory
# NOTE(review): validation runs on 'SuperMarioBros-v0', not on ENV_ID —
# confirm this is intentional
validation_env = build_env('SuperMarioBros-v0', monitor_dir=os.path.join(OUTPUT_DIR, 'play'))
# create the validation agent. the architecture flag is pinned to the value
# used for training (dueling_network=False) rather than left to the
# constructor default, so the weights saved in WEIGHTS_FILE fit the model
agent = DeepQAgent(validation_env, dueling_network=False)
# load the trained weights into the validation agent
agent.model.load_weights(WEIGHTS_FILE)

In [None]:
# run the agent through validation episodes
# the returned object (presumably a results table — confirm) is kept so the
# following cells can inspect it
df = agent.play()

In [None]:
# preview the beginning of the validation results
df.head()

In [None]:
# preview the end of the validation results
df.tail()

In [None]:
# plot the validation results and save the tables and figures to disk
# (the target is the same 'play' path that was given to the validation
# environment as its monitor directory)
agent.plot_episode_rewards(os.path.join(OUTPUT_DIR, 'play'))