### Train DDQN with Prioritised experience replay on Breakout

In [None]:
import os
import sys

import numpy as np
import torch

In [None]:
# This notebook is intended for Google Colab; this import fails fast anywhere else.
from google.colab import files

Install gym (including the Atari extras) on the Colab machine

In [None]:
!pip install gym
!pip install gym['atari']

In [None]:
# Extend the import path, presumably so the `src` imports in the next cell
# resolve. Start from the parent of the working directory; if that parent's
# path still contains "notebooks", drop its last component as well.
working_dir = os.getcwd()
cwd = os.path.dirname(working_dir)
if "notebooks" in cwd:
    path_parts = cwd.split(os.sep)
    cwd = os.sep.join(path_parts[:-1])
# Only append once so re-running the cell does not duplicate the entry.
if cwd not in sys.path:
    sys.path.append(cwd)

In [None]:
from src.algorithms.double_deep_q_learning import DoubleDQNAtariAgent
from src.models import DDQN
from src.utils.env import make_atari, wrap_deepmind
from src.utils.logger import Logger
from src.utils.replay_memory import PrioritisedMemory

In [None]:
def _moving_average(interval, window_size):
    window = np.ones(int(window_size)) / float(window_size)
    return list(np.convolve(interval, window, "same"))

Define training parameters

In [None]:
# --- Replay memory / environment ---
CAPACITY = 500_000  # handed to PrioritisedMemory and to Logger(capacity=...)
SKIP_N = 2  # handed to wrap_deepmind(skip_n=...) and as the first DDQN argument

# --- Training length ---
frames = 1_000_000  # n_frames for agent.train
TARGET_UPDATE_FREQUENCY = 10_000  # the `C` argument of Logger and agent.train

# --- Exploration schedule, forwarded to DoubleDQNAtariAgent ---
EPSILON_METHOD = "linear"
EPSILON_FRAMES = int(0.2 * frames)  # 20% of the training frames
EPSILON_ARGS = [EPSILON_METHOD, EPSILON_FRAMES]
EPSILON_KWARGS = dict(epsilon_min=0.1)

# --- Observation size handed to wrap_deepmind (square) ---
width = 64
height = width

Define the env

In [None]:
# Build the raw Atari environment and wrap it in one expression, using the
# observation size and skip setting from the parameters cell.
env_name = "BreakoutNoFrameskip-v4"
env = wrap_deepmind(
    make_atari(env_name),
    width=width,
    height=height,
    skip_n=SKIP_N,
)
# Size of the action space, used as the last argument of the networks.
n_actions = env.action_space.n

Create the models

In [None]:
# Prefer the GPU when one is visible to torch, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Both networks are built from the same constructor arguments.
ddqn_args = (SKIP_N, width, n_actions)
model = DDQN(*ddqn_args).to(device)
target_model = DDQN(*ddqn_args).to(device)

# The target network starts with the online network's weights and is kept in
# eval mode.
target_model.load_state_dict(model.state_dict())
target_model.eval()

# NOTE(review): the second argument reuses EPSILON_FRAMES; confirm that this is
# the value PrioritisedMemory expects.
memory = PrioritisedMemory(CAPACITY, EPSILON_FRAMES)

Create a logger

In [None]:
!mkdir models

In [None]:
save_path = "/content/models"  # For colab!

# Each life counts as an episode, so there will be a lot
logger_options = dict(
    save_best=True,
    save_every=5000,
    log_every=500,
    C=TARGET_UPDATE_FREQUENCY,
    capacity=CAPACITY,
)
logger = Logger(save_path, **logger_options)

### Train

In [None]:
# Positional arguments: the two networks, the environment, the replay memory
# and the logger, followed by the epsilon settings from the parameters cell.
agent_args = (model, target_model, env, memory, logger, *EPSILON_ARGS)
agent = DoubleDQNAtariAgent(*agent_args, **EPSILON_KWARGS)

# Run training for the configured number of frames without rendering.
agent.train(
    n_frames=frames,
    C=TARGET_UPDATE_FREQUENCY,
    render=False,
)

In [None]:
# Download the three files from the Colab models directory, in the same order
# as before.
for artefact_path in (
    "/content/models/best_model.pth",
    "/content/models/q_values.png",
    "/content/models/rewards.png",
):
    files.download(artefact_path)