In [1]:
from model import DDQN
from dqn import DeepQLearning
from atari_wrappers import PongWrapper

import torch
import numpy as np

import os, random

In [2]:
# --- Hyperparameters -------------------------------------------------------
# Every value here is forwarded to PongWrapper / DDQN / DeepQLearning in the
# setup code that follows; the precise meaning of each one is defined by those
# classes.

image_size = 42
stack_size = 4      # number of consecutive frames stacked

lr = 0.00025
gamma = 0.99

train_freq = 4
replay_buffer_size = 10000
initial_replay_buffer_size = 10000
target_update_freq = 1000

# Step counts are kept as ints: a bare `1e5` literal is a float, so wrap it the
# same way `total_steps` is wrapped.
total_steps = int(5e5)
max_steps_per_ep = 50000
batch_size = 32 * train_freq    # 128 with train_freq = 4

initial_epsilon = 1.0
final_epsilon = 0.01
num_decay_steps = int(1e5)

# --- Logging / checkpoint locations ----------------------------------------
log_path = "tensorboard/ddqn/pong"
save_freq = 100000
save_path = "models/ddqn"
# `save_path` is nested, so the parent directory ("models") may not exist yet:
# os.mkdir would raise FileNotFoundError on a fresh checkout. makedirs creates
# the whole chain, and exist_ok=True makes re-running this cell a no-op.
os.makedirs(save_path, exist_ok=True)


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
seed = 0

# Build the wrapped environment from the image/stack settings defined earlier.
env = PongWrapper({
    'env_name': "PongDeterministic-v4",
    'image_size': image_size,
    'stack_size': stack_size,
})

# Seed every random source with the same value, in a fixed order:
# the wrapped env, Python's `random`, NumPy, then PyTorch.
for seed_fn in (env.env.seed, random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)
    
# Convolutional layer spec passed straight through to DDQN; the meaning of each
# column is defined by the DDQN class in model.py.
conv_spec = np.array([[32,4,2,0], [64,4,2,0], [64,3,1,0]])
input_shape = (stack_size, image_size, image_size)

# Network: built from the stacked-frame input shape and the env's action count,
# then moved to the selected device.
dqn = DDQN(input_shape, env.action_space.n, conv_spec, [512]).to(device)

# Trainer settings, collected in one place and forwarded as keyword arguments.
trainer_kwargs = dict(
    total_steps=total_steps,
    replay_buffer_size=replay_buffer_size,
    initial_buffer_size=initial_replay_buffer_size,
    train_freq=train_freq,
    target_update_freq=target_update_freq,
    gamma=gamma,
    lr=lr,
    batch_size=batch_size,
    max_steps_per_ep=max_steps_per_ep,
    initial_epsilon=initial_epsilon,
    final_epsilon=final_epsilon,
    num_decay_steps=num_decay_steps,
    save_freq=save_freq,
    save_path=save_path,
    log_path=log_path,
    device=device,
)

deepQ = DeepQLearning(env, dqn, **trainer_kwargs)

In [3]:
# Run training with the trainer configured in the previous cell. The
# "Frame ID: ..." and "Saved model at step: ..." lines in this cell's output
# were emitted while this call ran.
deepQ.train()

Frame ID: 10000, episode: 163, average return last 100 episodes: -0.98, fps: 298.8844643084881
Frame ID: 20000, episode: 325, average return last 100 episodes: -0.98, fps: 290.26945672814605
Frame ID: 30000, episode: 467, average return last 100 episodes: -0.94, fps: 281.9138671452645
Frame ID: 40000, episode: 601, average return last 100 episodes: -0.95, fps: 274.08181234643257
Frame ID: 50000, episode: 729, average return last 100 episodes: -0.96, fps: 267.8911851225067
Frame ID: 60000, episode: 841, average return last 100 episodes: -0.92, fps: 261.7069510274526
Frame ID: 70000, episode: 951, average return last 100 episodes: -0.91, fps: 256.25869351754733
Frame ID: 80000, episode: 1033, average return last 100 episodes: -0.79, fps: 251.26356260813026
Frame ID: 90000, episode: 1113, average return last 100 episodes: -0.82, fps: 247.00507248672977
Frame ID: 100000, episode: 1173, average return last 100 episodes: -0.71, fps: 242.39373064848985
Saved model at step: 100000
Frame ID: 11