In [1]:
!mkdir -p results results/double_dqn

In [2]:
%matplotlib inline
import pandas as pd

In [3]:
import gym
from gym.wrappers import Monitor

In [4]:
import base
from src.agents import DeepQAgent
from src.downsamplers import downsample_breakout

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Environment

In [5]:
# BreakoutDeterministic: frame skipping uses a static number of frames.
# v4: the 25% chance of ignoring the action and repeating the last one is disabled.
# The raw environment is wrapped in a Monitor writing to ./monitor/double_dqn/breakout.
# NOTE(review): force=True is expected to clear earlier monitor files in that
# directory - confirm against the installed gym version.
env = Monitor(
    gym.make('BreakoutDeterministic-v4'),
    './monitor/double_dqn/breakout',
    force=True,
)

In [6]:
# Display the observation space reported by the monitored environment.
env.observation_space

Box(210, 160, 3)

In [7]:
# Display the action space reported by the monitored environment.
env.action_space

Discrete(4)

# Agent

In [8]:
# Construct the agent around the monitored environment, then restore weights
# from disk so the saved model is the one used in the cells that follow.
agent = DeepQAgent(
    env,
    downsample_breakout,
    # presumably no replay buffer is needed because nothing is trained in this
    # notebook - TODO confirm against DeepQAgent
    replay_memory_size=0,
    render_mode='rgb_array',
)
agent.model.load_weights('./models/double_dqn/breakout.h5')

# Bare expression: show the agent's repr (its full configuration) as cell output.
agent

DeepQAgent(
    env=<Monitor<TimeLimit<AtariEnv<BreakoutDeterministic-v4>>>>,
    downsample=Downsampler(y=(32, 14), x=(8, 8), cut=[142]),
    replay_memory_size=0,
    agent_history_length=4,
    discount_factor=0.99,
    update_frequency=4,
    optimizer=<keras.optimizers.RMSprop object at 0x10a514f98>,
    exploration_rate=AnnealingVariable(initial_value=1.0, final_value=0.1, steps=1000000),
    null_op_max=30,
    null_op=0,
    loss=<function huber_loss at 0x118487158>,
    image_size=(84, 84),
    render_mode='rgb_array'
)

In [9]:
# Frame-rate setting handed to agent.play; 60 is the alternative the author kept
# at hand. NOTE(review): None presumably means "run unthrottled" - confirm
# against DeepQAgent.play.
fps = None # 60

# Play 100 games and keep the per-game scores as a pandas Series so that the
# summary statistics below can be computed directly on it.
scores = pd.Series(agent.play(games=100, fps=fps))
scores

100%|██████████| 100/100 [06:14<00:00,  3.74s/game]


0     24.0
1     25.0
2     20.0
3     52.0
4     39.0
5     51.0
6     10.0
7     55.0
8     20.0
9     19.0
10    13.0
11    17.0
12    30.0
13    23.0
14     4.0
15    18.0
16    20.0
17    20.0
18    24.0
19    14.0
20    27.0
21    47.0
22    14.0
23    11.0
24    46.0
25    23.0
26    15.0
27     5.0
28    19.0
29     6.0
      ... 
70    16.0
71    18.0
72    31.0
73    28.0
74    18.0
75    21.0
76    29.0
77    59.0
78     7.0
79    17.0
80    31.0
81    18.0
82    70.0
83    32.0
84    34.0
85    23.0
86    17.0
87    33.0
88    14.0
89    23.0
90    28.0
91    25.0
92    22.0
93    28.0
94    29.0
95     3.0
96    16.0
97    16.0
98    39.0
99    24.0
Length: 100, dtype: float64

In [10]:
# Average score across all games played above.
scores.mean()

25.71