# 1. Import Dependencies

In [2]:
import gym
import os
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env

# 2. Test Environment

In [3]:
# http://www.atarimania.com/roms/Roms.rar
!python -m atari_py.import_roms r"D:\Roms\ROMS"

In [3]:
# Gym registry id of the game; reused below when building the vectorised envs.
env_name = 'Breakout-v0'
# Plain (non-vectorised) environment used for the manual sanity checks that follow.
env = gym.make(id=env_name)

In [7]:
# Reset the environment; the returned initial observation (a uint8 image array)
# is shown as the cell output.
env.reset()

array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)

In [8]:
# Inspect the action space; for this environment it displays as Discrete(4).
env.action_space

Discrete(4)

In [9]:
# Inspect the observation space; it displays as a Box whose bounds run from 0 to 255.
env.observation_space

Box([[[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 ...

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]], [[[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 ...

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
 

In [11]:
# Sanity check: play a few episodes with randomly sampled actions and print
# the total reward collected in each one.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # No policy yet: draw an action from the action space.
            action = env.action_space.sample()
            # step() is unpacked as the 4-tuple (obs, reward, done, info).
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode: {} Score: {}'.format(episode, score))
finally:
    # Close the environment even if the loop is interrupted or raises, so the
    # render window is not left open.
    env.close()

Episode: 1 Score: 0.0
Episode: 2 Score: 4.0
Episode: 3 Score: 1.0
Episode: 4 Score: 2.0
Episode: 5 Score: 0.0


In [13]:
# Close the test environment before building the vectorised training environment.
# The random-play cell above already calls env.close(), so this is a repeat call.
env.close()

# 3. Vectorise Environment and Train Model

In [15]:
# Training environment: four Atari environments created with seed 0, wrapped
# so that observations are stacked four frames deep (n_stack=4).
env = VecFrameStack(
    make_atari_env(env_name, n_envs=4, seed=0),
    n_stack=4,
)

In [5]:
# Directory passed to the model as its TensorBoard log location.
log_path = os.path.join('Training', 'logs')
# A2C agent using the 'CnnPolicy' policy on the current `env`; verbose=1 turns
# on the progress output shown during training.
model = A2C(
    policy='CnnPolicy',
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [7]:
# Train the agent for 1000 timesteps. learn() is the last expression in the
# cell, so its return value (the model) is echoed as the cell output.
model.learn(total_timesteps=1000)

Logging to Training\logs\A2C_2
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 240      |
|    ep_rew_mean        | 1        |
| time/                 |          |
|    fps                | 16       |
|    iterations         | 100      |
|    time_elapsed       | 30       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | -40.3    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6951     |
|    policy_loss        | 0.00707  |
|    value_loss         | 0.000328 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 228       |
|    ep_rew_mean        | 0.75      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 200       |
|    time_elapsed       | 55        |
|    total_timesteps    | 1000      

<stable_baselines3.a2c.a2c.A2C at 0x1f7ed1f41c0>

# 4. Save and Reload Model

In [None]:
# Saved models live under Training/Saved Models; a2c_path is reused when reloading.
save_dir = os.path.join('Training', 'Saved Models')
a2c_path = os.path.join(save_dir, 'a2c_breakout_model')
# Write the trained agent to disk.
model.save(a2c_path)

In [None]:
# Remove the in-memory model so the next cell has to restore it from the saved file.
del model

In [None]:
# Restore the agent from the saved file and attach it to the current `env`.
model = A2C.load(path=a2c_path, env=env)

# 5. Evaluate and Test

In [None]:
# Evaluation environment: a single Atari environment (seed 0) with the same
# four-frame stacking used for training.
env = VecFrameStack(
    make_atari_env(env_name, n_envs=1, seed=0),
    n_stack=4,
)

In [None]:
# Run the loaded model for 10 evaluation episodes with rendering enabled; the
# value returned by evaluate_policy is shown as the cell output.
# NOTE(review): the environment is not closed after this cell -- consider
# calling env.close() once evaluation is finished so the render window is released.
evaluate_policy(model, env, n_eval_episodes=10, render=True)