# 1. Import Dependencies

In [39]:
import gymnasium as gym
import os
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
from ale_py import ALEInterface
from ale_py.roms import Breakout

# 2. Test Environment

In [40]:
# Disabled: manual ROM load through the ALE interface. Nothing else in the
# notebook references `ALEInterface` or `Breakout` from the import cell.
# ale = ALEInterface()
# ale.loadROM(Breakout)

In [41]:
# Create a single, unvectorized Breakout environment for a quick manual check.
# NOTE(review): `ennvironment_name` is misspelled ("environment"); the name is
# kept because it is a notebook-global variable.
ennvironment_name = 'ALE/Breakout-v5'
env = gym.make(ennvironment_name)

In [42]:
# Reset and display the result: a tuple of (initial RGB frame, info dict).
env.reset()

(array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], dtype=uint8),
 {'lives': 5, 'episode_frame_number': 0, 'frame_number': 0})

In [43]:
# Inspect the action space (four discrete actions for this environment).
env.action_space

Discrete(4)

In [44]:
# Inspect the observation space (210x160 RGB frames, uint8 in [0, 255]).
env.observation_space

Box(0, 255, (210, 160, 3), uint8)

In [45]:
# Random-policy baseline: play a few episodes with uniformly sampled actions
# and print the total reward collected in each one.
episodes = 10
for episode in range(1, episodes + 1):
    env.reset()  # returns (observation, info); neither is needed for random play
    terminated = truncated = False
    score = 0

    # Gymnasium signals the end of an episode through two separate flags.
    # Stop on either so the environment is never stepped past a truncation.
    while not (terminated or truncated):
        action = env.action_space.sample()
        _obs, reward, terminated, truncated, _info = env.step(action)
        score += reward
    print(f'Episode: {episode} Score: {score}')

Episode: 1 Score: 2.0
Episode: 2 Score: 0.0
Episode: 3 Score: 0.0
Episode: 4 Score: 0.0
Episode: 5 Score: 1.0
Episode: 6 Score: 2.0
Episode: 7 Score: 0.0
Episode: 8 Score: 2.0
Episode: 9 Score: 4.0
Episode: 10 Score: 4.0


In [46]:
# Close the raw environment; `env` is rebound to a vectorized environment in
# the next section.
env.close()

# 3. Vectorize Environment and Train Model

In [47]:
# Training environment: four Breakout copies created by make_atari_env
# (seed 73), wrapped so that observations are stacked with n_stack=4.
env = VecFrameStack(
    make_atari_env('ALE/Breakout-v5', n_envs=4, seed=73),
    n_stack=4,
)

In [48]:
# A2C agent with the CNN policy; TensorBoard logs are written below the
# Training/Logs/Breakout directory.
log_path = os.path.join('Training', 'Logs', 'Breakout')
model = A2C(
    policy='CnnPolicy',
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [49]:
# Train for 100k environment steps (progress tables are printed because the
# model was created with verbose=1).
model.learn(total_timesteps=100_000)

Logging to Training/Logs/Breakout/A2C_2
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 244         |
|    ep_rew_mean        | 2.05        |
| time/                 |             |
|    fps                | 245         |
|    iterations         | 100         |
|    time_elapsed       | 8           |
|    total_timesteps    | 2000        |
| train/                |             |
|    entropy_loss       | -1.38       |
|    explained_variance | 0.053818464 |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | -0.295      |
|    value_loss         | 0.148       |
---------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 239       |
|    ep_rew_mean        | 1.9       |
| time/                 |           |
|    fps                | 247       |
|    iterations         | 200       |
|    time_elap

<stable_baselines3.a2c.a2c.A2C at 0x17f643fa0>

# 4. Save and Reload Model

In [50]:
# Persist the trained agent; `a2c_path` is reused by the reload cell below.
a2c_path = os.path.join('Training', 'Saved_Models', 'A2C_Breakout')
model.save(a2c_path)



In [51]:
# Drop the in-memory agent so the following reload is what provides `model`.
del model

In [52]:
# Restore the agent from disk and attach it to the current environment.
model = A2C.load(a2c_path, env=env)

Wrapping the env in a VecTransposeImage.


# 5. Evaluate and Test

In [53]:
# Evaluation environment: a single Breakout copy (seed 73) with the same
# n_stack=4 frame stacking used for training.
env = VecFrameStack(
    make_atari_env('ALE/Breakout-v5', n_envs=1, seed=73),
    n_stack=4,
)

In [56]:
# Evaluate the reloaded agent over 50 episodes without rendering.
evaluate_policy(
    model,
    env,
    n_eval_episodes=50,
    render=False,
)

In [38]:
# Single prediction on a fresh observation; displays (action array, state),
# where the state is None for this model.
model.predict(env.reset())

(array([2]), None)

In [55]:
# Roll out the trained agent on the single-environment evaluation VecEnv and
# print the reward collected in each episode.
# NOTE(review): make_atari_env applies SB3's Atari wrappers; if life-loss
# termination is active, an "episode" here may be one life rather than a full
# game -- confirm before comparing scores with the random baseline.
episodes = 10
for episode in range(1, episodes + 1):
    obs = env.reset()
    done = False
    score = 0.0

    while not done:
        action, _states = model.predict(obs)
        # The vectorized step returns batched arrays with one entry per
        # environment (n_envs=1 here), so read index 0 explicitly instead of
        # relying on the truth value of a length-1 array.
        obs, rewards, dones, _infos = env.step(action)
        score += float(rewards[0])
        done = bool(dones[0])
    print(f'Episode: {episode} Score: {score}')

Episode: 1 Score: [4.]
Episode: 2 Score: [0.]
Episode: 3 Score: [0.]
Episode: 4 Score: [0.]
Episode: 5 Score: [0.]
Episode: 6 Score: [0.]
Episode: 7 Score: [0.]
Episode: 8 Score: [2.]
Episode: 9 Score: [0.]
Episode: 10 Score: [0.]


In [1]:
# Release the evaluation environment. This must run in the same kernel session
# as the cell that created `env`; the recorded NameError comes from executing
# it on a kernel where `env` had not been defined.
env.close()

NameError: name 'env' is not defined