# Breakout
## 1. Import

In [2]:
import os
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env

## 2. Test Env

In [9]:
# Notebook-wide settings: which game to load and how to render it.
# Both names are reused by later cells.
env_name = "Breakout-v4"
render_mode = 'human'

# Single (non-vectorised) environment used only for the random-play test below.
env = gym.make(env_name, render_mode=render_mode)

# Override the frame rate advertised in the environment's metadata dict.
env.metadata.update(render_fps=120)

In [5]:
# Display the environment's action space (the set of actions an agent may pick).
env.action_space

Discrete(4)

In [6]:
# Display the environment's observation space (shape/dtype of what step() returns).
env.observation_space

Box(0, 255, (210, 160, 3), uint8)

In [7]:
# Sanity-check the environment by playing a few episodes with uniformly
# random actions and printing the total reward collected in each one.
episodes = 5
try:
    # Use a distinct loop variable so the episode count above is not overwritten.
    for episode in range(1, episodes + 1):
        # Gymnasium's reset() returns an (observation, info) pair.
        obs, info = env.reset()
        terminated = truncated = False
        score = 0

        # An episode is over when the game ends (terminated) OR when it is cut
        # short, e.g. by a step limit (truncated). Checking only `terminated`
        # would keep stepping an environment that has already finished.
        while not (terminated or truncated):
            if render_mode:
                env.render()
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment's resources, even if the loop is interrupted.
    env.close()

Episode:1 Score:1.0
Episode:2 Score:0.0
Episode:3 Score:1.0
Episode:4 Score:2.0
Episode:5 Score:0.0


## 3. Vectorise Env & Train Model

In [6]:
# Training environment: 4 copies of the game created via make_atari_env
# (seeded with 0), wrapped in a VecFrameStack with n_stack=4.
env = VecFrameStack(
    make_atari_env(env_name, n_envs=4, seed=0),
    n_stack=4,
)

In [7]:
# TensorBoard logs are written below Training/Logs (relative to the working dir).
log_path = os.path.join('Training', 'Logs')

# A2C agent with the CNN policy, bound to the vectorised training env.
model = A2C(
    policy='CnnPolicy',
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [11]:
# Train for 100,000 environment timesteps. With verbose=1 a progress table is
# printed periodically; learn()'s return value is shown as the cell output.
model.learn(total_timesteps=100_000)

Logging to Training/Logs/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 265      |
|    ep_rew_mean        | 1.31     |
| time/                 |          |
|    fps                | 207      |
|    iterations         | 100      |
|    time_elapsed       | 9        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.032    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.101    |
|    value_loss         | 0.157    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 260      |
|    ep_rew_mean        | 1.17     |
| time/                 |          |
|    fps                | 223      |
|    iterations         | 200      |
|    time_elapsed       | 17       |
|    total_timesteps    | 4000     |
| train

<stable_baselines3.a2c.a2c.A2C at 0x7f38e02c32d0>

## 4. Save and Reload Model

In [15]:
# Destination of the trained model, relative to the working directory.
# `a2c_path` is reused by the reload cell.
a2c_path = os.path.join(
    'Training',
    'Saved_Models',
    'A2C_Breakout_Model',
)
model.save(path=a2c_path)

In [13]:
# Drop the in-memory model so the next cell demonstrates reloading it from disk.
del model

In [8]:
# Restore the saved agent from `a2c_path` and attach the current `env` to it.
model = A2C.load(a2c_path, env=env)

Wrapping the env in a VecTransposeImage.


## 5. Evaluate and Test

In [11]:
# Evaluation environment: a single game instance (seed 0) with the same
# n_stack=4 frame stacking used for training.
#
# The notebook's `render_mode` is forwarded to the underlying gym.make() call
# through `env_kwargs`. Without it the evaluation env is not created in
# 'human' mode, so evaluate_policy(render=True) would not open a game window.
# When no render mode is configured, the library default is left untouched.
env = make_atari_env(
    env_name,
    n_envs=1,
    seed=0,
    env_kwargs={'render_mode': render_mode} if render_mode else None,
)
env = VecFrameStack(env, n_stack=4)

In [14]:
# Run the agent for 10 evaluation episodes; the cell output is the tuple
# returned by evaluate_policy. Rendering is requested only when a render
# mode is configured.
evaluate_policy(
    model,
    env,
    n_eval_episodes=10,
    render=bool(render_mode),
)

(6.8, 1.5362291495737217)