# 1. Import Dependencies

In [1]:
import gym 
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

# 2. Test Environment

In [2]:
!python -m atari_py.import_roms .\ROMS\ROMS

copying adventure.bin from .\ROMS\ROMS\Adventure (1980) (Atari, Warren Robinett) (CX2613, CX2613P) (PAL).bin to C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\atari_roms\adventure.bin
copying air_raid.bin from .\ROMS\ROMS\Air Raid (Men-A-Vision) (PAL) ~.bin to C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\atari_roms\air_raid.bin
copying alien.bin from .\ROMS\ROMS\Alien (1982) (20th Century Fox Video Games, Douglas 'Dallas North' Neubauer) (11006) ~.bin to C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\atari_roms\alien.bin
copying amidar.bin from .\ROMS\ROMS\Amidar (1982) (Parker Brothers, Ed Temple) (PB5310) ~.bin to C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\atari_roms\amidar.bin
copying assault.bin from .\ROMS\ROMS\Assault (AKA Sky Alien) (1983) (Bomb - Onbase) (CA281).bin to C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\atari_roms\assault.bin
copying asterix.bin from .\ROMS\ROMS\Asterix (AKA Taz) (07-27-1983) (Atari, Jerome Domurat, Steve Woita) (CX269

Traceback (most recent call last):
  File "C:\Users\tolga\Anaconda3\lib\runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\tolga\Anaconda3\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\import_roms.py", line 93, in <module>
    main()
  File "C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\import_roms.py", line 89, in main
    import_roms(args.dirpath)
  File "C:\Users\tolga\Anaconda3\lib\site-packages\atari_py\import_roms.py", line 74, in import_roms
    with open(filepath, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: '.\\ROMS\\ROMS\\Name This Game (Guardians of Treasure) (1983) (U.S. Games Corporation - JWDA, Roger Booth, Sylvia Day, Ron Dubren, Todd Marshall, Robin McDaniel, Wes Trager, Henry Will IV) (VC1007) ~.bin'


In [3]:
enviroment_name = 'Breakout-v0'
env = gym.make(enviroment_name)

In [4]:
env.reset().shape

(210, 160, 3)

In [5]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        env.render()
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:2.0
Episode:2 Score:2.0
Episode:3 Score:1.0
Episode:4 Score:0.0
Episode:5 Score:0.0


In [6]:
env.close()

# 3. Vectorise Environment and Train Model

In [7]:
env = make_atari_env('Breakout-v0', n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=4)

In [8]:
env.close()

In [9]:
log_path = os.path.join('Training', 'Logs')

In [10]:
model = A2C('CnnPolicy', env, verbose = 1, tensorboard_log = log_path)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [11]:
model.learn(total_timesteps=5000)

Logging to Training\Logs\A2C_6
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 286      |
|    ep_rew_mean        | 1.69     |
| time/                 |          |
|    fps                | 117      |
|    iterations         | 100      |
|    time_elapsed       | 16       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | 0.192    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.249    |
|    value_loss         | 0.212    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 277      |
|    ep_rew_mean        | 1.51     |
| time/                 |          |
|    fps                | 116      |
|    iterations         | 200      |
|    time_elapsed       | 34       |
|    total_timesteps    | 4000     |
| train

<stable_baselines3.a2c.a2c.A2C at 0x14632dc2f40>

# 4. Save and Reload Model

In [12]:
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_Breakout_Model')
model.save(a2c_path)

In [31]:
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_Breakout_Model')
env = make_atari_env('Breakout-v0', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)
model = A2C.load(a2c_path, env)

Wrapping the env in a VecTransposeImage.


# 5. Evaluate and Test

In [23]:
env = make_atari_env('Breakout-v0', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [28]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

(2.3, 0.45825756949558405)

In [25]:
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()

KeyboardInterrupt: 

In [29]:
env.close()