## 1. Applying GPU-Acceleration

In [1]:
!pip3 install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html

Looking in links: https://download.pytorch.org/whl/torch_stable.html


## 2. Import Dependencies

In [2]:
# Imports
import os # os = operating system
import sys

import gym
from stable_baselines3 import A2C #Algorithm
from stable_baselines3.common.vec_env import VecFrameStack #Training mulitple Environments at the same time
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env #Supporting tool for atari environments

## 3. Testing Environment 

In [3]:
!python -m atari_py.import_roms C:\Roms\ROMS

copying adventure.bin from C:\Roms\ROMS\Adventure (1980) (Atari, Warren Robinett) (CX2613, CX2613P) (PAL).bin to C:\Users\Ilyass\AppData\Local\Programs\Python\Python39\lib\site-packages\atari_py\atari_roms\adventure.bin
copying air_raid.bin from C:\Roms\ROMS\Air Raid (Men-A-Vision) (PAL) ~.bin to C:\Users\Ilyass\AppData\Local\Programs\Python\Python39\lib\site-packages\atari_py\atari_roms\air_raid.bin
copying alien.bin from C:\Roms\ROMS\Alien (1982) (20th Century Fox Video Games, Douglas 'Dallas North' Neubauer) (11006) ~.bin to C:\Users\Ilyass\AppData\Local\Programs\Python\Python39\lib\site-packages\atari_py\atari_roms\alien.bin
copying amidar.bin from C:\Roms\ROMS\Amidar (1982) (Parker Brothers, Ed Temple) (PB5310) ~.bin to C:\Users\Ilyass\AppData\Local\Programs\Python\Python39\lib\site-packages\atari_py\atari_roms\amidar.bin
copying assault.bin from C:\Roms\ROMS\Assault (AKA Sky Alien) (1983) (Bomb - Onbase) (CA281).bin to C:\Users\Ilyass\AppData\Local\Programs\Python\Python39\lib\si

In [4]:
# Smoke-test the raw environment with a random agent: play a few episodes with
# uniformly sampled actions and print the total reward collected in each one.
environment_name = 'Breakout-v0'
env = gym.make(environment_name)
episodes = 5
try:
    for episode in range(1, episodes + 1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()  # random action, no policy involved
            n_state, reward, done, info = env.step(action)
            score += reward
        # Use str.format on the template; the builtin format(value, spec) would
        # raise TypeError here because `score` is not a format-spec string.
        print('Episode:{}, Score:{}'.format(episode, score))
finally:
    # Release the environment even if an episode fails part-way through.
    env.close()

Exception: ROM is missing for breakout, see https://github.com/openai/atari-py#roms for instructions

## 4. Vectorize Environment and Train Model

In [None]:
# Build four seeded Breakout environments and wrap them so that each observation
# is a stack of the last four frames.
env = VecFrameStack(
    make_atari_env('Breakout-v0', n_envs=4, seed=0),
    n_stack=4,
)

In [None]:
# TensorBoard logs are written below Training/Logs (relative to the working directory).
log_path = os.path.join('Training', 'Logs')

# A2C agent with the CNN policy, bound to the vectorized environment.
model = A2C(
    'CnnPolicy',
    env,
    verbose=1,
    tensorboard_log=log_path,
)

In [None]:
# Train the agent for 100k environment timesteps.
model.learn(total_timesteps=100_000)

## 5. Save and Reload Model

In [None]:
# Persist the trained agent below Training/Saved Models.
a2c_path = os.path.join(
    'Training',
    'Saved Models',
    'A2C Breakout Model',
)
model.save(a2c_path)

In [None]:
# del model

In [None]:
# a2c_path = os.path.join('Training', 'Saved Models', 'Other A2C Model')

In [None]:
# model = A2C.load(a2c_path, env)

## 6. Evaluate and Test

In [None]:
# Evaluate the policy on a single environment: rebuild the frame-stacked Breakout
# env with n_envs = 1, then run five rendered evaluation episodes.
env = VecFrameStack(
    make_atari_env('Breakout-v0', n_envs=1, seed=0),
    n_stack=4,
)
evaluate = evaluate_policy(model, env, n_eval_episodes=5, render=True)

In [None]:
# Close the evaluation environment once evaluate_policy has finished.
# NOTE(review): the following cell calls env.reset() on this same `env` after it
# has been closed — confirm that is supported, or recreate the env there.
env.close()

In [None]:
# Watch the trained agent play: for each episode, let the model choose every
# action and accumulate the reward it collects until the episode ends.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action, _ = model.predict(obs) # Now using model here
            # A vectorized env returns (obs, rewards, dones, infos), in that order,
            # each batched over the sub-environments.
            obs, rewards, dones, infos = env.step(action)
            # The evaluation env is built with n_envs = 1, so index 0 is the only
            # sub-environment; unwrap it to get a scalar reward and done flag.
            score += rewards[0]
            done = dones[0]
        print(f'Episode:{episode}, Score:{score}')
finally:
    # Release the environment even if an episode fails part-way through.
    env.close()