# Atari Game

### 0. Import libraries

It is assumed that the conda environment defined in environment.yaml has already been created.

If not, use: conda env create --file=environment.yaml

In [1]:
import gym 
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy


In [2]:
# Print every device TensorFlow reports as locally available.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 13613357404959763401
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 5717884928
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6573289936603156003
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9"
xla_global_id: 416903419
]


In [3]:
import tensorflow
# Count the GPUs TensorFlow can see. Uses the stable tf.config API rather than
# the deprecated tf.config.experimental alias.
print("Num GPUs Available: ", len(tensorflow.config.list_physical_devices('GPU')))

Num GPUs Available:  1


### 1. Test random environment

In [4]:
# Create the Space Invaders environment (no render_mode requested).
env = gym.make('SpaceInvaders-v4')
# Alternative: request render_mode="human" when creating the environment.
#env = gym.make('SpaceInvaders-v4', render_mode="human")
# Observation frame dimensions and number of discrete actions; these are the
# arguments later passed to build_model / build_agent.
height, width, channels = env.observation_space.shape
actions = env.action_space.n
# Last expression: display the extracted values as the cell output.
height, width, channels, actions

(210, 160, 3, 6)

In [5]:
# Show the human-readable names of the available actions.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [6]:
# Baseline: play a few episodes with random actions and print each episode's
# total score, to see what an untrained player achieves.
episodes = 15
for episode in range(1, episodes + 1):
    env.reset()  # start a fresh game; the initial observation is not used here
    done = False
    score = 0

    while not done:
        # Sample from the environment's own action space instead of a
        # hard-coded [0..5] list, so the loop stays valid for any action count.
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        # An episode ends either by termination or by truncation.
        done = terminated or truncated
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
# NOTE: env is intentionally left open here -- the training cell further down
# passes this same env object to dqn.fit().

  if not isinstance(terminated, (bool, np.bool8)):


Episode:1 Score:60.0
Episode:2 Score:160.0
Episode:3 Score:140.0
Episode:4 Score:95.0
Episode:5 Score:310.0
Episode:6 Score:100.0
Episode:7 Score:45.0
Episode:8 Score:185.0
Episode:9 Score:110.0
Episode:10 Score:85.0
Episode:11 Score:50.0
Episode:12 Score:15.0
Episode:13 Score:365.0
Episode:14 Score:265.0
Episode:15 Score:120.0


### 2. Create a DL model

In [7]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    Args:
        height: Frame height in pixels.
        width: Frame width in pixels.
        channels: Number of colour channels per frame.
        actions: Number of discrete actions; sets the size of the output layer.
        window_length: Number of stacked frames per observation. It has to
            agree with the ``window_length`` of the replay memory that feeds
            the model. Defaults to 3, the value that used to be hard-coded.

    Returns:
        An uncompiled Keras ``Sequential`` model whose input shape is
        ``(window_length, height, width, channels)`` and whose output has one
        linear unit per action.
    """
    input_shape = (window_length, height, width, channels)
    with tensorflow.device('/gpu:0'):
        model = Sequential()
        # The convolutions keep the leading frame-stack axis; it is only
        # collapsed by Flatten below.
        model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=input_shape))
        model.add(Convolution2D(50, (4,4), strides=(2,2), activation='relu'))
        model.add(Convolution2D(50, (3,3), activation='relu'))
        model.add(Flatten())
        model.add(Dense(400, activation='relu'))
        model.add(Dense(300, activation='relu'))
        model.add(Dense(200, activation='relu'))
        # Linear head: one unbounded Q-value estimate per action.
        model.add(Dense(actions, activation='linear'))
    return model

In [8]:
# Build the network for the frame size and action count read from the
# environment, then print its layer-by-layer summary.
model = build_model(height, width, channels, actions)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 3, 51, 39, 32)     6176      
                                                                 
 conv2d_1 (Conv2D)           (None, 3, 24, 18, 50)     25650     
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 22, 16, 50)     22550     
                                                                 
 flatten (Flatten)           (None, 52800)             0         
                                                                 
 dense (Dense)               (None, 400)               21120400  
                                                                 
 dense_1 (Dense)             (None, 300)               120300    
                                                                 
 dense_2 (Dense)             (None, 200)               6

### 3. Build the DQN Agent

In [9]:
def build_agent(model, actions, window_length=3, memory_limit=1000,
                nb_steps_warmup=1000, anneal_steps=10000):
    """Assemble a dueling DQN agent around ``model``.

    Args:
        model: Keras model that outputs one value per action.
        actions: Number of discrete actions the agent can choose from.
        window_length: Number of consecutive observations the replay memory
            stacks into one state. Defaults to 3, the previously hard-coded
            value; it has to agree with the model's input shape.
        memory_limit: Maximum number of entries kept in the replay memory.
            NOTE(review): the default of 1000 is small compared with the
            10000 training steps used in this notebook -- confirm it is intended.
        nb_steps_warmup: Value forwarded to ``DQNAgent(nb_steps_warmup=...)``.
        anneal_steps: Number of steps over which the policy's ``eps`` is
            annealed from ``value_max`` (1.0) to ``value_min`` (0.1).

    Returns:
        An uncompiled ``DQNAgent``; call ``.compile(optimizer)`` before use.
    """
    # Epsilon-greedy exploration with a linearly annealed epsilon;
    # value_test is the epsilon setting supplied for test runs.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(), attr='eps',
        value_max=1., value_min=.1, value_test=.2,
        nb_steps=anneal_steps,
    )
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(
        model=model, memory=memory, policy=policy,
        enable_dueling_network=True, dueling_type='avg',
        nb_actions=actions, nb_steps_warmup=nb_steps_warmup,
    )
    return dqn



In [10]:
# Wrap the model in a DQN agent and compile it with Adam (learning rate 1e-4).
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-4))

In [12]:
# Rebuild the model and agent so training starts from freshly initialised
# weights, then train for 10000 steps under the '/gpu:0' device scope.
with tensorflow.device('/gpu:0'):
    model = build_model(height, width, channels, actions)
    dqn = build_agent(model, actions)
    # `lr` is a deprecated alias of `learning_rate`; use the current keyword,
    # matching the other compile() calls in this notebook.
    dqn.compile(Adam(learning_rate=1e-4))
    dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

Training for 10000 steps ...
  422/10000: episode: 1, duration: 7.987s, episode steps: 422, steps per second:  53, episode reward: 105.000, mean reward:  0.249 [ 0.000, 30.000], mean action: 2.637 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --


  updates=self.state_updates,


 1140/10000: episode: 2, duration: 138.354s, episode steps: 718, steps per second:   5, episode reward: 105.000, mean reward:  0.146 [ 0.000, 30.000], mean action: 2.362 [0.000, 5.000],  loss: 3.670008, mean_q: 2.365392, mean_eps: 0.903700
 1754/10000: episode: 3, duration: 548.849s, episode steps: 614, steps per second:   1, episode reward: 135.000, mean reward:  0.220 [ 0.000, 30.000], mean action: 2.445 [0.000, 5.000],  loss: 0.830860, mean_q: 1.892055, mean_eps: 0.869815
 2538/10000: episode: 4, duration: 698.695s, episode steps: 784, steps per second:   1, episode reward: 155.000, mean reward:  0.198 [ 0.000, 30.000], mean action: 2.546 [0.000, 5.000],  loss: 0.528761, mean_q: 2.291377, mean_eps: 0.806905
 3152/10000: episode: 5, duration: 546.985s, episode steps: 614, steps per second:   1, episode reward: 80.000, mean reward:  0.130 [ 0.000, 20.000], mean action: 2.536 [0.000, 5.000],  loss: 0.212084, mean_q: 1.889015, mean_eps: 0.743995
 3757/10000: episode: 6, duration: 539.71

In [14]:
# Save the agent's weights to disk, replacing any existing file of that name.
dqn.save_weights('atari_weights.h5f',overwrite=True)

In [15]:
# Evaluate the saved weights in a separate environment created with
# render_mode='human', using a freshly built model and agent.
env1 = gym.make('SpaceInvaders-v4', render_mode = 'human')
try:
    height, width, channels = env1.observation_space.shape
    actions = env1.action_space.n
    model1 = build_model(height, width, channels, actions)
    dqn1 = build_agent(model1, actions)
    dqn1.compile(Adam(learning_rate=1e-4))
    dqn1.load_weights('atari_weights.h5f')

    scores = dqn1.test(env1, nb_episodes=10, visualize=True)
    # Mean total reward over the evaluation episodes.
    print(np.mean(scores.history['episode_reward']))
finally:
    # Always release the environment (and its render window), even if
    # evaluation fails part-way through.
    env1.close()

Testing for 10 episodes ...


  updates=self.state_updates,
  if not isinstance(terminated, (bool, np.bool8)):
  logger.warn(


Episode 1: reward: 85.000, steps: 628
Episode 2: reward: 175.000, steps: 965
Episode 3: reward: 30.000, steps: 530
Episode 4: reward: 40.000, steps: 643
Episode 5: reward: 15.000, steps: 501
Episode 6: reward: 15.000, steps: 658
Episode 7: reward: 20.000, steps: 611
Episode 8: reward: 115.000, steps: 917
Episode 9: reward: 80.000, steps: 734
Episode 10: reward: 155.000, steps: 901
73.0
