In [None]:
!pip install tensorflow gym keras-rl2 gym[atari]
%pip install -U gym
%pip install -U gym[atari,accept-rom-license]
%pip install -U gym[ale,accept-rom-license]
%pip install gym[atari]
%pip install autorom[accept-rom-license]

To render the OpenAI Gym environment and take random steps inside it to see how it performs, the following modules need to be imported.

In [1]:
import gym       #to be able to render the openAI environment
import random    #to take some random steps inside the environment to see how it performs

Generate the environment so we can see the Space Invaders game.

In [2]:
# Build the Space Invaders environment; the 'human' render mode is requested so the game is displayed.
env = gym.make('SpaceInvaders-v4', render_mode='human')
# Size of the discrete action set the agent can choose from.
actions = env.action_space.n
# Each observation frame unpacks as (height, width, channels); used to shape the network input.
height, width, channels = env.observation_space.shape

  logger.warn(


In [3]:
# List the human-readable name of each action index available to the spaceship.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [3]:
episodes = 3           # number of random-play games of Space Invaders to run
for episode in range(1, episodes + 1):
    # Reset the per-episode state.
    state = env.reset()
    done = False        # set by the environment once the game is over
    score = 0           # running total of the rewards collected this episode

    while not done:
        # Pick uniformly among every action the environment exposes instead of
        # a hard-coded index list, so the loop stays correct if the action set changes.
        action = random.randrange(env.action_space.n)
        # Apply the action and collect the next frame, reward, and done flag.
        n_state, reward, done, info = env.step(action)
        score += reward   # add this step's reward to the episode total
    print(f"Episode:{episode} Score:{score}")

Episode:1 Score:105.0
Episode:2 Score:20.0
Episode:3 Score:115.0


In [5]:
# Close the environment now that the random-play demo is finished.
# NOTE(review): later cells pass this same `env` to dqn.fit(...) and dqn.test(...);
# confirm the environment is still usable after close(), or re-create it before training.
env.close()

# Creating a Deep Learning Model with Keras

The deep learning model will be used side by side with the agent to 'learn' how best to operate in the OpenAI Gym environment.

In [5]:
import numpy as np
from tensorflow.python import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [6]:
def build_model(height, width, channels, actions, window_length=3):
    """Define the convolutional network the agent uses to score actions.

    Args:
        height: Height of one observation frame.
        width: Width of one observation frame.
        channels: Number of channels in one observation frame.
        actions: Number of discrete actions; the size of the output layer.
        window_length: Number of frames stacked into one network input.
            Must match the ``window_length`` of the agent's replay memory
            (3 in this notebook), which is why it defaults to 3.

    Returns:
        An uncompiled ``Sequential`` model with one linear output per action.
    """
    model = Sequential()
    # First convolution: large 8x8 kernels with stride 4 over the stacked frames.
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu',
                            input_shape=(window_length, height, width, channels)))
    # Second convolution stacked on top: 4x4 kernels with stride 2.
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    # Third convolution: 3x3 kernels with the default stride of 1 (pixel by pixel).
    model.add(Convolution2D(64, (3,3), activation='relu'))

    # Flatten the convolutional features so they can feed the dense layers.
    model.add(Flatten())

    # Fully connected layers: every unit connects to every unit in the next layer.
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # One linear output unit per action.
    model.add(Dense(actions, activation='linear'))
    return model

In [9]:
# Drop any model left over from a previous run so it can be rebuilt from scratch.
# Guarded so the cell also works on a fresh kernel, where `model` does not exist yet.
try:
    del model
except NameError:
    pass

In [9]:
# Instantiate the network from the environment's frame shape and action count.
model = build_model(height=height, width=width, channels=channels, actions=actions)

# Building the Keras-RL Agent

Dueling networks split the Q-value estimate into a state-value stream and an action-advantage stream, which helps the model learn when taking an action matters and when it does not. It is not a pair of competing networks so much as a modified network architecture.

In [7]:
from rl.agents import DQNAgent #the reinforcement agent we will be using
from rl.memory import SequentialMemory  #to hold the knowledge buffer for the reinforcement learning agents.
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy #EP to find the best reward outcome, linear to give a little bit of decay in order to close in on an optimal strategy.

In [8]:
def build_agent(model, actions, window_length=3):
    """Build the Keras-RL DQN agent around ``model``.

    Args:
        model: The Keras model that scores actions for the agent.
        actions: Number of discrete actions available to the agent.
        window_length: Number of consecutive frames the replay memory stacks
            into one observation. Must match the frame-stack depth of the
            model's input shape (3 in this notebook), hence the default of 3.

    Returns:
        An uncompiled ``DQNAgent`` with the dueling network enabled.
    """
    # Exploration schedule: the epsilon-greedy policy's `eps` is annealed
    # linearly from value_max to value_min over nb_steps; value_test is the
    # value configured for testing.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)

    # Replay buffer holding up to 1000 entries, read back in windows of frames.
    memory = SequentialMemory(limit=1000, window_length=window_length)

    # DQN agent with the 'avg' dueling architecture and a 1000-step warm-up.
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg',
                  nb_actions=actions, nb_steps_warmup=1000
                  )
    return dqn

In [31]:
dqn = build_agent(model, actions)
# Compile with Adam at a learning rate of 0.0001. `learning_rate` is used
# instead of the deprecated `lr` alias, which triggers a warning from Adam.
dqn.compile(Adam(learning_rate=1e-4))

In [14]:
# Train the agent on the environment for 10,000 steps with visualize=False;
# with verbose=2 the log below shows one summary line per finished episode.
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

  super(Adam, self).__init__(name, **kwargs)


Training for 10000 steps ...


  updates=self.state_updates,


  384/10000: episode: 1, duration: 39.594s, episode steps: 384, steps per second:  10, episode reward: 20.000, mean reward:  0.052 [ 0.000, 10.000], mean action: 2.503 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
  842/10000: episode: 2, duration: 45.060s, episode steps: 458, steps per second:  10, episode reward: 65.000, mean reward:  0.142 [ 0.000, 20.000], mean action: 2.341 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --


  updates=self.state_updates,


 1318/10000: episode: 3, duration: 531.341s, episode steps: 476, steps per second:   1, episode reward: 120.000, mean reward:  0.252 [ 0.000, 30.000], mean action: 2.445 [0.000, 5.000],  loss: 9.649218, mean_q: 3.604913, mean_eps: 0.895690
 1619/10000: episode: 4, duration: 462.715s, episode steps: 301, steps per second:   1, episode reward: 55.000, mean reward:  0.183 [ 0.000, 20.000], mean action: 2.349 [0.000, 5.000],  loss: 0.938952, mean_q: 2.848165, mean_eps: 0.867880
 2432/10000: episode: 5, duration: 7538.240s, episode steps: 813, steps per second:   0, episode reward: 245.000, mean reward:  0.301 [ 0.000, 30.000], mean action: 2.480 [0.000, 5.000],  loss: 1.007363, mean_q: 3.509114, mean_eps: 0.817750
 2920/10000: episode: 6, duration: 794.197s, episode steps: 488, steps per second:   1, episode reward: 105.000, mean reward:  0.215 [ 0.000, 25.000], mean action: 2.547 [0.000, 5.000],  loss: 0.790689, mean_q: 3.794214, mean_eps: 0.759205
 3512/10000: episode: 7, duration: 1083.

<keras.callbacks.History at 0x148c28cb220>

# Reloading Agent from Memory

In [11]:
# Persist the trained agent's weights so they can be reloaded later without retraining.
# Requires `dqn` to exist in the kernel: run the build/compile/fit cells first.
dqn.save_weights('SavedWeights/10k-Fast/dqn_weights.h5f')

NameError: name 'dqn' is not defined

In [10]:
# Rebuild the agent, compile it, then restore the previously saved weights.
dqn = build_agent(model, actions)
# `learning_rate` replaces the deprecated `lr` alias, which triggers a warning from Adam.
dqn.compile(Adam(learning_rate=1e-4))
dqn.load_weights('SavedWeights/10k-Fast/dqn_weights.h5f')

  super(Adam, self).__init__(name, **kwargs)


In [11]:
# Play three evaluation episodes with the agent, without visualization.
scores = dqn.test(env, nb_episodes=3, visualize=False)
# Report the average total reward across the evaluation episodes.
print(np.asarray(scores.history['episode_reward']).mean())

Testing for 3 episodes ...


  updates=self.state_updates,


Episode 1: reward: 180.000, steps: 899
Episode 2: reward: 285.000, steps: 1248
Episode 3: reward: 185.000, steps: 812
216.66666666666666
