In [5]:
import tensorflow as tf
from tensorflow import keras
from keras import layers , models
from keras.optimizers import RMSprop
from keras.models import model_from_json
import random
import numpy as np
from PIL import Image
import gym
import matplotlib
import matplotlib.pyplot as plt

In [6]:
class Memory():
    
    def __init__(self , max_size):
        
        '''
        Create a list containing
        past experiences.
        '''
        
        self.max_size = max_size
        self.memory = []
    
    
    def push(self, batch):
        
        '''
        Push a batch into
        replay memory
        '''
        if(len(self.memory) > self.max_size):
            self.memory.pop(0)
        else:
            self.memory.append(batch)
    
    
    def sample(self, batch_size):
        
        '''
        Sample a random batch
        of a given batch size
        '''
        
        return random.sample(self.memory , batch_size)
    
    def length(self):
        
        '''
        Returns length of 
        replay memory list
        '''
        return len(self.memory)        

In [7]:
class CNNetwork():
    
    def __init__(self , nb_actions , input_dimension):
        
        self.nb_actions = nb_actions
        self.input_dimension = input_dimension
        self.model = self.create_model()
        
    
    def create_model(self):
        
        '''
        Create a CNN model
        '''
        
        model = models.Sequential()
        
        model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape= self.input_dimension))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(64, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(64, (3, 3), activation='relu'))
        
        model.add(layers.Flatten())
        model.add(layers.Dense(512 , activation = 'relu'))
        model.add(layers.Dense(self.nb_actions , activation = 'linear'))
        
        model.compile(loss = 'mse',
                      optimizer = RMSprop(lr = 0.0002 ,
                                          rho = 0.95 ,
                                          epsilon = None ,
                                          decay = 0.00))
        
        return model
    
    
    def model_summary(self):
        
        '''
        Prints out the
        model summary
        '''
        
        self.model.summary()
    
    def update_model(self , new_model):
        
        '''
        Updating weights of
        the model
        '''
        
        self.model.set_weights(new_model.get_weights())

In [8]:
network = CNNetwork(nb_actions = 9 , input_dimension = (88 , 84 , 1))
network.model_summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 86, 82, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 43, 41, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 41, 39, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 20, 19, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 18, 17, 64)        36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 19584)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)              

In [9]:
def preprocess_image(obs):

    img = obs[1: 176:2, ::2] # downsize/crop images
    img = img.mean(axis = 2) # turn to greyscale   
    img = (img // 3 - 128).astype(np.int8) # normalisation from -128 to 127
    return img.reshape(1 , 88, 80, 1) # reshape to 88 x 80 pixels

In [10]:
class DQNAgent():
    
    def __init__(self , state_size , action_size , discount):
        
        self.state_size = state_size
        self.action_size = action_size
        
        self.epsilon = 1
        self.epsilon_decay = .995
        self.epsilon_min = .1
        self.discount = discount
        
        
        self.primary_model = CNNetwork(nb_actions = self.action_size,
                               input_dimension = self.state_size)
        
#         print(self.model.model.get_weights())
    
        self.target_model = CNNetwork(nb_actions = self.action_size,
                               input_dimension = self.state_size)
        
        self.target_model.update_model(self.primary_model.model)
        
        self.replay = Memory(max_size = 64)
        
    def remember(self , state , action , reward , next_state , done):
        '''
        Pushing to 
        replat memory
        '''
        self.replay.push((state , action , reward , next_state , done))
    
    
    def get_action(self , state):
        
        if(np.random.rand() <= self.epsilon):
            return random.randrange(self.action_size)
        
        qvals = self.primary_model.model.predict(state)
        
        return np.argmax(qvals[0])
    
    def replay_game(self , batch_size):
        minibatch = self.replay.sample(batch_size = batch_size)
        
        for state , action , reward , next_state , done in minibatch:
            
            if not done:
                
                max_action = np.argmax(self.primary_model.model.predict(next_state)[0])
                target = reward + self.discount * (self.target_model.model.predict(next_state)[0][max_action])
            
            else:
                
                target = reward
            
            new_qvals = self.primary_model.model.predict(state)
            new_qvals[0][max_action] = target
            
            self.primary_model.model.fit(x = state,
                                         y = new_qvals, 
                                         epochs = 5, 
                                         verbose = 0)
        
        # if self.epsilon > self.epsilon_min:
            
        #     self.epsilon *= self.epsilon_decay

In [11]:
agent = DQNAgent(state_size = (88 , 80 , 1),
                 action_size = 9,
                 discount = .95)

In [12]:
episodes = 1
skip = 90
batch_size = 4
update_rate = 10
env = gym.make('MsPacman-v0')

In [9]:
for e in range(episodes):
    game_score = 0
    old_state = env.reset()
    
    for _ in range(skip):
        old_state , reward , _ , _ = env.step(1)
        game_score += reward
    
    old_state = preprocess_image(old_state)
    
    for time in range(200):
        '''
        Update target model
        '''
        if time % update_rate == 0:
            agent.target_model.update_model(agent.primary_model.model)
        
        
        action = agent.get_action(old_state)
        
        new_state , reward , done , _ = env.step(action)
        game_score += reward
        new_state = preprocess_image(new_state)
        
        agent.remember(old_state , action , reward , new_state , done)
        
        old_state = new_state
        
        print("Time is : {}".format(time))
        if done:

          # print("Episode : {}/{} , Game Score : {}".format(e , episodes , game_score))
          break
        
        if(agent.replay.length() > batch_size):
            agent.replay_game(batch_size)
          
        print("Episode : {}/{} , Game Score : {}".format(e , episodes , game_score))

Time is : 0
Episode : 0/1 , Game Score : 0.0
Time is : 1
Episode : 0/1 , Game Score : 0.0
Time is : 2
Episode : 0/1 , Game Score : 0.0
Time is : 3
Episode : 0/1 , Game Score : 0.0
Time is : 4


ValueError: None values not supported.

In [13]:
from keras.models import model_from_json
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model_weights.h5")
print("Loaded model from disk")

Loaded model from disk


In [20]:
env = gym.make('MsPacman-v0')
loaded_model.weights
loaded_model.compile(loss='mse', optimizer='rmsprop', metrics=['accuracy'])

In [22]:
for games in range(10):
  state = env.reset()
  state = preprocess_image(state)
  game_score = 0

  for time in range(20_000):

    action = loaded_model.predict(state)
    new_state , reward , done , _  = env.step(np.argmax(action))

    new_state = preprocess_image(new_state)
    old_state = new_state

    game_score += reward
    
    if done:
      print("Score : " , game_score)
      break

Score :  210.0
Score :  210.0
Score :  210.0
Score :  210.0
Score :  210.0
Score :  210.0
Score :  210.0
Score :  210.0
Score :  210.0
Score :  210.0
