<a href="https://colab.research.google.com/github/MaekTec/Deep_reinforcement_learning_Course/blob/solutions/Deep%20Q%20Learning/Space%20Invaders/DQN_Atari_Space_Invaders_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Step 1: Import the libraries

In [1]:
import tensorflow as tf      # Deep Learning library
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Lambda, Multiply
from keras import backend as K

# Report the library versions in use. `tf.__version__` is available in both
# TensorFlow 1.x and 2.x, whereas the `tf.VERSION` alias was removed in 2.0.
print(tf.__version__)
print(tf.keras.__version__)

import numpy as np           # Handle matrices
import gym                 # Retro Environment


from skimage import transform # Help us to preprocess the frames
from skimage.color import rgb2gray # Help us to gray our frames

import matplotlib.pyplot as plt # Display graphs

from collections import deque# Ordered collection with ends

import random

import warnings # This ignore all the warning messages that are normally printed during the training because of skiimage
warnings.filterwarnings('ignore') 

Using TensorFlow backend.


1.13.1
2.2.4-tf


## Step 2: Create our environment

In [2]:
# Build the Space Invaders environment through Gym
env = gym.make('SpaceInvaders-v0')

print("The size of our frame is: ", env.observation_space)
print("The action size is : ", env.action_space.n)

# One-hot encoding of the actions: row i of this identity matrix is the
# one-hot vector for action i, e.g. [1, 0, 0, ...] for action 0.
possible_actions = np.eye(env.action_space.n, dtype=int)

The size of our frame is:  Box(210, 160, 3)
The action size is :  6


## Step 3: Define the preprocessing functions

In [0]:
"""
    preprocess_frame:
    Take a frame.
    Grayscale it
    Resize it.
        __________________
        |                 |
        |                 |
        |                 |
        |                 |
        |_________________|
        
        to
        _____________
        |            |
        |            |
        |            |
        |____________|
    Normalize it.
    
    return preprocessed_frame
    
    """
def preprocess_frame(frame):
    """Convert a raw RGB frame into a small grayscale image.

    Steps:
      1. Convert to grayscale with ``rgb2gray``.
      2. Crop a fixed border: 8 rows from the top, 12 rows from the bottom,
         4 columns from the left and 12 columns from the right.
      3. Resize the cropped image to 110x84.

    Args:
        frame: RGB image of shape (height, width, 3).
            Assumed to be the uint8 frame returned by the Gym environment
            -- TODO confirm the dtype against the caller.

    Returns:
        2-D float array of shape (110, 84).
    """
    # rgb2gray converts integer images to float and returns intensities in
    # [0, 1], so the result is already normalized. The previous extra
    # division by 255 squeezed every pixel into [0, 1/255].
    gray = rgb2gray(frame)

    # Crop a fixed border [top:bottom, left:right]
    cropped_frame = gray[8:-12, 4:-12]

    # Resize
    # Thanks to Mikołaj Walkowiak
    preprocessed_frame = transform.resize(cropped_frame, [110, 84])

    return preprocessed_frame  # 110x84 frame (2-D, no channel axis)

In [0]:
stack_size = 4 # We stack 4 frames

# Initialize the deque with one all-zero 110x84 image per stacked frame.
# The builtin `int` is used instead of the `np.int` alias (removed in NumPy 1.24),
# and maxlen follows stack_size rather than repeating the literal 4.
stacked_frames = deque([np.zeros((110, 84), dtype=int) for _ in range(stack_size)], maxlen=stack_size)

def stack_frames(stacked_frames, state, is_new_episode):
    """Preprocess a raw frame and combine it with the most recent frames.

    Args:
        stacked_frames: deque holding the previously preprocessed frames.
            It is appended to in place when ``is_new_episode`` is false.
        state: raw frame; it is passed to ``preprocess_frame``.
        is_new_episode: when true, the history is discarded and replaced by
            ``stack_size`` copies of the new frame.

    Returns:
        A tuple ``(stacked_state, stacked_frames)``: the frames stacked along
        the last axis (``axis=2``), and the deque to pass to the next call
        (a new deque when ``is_new_episode`` is true).
    """
    # Preprocess frame
    frame = preprocess_frame(state)

    if is_new_episode:
        # A new episode has no history yet, so start a fresh deque filled with
        # the first frame. The caller's deque is left untouched.
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # maxlen makes the deque drop its oldest frame automatically
        stacked_frames.append(frame)

    # Frames are stacked on the last axis: (rows, columns, stack_size)
    stacked_state = np.stack(stacked_frames, axis=2)

    return stacked_state, stacked_frames

## Step 4: Set up our hyperparameters

In [0]:
### PREPROCESSING HYPERPARAMETERS
stack_size = 4                      # How many consecutive frames make up one state

### MODEL HYPERPARAMETERS
state_size = [110, 84, stack_size]  # One state: stack_size frames of 110x84, stacked on the last axis
action_size = env.action_space.n    # One network output per environment action
learning_rate = 0.00025             # Alpha: step size handed to the optimizer

### TRAINING HYPERPARAMETERS
total_episodes = 100                # Number of training episodes
max_steps = 50000                   # Upper bound on steps within one episode
batch_size = 64                     # Experiences per training batch

# Epsilon-greedy exploration schedule
explore_start = 1.0                 # Exploration probability at the start
explore_stop = 0.01                 # Floor for the exploration probability
decay_rate = 0.00001                # Exponential decay rate of the exploration probability

# Q-learning
gamma = 0.9                         # Discount factor

### MEMORY HYPERPARAMETERS
pretrain_length = batch_size        # Experiences stored in the Memory when it is first initialized
memory_size = 20000                 # Maximum number of experiences the Memory keeps

### Set to False to only watch the trained agent
training = True

### Set to True to render the environment
episode_render = False

## Step 5: Create our Deep Q-learning Neural Network model

In [0]:
class DQNetwork:
    """Convolutional Q-network built with tf.keras.

    The Keras model takes two inputs, a batch of states and a batch of
    per-action masks. For each sample it outputs the sum over actions of
    (network output * mask); with a one-hot mask this is the network output
    of the selected action.

    Attributes:
        state_size: shape of a single state, as passed to the constructor.
        action_size: number of network outputs.
        learning_rate: learning rate given to the Adam optimizer.
        name: label passed to the constructor (stored, not otherwise used).
        inputs_: Keras Input for the states.
        actions_: Keras Input for the action masks.
        model: the compiled Keras Model.
    """

    def __init__(self, state_size, action_size, learning_rate, name='DQNetwork'):
        """Build and compile the model, then print its summary.

        Args:
            state_size: iterable with the shape of one state.
            action_size: number of actions (size of the output layer).
            learning_rate: learning rate for Adam.
            name: label stored on the instance.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.name = name

        self.inputs_ = Input(shape=tuple(self.state_size), dtype='float32')
        self.actions_ = Input(shape=(self.action_size,), dtype='float32')

        # Three strided convolutions followed by a fully connected layer
        conv1 = Conv2D(32, [8, 8], strides=[4, 4], padding='valid', activation='elu')(self.inputs_)
        conv2 = Conv2D(64, [4, 4], strides=[2, 2], padding='valid', activation='elu')(conv1)
        conv3 = Conv2D(64, [3, 3], strides=[2, 2], padding='valid', activation='elu')(conv2)
        flatten = Flatten()(conv3)
        fc = Dense(512, activation='elu')(flatten)
        # NOTE(review): ELU on the output layer bounds every output below at -1;
        # a linear output is the usual choice for Q-values -- confirm this is intended.
        output = Dense(self.action_size, activation='elu')(fc)

        # Multiply by the action mask and sum over the action axis. The
        # tf.keras backend is used so the whole graph is built by a single
        # Keras implementation instead of mixing in the standalone `keras` package.
        masked_output = Multiply()([output, self.actions_])
        Q = Lambda(lambda z: tf.keras.backend.sum(z, axis=1))(masked_output)

        adam = optimizers.Adam(lr=self.learning_rate)
        # Keep the model on the instance; as a local variable it was
        # unreachable once __init__ returned.
        self.model = Model(inputs=[self.inputs_, self.actions_], outputs=Q)
        # NOTE(review): 'accuracy' is not meaningful for a regression loss; it is
        # kept so the values returned by training/evaluation calls keep their
        # current structure for any code that relies on it.
        self.model.compile(optimizer=adam, loss='mean_squared_error', metrics=['accuracy'])

        # summary() prints the table itself and returns None, so it is not
        # wrapped in print() (which would add a stray "None" line).
        self.model.summary()

In [7]:
# This cell rebinds the name `DQNetwork` from the class to an instance.
# The name is kept because code outside this view may rely on it.
# On a re-run the name already refers to an instance, so recover the class from
# it instead of calling the instance (which would raise a TypeError).
_dqn_class = DQNetwork if isinstance(DQNetwork, type) else type(DQNetwork)
DQNetwork = _dqn_class(state_size, action_size, learning_rate)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 110, 84, 4)   0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 26, 20, 32)   8224        input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 12, 9, 64)    32832       conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 5, 4, 64)     36928       conv2d_1[0][0]         