<a href="https://colab.research.google.com/github/HemaZ/Deep-Reinforcement-Learning/blob/master/DQN_SpaceInvaders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Download the ROM archive, extract it, then unzip the inner ROMS.zip.
! wget http://www.atarimania.com/roms/Roms.rar && unrar x Roms.rar && unzip Roms/ROMS.zip
# Install Gym Retro, which provides the `retro` module imported below.
! pip3 install gym-retro
# Register the ROMs found under ROMS/ with Gym Retro.
! python3 -m retro.import ROMS/

In [0]:

import tensorflow as tf
import numpy as np
import retro                 # Retro Environment


from skimage import transform # Help us to preprocess the frames
from skimage.color import rgb2gray # Help us to gray our frames

import matplotlib.pyplot as plt # Display graphs

from collections import deque # Ordered collection with ends

import random


In [0]:
class GameEnv:
  """Gym Retro game wrapper that also holds frame-stacking state and DQN settings.

  Attributes:
    env: environment returned by ``retro.make(game)``.
    n_actions: ``env.action_space.n``.
    frame_size: ``env.observation_space.shape`` (shape of a raw frame).
    hot_enc_actions: identity matrix with one one-hot row per action.
    stack_size: number of preprocessed frames kept in the stack.
    stacked_frames: deque holding the last ``stack_size`` preprocessed frames.
    stacked_state: ``stacked_frames`` stacked along a new last axis.
    hyperparameters: dict of training settings; ``'learning_rate'`` and
      ``'state_size'`` are read by ``DeepQNN``.
    training, render: boolean flags, both initialised to ``False``
      (not read anywhere in this class).
  """

  # Height and width of a single preprocessed frame.
  FRAME_SHAPE = (110, 84)

  def __init__(self, game = 'SpaceInvaders-Atari2600'):
    """Create the Retro environment for ``game`` and initialise all state."""
    self.env = retro.make(game)
    self.n_actions = self.env.action_space.n
    self.frame_size = self.env.observation_space.shape
    # np.identity already returns an ndarray; no list round-trip is needed.
    self.hot_enc_actions = np.identity(self.n_actions)
    self.stack_size = 4
    # `np.int` was removed from NumPy (1.24); the builtin `int` is the exact
    # type it used to alias.
    self.stacked_frames = deque(
        [np.zeros(self.FRAME_SHAPE, dtype=int) for _ in range(self.stack_size)],
        maxlen=self.stack_size)
    # Make the stacked state available even before the first stack_frame call.
    self.stacked_state = np.stack(self.stacked_frames, axis=2)
    self.hyperparameters = {
                           'learning_rate' : 0.00025,
                           'total_episodes' : 500,
                           'max_steps' : 5000,
                           # Misspelled key kept so existing lookups keep working.
                           'btach_size': 64,
                           # Correctly spelled alias of 'btach_size'.
                           'batch_size': 64,
                           'explore_start' : 1,
                           'explore_end' : 0.01,
                           'decay_rate' : 0.00001,
                           'gamma' : 0.9,
                           'pretrain_length' : 64,
                           'memory_size' : 1000000,
                           'state_size' : [*self.FRAME_SHAPE, self.stack_size]
                           }
    self.training = False
    self.render = False

  def _preprocess_frame(self,frame):
    """Convert a raw RGB frame into a cropped, resized grayscale frame.

    Args:
      frame: RGB image array, as produced by the environment.

    Returns:
      2-D float array of shape ``FRAME_SHAPE`` (110 x 84).
    """
    # rgb2gray rescales integer RGB input to floats in [0, 1], so dividing by
    # 255 afterwards would normalise twice and squash every pixel towards 0.
    gray_frame = rgb2gray(frame)

    # Drop 8 rows at the top, 12 at the bottom, 4 columns on the left and
    # 12 on the right.
    cropped_frame = gray_frame[8:-12,4:-12]

    # Resize
    # Thanks to Mikołaj Walkowiak
    preprocessed_frame = transform.resize(cropped_frame, list(self.FRAME_SHAPE))

    return preprocessed_frame # 110x84 frame

  def stack_frame(self, frame, new_epis = False):
    """Preprocess ``frame``, push it on the frame stack and rebuild the state.

    Args:
      frame: raw frame; it is passed through ``_preprocess_frame``.
      new_epis: when True the stack is refilled with ``stack_size`` copies of
        the new frame; otherwise the frame is appended and the oldest one is
        dropped by the deque's ``maxlen``.

    Returns:
      ``self.stacked_state``: the stack joined along a new last axis, i.e.
      shape ``(height, width, stack_size)`` for 2-D preprocessed frames.
    """
    processed_frame = self._preprocess_frame(frame)

    # Stack the *preprocessed* frame; stacking the raw frame would not yield
    # the (110, 84, 4) state declared in hyperparameters['state_size'].
    if new_epis:
      self.stacked_frames = deque(
          [processed_frame for _ in range(self.stack_size)],
          maxlen=self.stack_size)
    else:
      self.stacked_frames.append(processed_frame)

    self.stacked_state = np.stack(self.stacked_frames, axis=2)
    return self.stacked_state
      
    
    

In [0]:
class DeepQNN:
  """Convolutional Q-network built with the TensorFlow 1.x graph API.

  The graph is created inside the ``'DQNN'`` variable scope: three
  convolution + ELU stages, a 512-unit dense layer, and a linear output layer
  with one unit per action.

  Attributes:
    gamenv: the environment wrapper passed to the constructor.
    _inputs: float32 placeholder of shape ``[None, *state_size]``.
    _actions: float32 placeholder of shape ``[None, n_actions]``.
    target_Q: float32 placeholder of shape ``[None]``.
    output: per-action outputs of the final dense layer.
    Q: ``output`` multiplied by ``_actions`` and summed per sample.
    loss: mean squared difference between ``target_Q`` and ``Q``.
    optimizer: Adam minimisation op for ``loss``.
  """

  def __init__(self, gamenv):
    """Build the network graph.

    Args:
      gamenv: object exposing ``n_actions`` and a ``hyperparameters`` dict
        with ``'state_size'`` and ``'learning_rate'`` entries.
    """
    self.gamenv = gamenv
    with tf.variable_scope('DQNN'):
      self._inputs = tf.placeholder(tf.float32, [None, *self.gamenv.hyperparameters['state_size']], name='inputs')
      self._actions = tf.placeholder(tf.float32, [None, self.gamenv.n_actions], name='actions')
      self.target_Q = tf.placeholder(tf.float32, [None], name="target")

      self.conv1 = tf.layers.conv2d(inputs = self._inputs,
                                    filters = 32,
                                    kernel_size = [8,8],
                                    strides = [4,4],
                                    padding = 'VALID',
                                    kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                    name = 'Conv1')
      self.actvf1 = tf.nn.elu(self.conv1, name='Elu1')

      # Feed the activated output forward; using self.conv1 here would bypass
      # the ELU and leave the first stage purely linear.
      self.conv2 = tf.layers.conv2d(inputs = self.actvf1,
                                    filters = 64,
                                    kernel_size = [4,4],
                                    strides = [2,2],
                                    padding = 'VALID',
                                    kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                    name = 'Conv2')
      self.actvf2 = tf.nn.elu(self.conv2, name='Elu2')

      # Same as above: consume the ELU output of the second stage.
      self.conv3 = tf.layers.conv2d(inputs = self.actvf2,
                                    filters = 64,
                                    kernel_size = [3,3],
                                    strides = [2,2],
                                    padding = 'VALID',
                                    kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                    name = 'Conv3')
      self.actvf3 = tf.nn.elu(self.conv3, name='Elu3')

      self.flatten = tf.contrib.layers.flatten(self.actvf3)
      self.fc = tf.layers.dense(inputs = self.flatten,
                                units = 512,
                                activation = tf.nn.elu,
                                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                name="fc1")

      self.output = tf.layers.dense(inputs = self.fc,
                                   kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                   units = self.gamenv.n_actions,
                                   activation=None)

      # Reduce over the action axis only so Q has shape [None], matching
      # target_Q. Without `axis`, reduce_sum collapses the whole batch into a
      # single scalar that is then broadcast against every target.
      self.Q = tf.reduce_sum(tf.multiply(self.output, self._actions), axis=1)

      # The loss is the mean squared difference between the target and the
      # predicted Q-values: Mean((Qtarget - Q)^2)
      self.loss = tf.reduce_mean(tf.square(self.target_Q - self.Q))

      self.optimizer = tf.train.AdamOptimizer(self.gamenv.hyperparameters['learning_rate']).minimize(self.loss)

In [0]:
# Build the environment wrapper with its default game ('SpaceInvaders-Atari2600').
spaceinvaders = GameEnv()

In [0]:
# Start from an empty default graph before building the network.
tf.reset_default_graph()
# Build the Q-network graph using the action count and hyperparameters of the environment wrapper.
dqnn = DeepQNN(spaceinvaders)