In [1]:
import tensorflow as tf      # Deep Learning library
import numpy as np           # Handle matrices
from vizdoom import *        # Doom Environment

import random                # Handling random number generation
import time                  # Handling time calculation
from skimage import transform# Help us to preprocess the frames

from collections import deque# Ordered collection with ends
import matplotlib.pyplot as plt # Display graphs

import warnings # This ignore all the warning messages that are normally printed during the training because of skiimage
warnings.filterwarnings('ignore')

In [2]:
"""
Here we create our environment
"""
def create_environment():
    game = DoomGame()
    
    # Load the correct configuration
    game.load_config("basic.cfg")
    
    # Load the correct scenario (in our case basic scenario)
    game.set_doom_scenario_path("basic.wad")

    # Add labels to find enemy position
    game.set_labels_buffer_enabled(True)
    
    game.init()
    
    # Here our possible actions
    left = [1, 0, 0]
    right = [0, 1, 0]
    shoot = [0, 0, 1]
    possible_actions = [left, right, shoot]
    
    return game, possible_actions  

In [3]:
def get_label(labels, label_name):
    for label in labels:
        if label.object_name == label_name:
            return label
    return None

In [4]:
"""
Here we performing random action to test the environment
"""
def test_environment(game):
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    episodes = 3
    for i in range(episodes):
        game.new_episode()
        while not game.is_episode_finished():
            state = game.get_state()
            
            img = state.screen_buffer
            misc = state.game_variables
            action = random.choice(actions)
            # reward = game.make_action(action)
            reward = game.get_last_reward()
            game.advance_action()
            demon = get_label(state.labels, 'Cacodemon')
            player = get_label(state.labels, 'DoomPlayer')
                        
            if (demon is not None and player is not None):
                print(f"{demon.object_name} x: {demon.object_position_x}, y: {demon.object_position_y} z: {demon.object_position_z} \
                    {player.object_name} x: {player.object_position_x}, y:{player.object_position_y}, z:{player.object_position_z}")
        print ("Result:", game.get_total_reward())

        time.sleep(0.05)
    game.close()

In [5]:
game, possible_actions = create_environment()

test_environment(game)

Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32

Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32

Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32

Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -68.482177734375 z: 0.0                     DoomPlayer x: -384.0, y:32

Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -3

Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -3

Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: -75.7154541015625 z: 0.0                     DoomPlayer x: -3

Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0


Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0


Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0


Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0


Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0


Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0
Cacodemon x: 0.0, y: 112.40283203125 z: 0.0                     DoomPlayer x: -384.0, y:32.0, z:0.0


In [None]:
# demon.width = 117
# demon.height = 147

# def preprocess_frame(frame):
    # Greyscale frame already done in our vizdoom config
    
    # Crop the screen (remove the roof because it contains no information)
    cropped_frame = frame[30:-10,30:-30]
    
    # Normalize Pixel Values
    normalized_frame = cropped_frame/255.0
    
    # Resize
    preprocessed_frame = transform.resize(normalized_frame, [84,84])
    
    return preprocessed_frame

In [18]:
stack_size = 4 # We stack 4 frames

# Initialize deque with zero-images one array for each image
stacked_frames  =  deque([np.zeros((84,84), dtype=np.int) for i in range(stack_size)], maxlen=4) 

def stack_frames(stacked_frames, state, is_new_episode):
    frame = preprocess_frame(state)
    
    if is_new_episode:
        # Clear our stacked_frames
        stacked_frames = deque([np.zeros((84,84), dtype=np.int) for i in range(stack_size)], maxlen=4)
        
        # Because we're in a new episode, copy the same frame 4x
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        
        # Stack the frames
        stacked_state = np.stack(stacked_frames, axis=2)
        
    else:
        # Append frame to deque, automatically removes the oldest frame
        stacked_frames.append(frame)

        # Build the stacked state (first dimension specifies different frames)
        stacked_state = np.stack(stacked_frames, axis=2) 
    
    return stacked_state, stacked_frames

In [19]:
### MODEL HYPERPARAMETERS
state_size = [84,84,4]      #input is a stack of 4 frames hence 84x84x4 (Width, height, channels) 
action_size = game.get_available_buttons_size()              # 3 actions: left, right, shoot
learning_rate =  0.0002      # Alpha (learning rate)

### TRAINING HYPERPARAMETERS
total_episodes = 500        # Total episodes for training
max_steps = 100              # Max possible steps in an episode
batch_size = 64 

### MEMORY HYPERPARAMETERS
pretrain_length = batch_size   # Number of experiences stored in the Memory when initialized for the first time
memory_size = 1000000          # Number of experiences the Memory can keep

### MODIFY THIS TO FALSE IF YOU JUST WANT TO SEE THE TRAINED AGENT
training = True

In [20]:
class DQNetwork:
    def __init__(self, state_size, action_size, learning_rate, name='DQNetwork'):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        
        with tf.variable_scope(name):
            # We create the placeholders
            # *state_size means that we take each elements of state_size in tuple hence is like if we wrote
            # [None, 84, 84, 4]
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name="inputs")
            self.actions_ = tf.placeholder(tf.float32, [None, 3], name="actions_")
            
            # Remember that target_Q is the R(s,a) + ymax Qhat(s', a')
            self.target_Q = tf.placeholder(tf.float32, [None], name="target")
            
            """
            First convnet:
            CNN
            BatchNormalization
            ELU
            """
            # Input is 84x84x4
            self.conv1 = tf.layers.conv2d(inputs = self.inputs_,
                                         filters = 32,
                                         kernel_size = [8,8],
                                         strides = [4,4],
                                         padding = "VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                         name = "conv1")
            
            self.conv1_batchnorm = tf.layers.batch_normalization(self.conv1,
                                                   training = True,
                                                   epsilon = 1e-5,
                                                     name = 'batch_norm1')
            
            self.conv1_out = tf.nn.elu(self.conv1_batchnorm, name="conv1_out")
            ## output --> [20, 20, 32]
            
            
            """
            Second convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv2 = tf.layers.conv2d(inputs = self.conv1_out,
                                 filters = 64,
                                 kernel_size = [4,4],
                                 strides = [2,2],
                                 padding = "VALID",
                                kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                 name = "conv2")
        
            self.conv2_batchnorm = tf.layers.batch_normalization(self.conv2,
                                                   training = True,
                                                   epsilon = 1e-5,
                                                     name = 'batch_norm2')

            self.conv2_out = tf.nn.elu(self.conv2_batchnorm, name="conv2_out")
            ## output --> [9, 9, 64]
            
            
            """
            Third convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv3 = tf.layers.conv2d(inputs = self.conv2_out,
                                 filters = 128,
                                 kernel_size = [4,4],
                                 strides = [2,2],
                                 padding = "VALID",
                                kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                 name = "conv3")
        
            self.conv3_batchnorm = tf.layers.batch_normalization(self.conv3,
                                                   training = True,
                                                   epsilon = 1e-5,
                                                     name = 'batch_norm3')

            self.conv3_out = tf.nn.elu(self.conv3_batchnorm, name="conv3_out")
            ## output --> [3, 3, 128]
            
            
            self.flatten = tf.layers.flatten(self.conv3_out)
            ## --> [1152]
            
            
            self.fc = tf.layers.dense(inputs = self.flatten,
                                  units = 512,
                                  activation = tf.nn.elu,
                                       kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                name="fc1")
            
            
            self.output = tf.layers.dense(inputs = self.fc, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                          units = 3, 
                                        activation=None)

  
            # Q is our predicted Q value.
            self.Q = tf.reduce_sum(tf.multiply(self.output, self.actions_), axis=1)
            
            
            # The loss is the difference between our predicted Q_values and the Q_target
            # Sum(Qtarget - Q)^2
            self.loss = tf.reduce_mean(tf.square(self.target_Q - self.Q))
            
            self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)

In [21]:
#Reset the graph
tf.reset_default_graph()

# Instantiate the DQNetwork
DQNetwork = DQNetwork(state_size, action_size, learning_rate)

In [22]:
class Memory():
    def __init__(self, max_size):
        self.buffer = deque(maxlen = max_size)
    
    def add(self, experience):
        self.buffer.append(experience)
    
    def sample(self, batch_size):
        buffer_size = len(self.buffer)
        index = np.random.choice(np.arange(buffer_size),
                                size = batch_size,
                                replace = False)
        
        return [self.buffer[i] for i in index]

In [23]:
#Instantiate memory
memory = Memory(max_size = memory_size)

# Render the environment
game.new_episode()

for i in range(pretrain_length):
    # If it's the first step
    if i == 0:
        # First we need a state
        state = game.get_state().screen_buffer
        state, stacked_frames = stack_frames(stacked_frames, state, True)
    
    # Random action
    action = random.choice(possible_actions)
    
    # Get the rewards
    reward = game.make_action(action)
    
    # Look if the episode is finished
    done = game.is_episode_finished()
    
    # If we're dead
    if done:
        # We finished the episode
        next_state = np.zeros(state.shape)
        
        # Add experience to memory
        memory.add((state, action, reward, next_state, done))
        
        # Start a new episode
        game.new_episode()
        
        # First we need a state
        state = game.get_state().screen_buffer
        
        # Stack the frames
        state, stacked_frames = stack_frames(stacked_frames, state, True)
        
    else:
        # Get the next state
        next_state = game.get_state().screen_buffer
        next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
        
        # Add experience to memory
        memory.add((state, action, reward, next_state, done))
        
        # Our state is now the next_state
        state = next_state