https://colab.research.google.com/drive/11iCB_ma9gvcHhtoNbSgDb6Q_4YJBFGa7

In [0]:
#producing and updating a Q-table can become ineffective in big state space environments
#Instead of using a Q-table, we’ll implement a Neural Network that takes a state and 
#approximates Q-values for each action based on that state.

In [0]:
! wget http://www.atarimania.com/roms/Roms.rar && unrar x Roms.rar && unzip Roms/ROMS.zip
! pip3 install gym-retro
! python3 -m retro.import ROMS/

--2019-02-20 02:12:13--  http://www.atarimania.com/roms/Roms.rar
Resolving www.atarimania.com (www.atarimania.com)... 195.154.81.199
Connecting to www.atarimania.com (www.atarimania.com)|195.154.81.199|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10823448 (10M) [application/x-rar-compressed]
Saving to: ‘Roms.rar’


2019-02-20 02:12:57 (244 KB/s) - ‘Roms.rar’ saved [10823448/10823448]


UNRAR 5.50 freeware      Copyright (c) 1993-2017 Alexander Roshal


Extracting from Roms.rar

Creating    Roms                                                      OK
Extracting  Roms/HC ROMS.zip                                              37%  OK 
Extracting  Roms/ROMS.zip                                                 76% 99%  OK 
All OK
Archive:  Roms/ROMS.zip
   creating: ROMS/
  inflating: ROMS/128 in 1 Game Select ROM (128 in 1) (Unknown) ~.bin  
  inflating: ROMS/2 Pak Special - Cavern Blaster, City War (1992) (HES) (773-867) (PAL).bin  
  inflat

# Atari Space Invaders

In [0]:
import tensorflow as tf
import numpy as np
import retro

from skimage import transform
from skimage.color import rgb2gray

import matplotlib.pyplot as plt
from collections import deque

import random
import warnings

### Create the Environment

In [0]:
retro.data.list_games()

In [0]:
env=retro.make(game='SpaceInvaders-Atari2600')

In [0]:
env.observation_space

Box(210, 160, 3)

In [0]:
## Usually ROM issue comes like below
#FileNotFoundError: Game not found: SpaceInvaders-Atari2600. Did you make sure to import the ROM?

## Steps to get rid of ROM Issue
##Download zip file from http://www.atarimania.com/rom_collection_archive_atari_2600_roms.html
#############!wget http://www.atarimania.com/rom_collection_archive_atari_2600_roms.html
##Open Roms.rar > ROMS.rar and find Space Invaders (1980) XXXXXX
#############!apt-get install unrar
##Extract all matches (there are 5 of them) into your destin folder
############# !unrar e -r /path
##(/Users/mkirank3/anaconda3/bin/python3.6) or python -m retro.import . (don't forget the point)

In [0]:
print("Size of our state space",env.observation_space)
print("Action Size",env.action_space.n)
possible_actions=np.array(np.identity(env.action_space.n,dtype=int).tolist())

Size of our state space Box(210, 160, 3)
Action Size 8


### Pre Processing 

In [0]:
def preprocess_frame(frame):
    # Greyscale frame 
    gray = rgb2gray(frame)
    # Crop the screen (remove the part below the player)
    # [Up: Down, Left: right]
    cropped_frame = gray[8:-12,4:-12]
    # Normalize Pixel Values
    normalized_frame = cropped_frame/255.0
    # Resize
    preprocessed_frame = transform.resize(normalized_frame, [110,84])
    return preprocessed_frame 

### Stack Frames

In [0]:
stack_size = 4 # We stack 4 frames

# Initialize deque with zero-images one array for each image
stacked_frames  =  deque([np.zeros((110,84), dtype=np.int) for i in range(stack_size)], maxlen=4)

def stack_frames(stacked_frames, state, is_new_episode):
    # Preprocess frame
    frame = preprocess_frame(state)
    
    if is_new_episode:
        # Clear our stacked_frames
        stacked_frames = deque([np.zeros((110,84), dtype=np.int) for i in range(stack_size)], maxlen=4)
        
        # Because we're in a new episode, copy the same frame 4x
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        
        # Stack the frames
        stacked_state = np.stack(stacked_frames, axis=2)
        
    else:
        # Append frame to deque, automatically removes the oldest frame
        stacked_frames.append(frame)

        # Build the stacked state (first dimension specifies different frames)
        stacked_state = np.stack(stacked_frames, axis=2) 
    
    return stacked_state, stacked_frames

### Hyper Parameters

In [0]:
### MODEL HYPERPARAMETERS
state_size = [110, 84, 4]      # Our input is a stack of 4 frames hence 110x84x4 (Width, height, channels) 
action_size = env.action_space.n # 8 possible actions
learning_rate =  0.00025      # Alpha (aka learning rate)

### TRAINING HYPERPARAMETERS
total_episodes = 50            # Total episodes for training
max_steps = 50000              # Max possible steps in an episode
batch_size = 64                # Batch size

# Exploration parameters for epsilon greedy strategy
explore_start = 1.0            # exploration probability at start
explore_stop = 0.01            # minimum exploration probability 
decay_rate = 0.00001           # exponential decay rate for exploration prob

# Q learning hyperparameters
gamma = 0.9                    # Discounting rate

### MEMORY HYPERPARAMETERS
pretrain_length = batch_size   # Number of experiences stored in the Memory when initialized for the first time
memory_size = 1000000          # Number of experiences the Memory can keep

### PREPROCESSING HYPERPARAMETERS
stack_size = 4                 # Number of frames stacked

### MODIFY THIS TO FALSE IF YOU JUST WANT TO SEE THE TRAINED AGENT
training = False

## TURN THIS TO TRUE IF YOU WANT TO RENDER THE ENVIRONMENT
episode_render = False

In [0]:
from keras import Sequential,Model
from keras.layers import Dense,Conv2D,Input,Flatten,Convolution2D
from keras.layers.advanced_activations import ELU
from keras.optimizers import Adam
from collections import deque

Using TensorFlow backend.


### Experience Replay

In [0]:
memory = deque(maxlen=1000)

In [0]:
def sample(memory, batch_size):
        buffer_size = len(memory)
        index = np.random.choice(np.arange(buffer_size),
                                size = batch_size,
                                replace = False)
        
        return [memory[i] for i in index]

In [0]:
#memory = Memory(max_size = memory_size)
for i in range(pretrain_length):
    if i == 0:
        state = env.reset()
        state, stacked_frames = stack_frames(stacked_frames, state, True)
    # Get the next_state, the rewards, done by taking a random action
    choice = random.randint(1,len(possible_actions))-1
    action = possible_actions[choice]
    next_state, reward, done, _ = env.step(action)
    next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
    if done:
        next_state = np.zeros(state.shape)
        memory.append((state, action, reward, next_state, done))
        state = env.reset()
        state, stacked_frames = stack_frames(stacked_frames, state, True)
    else:
        memory.append((state, action, reward, next_state, done))
        state = next_state

  warn("The default mode, 'constant', will be changed to 'reflect' in "


### Train our Agent

In [0]:
def predict_action(model,explore_start, explore_stop, decay_rate, decay_step, state, actions):
    exp_exp_tradeoff = np.random.rand()
    explore_probability = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * decay_step)
    if (explore_probability > exp_exp_tradeoff):
        choice = random.randint(1,len(possible_actions))-1
        action = possible_actions[choice]
    else:
        Qs = model.predict(state.reshape((1, *state.shape)))
        choice = np.argmax(Qs)
        action = possible_actions[choice]
    return action, explore_probability

In [0]:
state,frames=stack_frames(stacked_frames, state, True)

  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [0]:
def replay(agent, batch_size,memory,explore_start,explore_stop,decay_rate):
    minibatch = sample(memory,batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            target = reward + gamma*np.max(agent.predict(next_state.reshape((1,*next_state.shape)))[0])
        target_f = agent.predict(state.reshape((1,*state.shape)))
        target_f[0][action] = target
        agent.fit(state.reshape((1,*state.shape)), target_f, epochs=1, verbose=0)
    if explore_start > explore_stop:
        explore_start *= 0.995

In [0]:
def DQNetwork():
    model=Sequential()
    model.add(Convolution2D(32,input_shape=(110,84,4),kernel_size=8, strides=4, padding='valid',activation='elu'))
    model.add(Convolution2D(64, kernel_size=4, strides=2, padding='valid',activation='elu'))
    model.add(Convolution2D(128, kernel_size=3, strides=2, padding='valid',activation='elu'))
    model.add(Flatten())
    model.add(Dense(units=512))
    model.add(Dense(units=3,activation='softmax'))
    model.compile(optimizer=Adam(0.01),loss='mse')
    return model

In [0]:
agent = DQNetwork()
agent.summary()
rewards_list=[]
# Iterate the game
for episode in range(500):
    # reset state in the beginning of each game
    decay_step=0
    step = 0
    episode_rewards = []
    state = env.reset()
    state, stacked_frames = stack_frames(stacked_frames, state, True)
    while step < max_steps:
        step += 1
        decay_step +=1
        # Predict the action to take and take it
        action, explore_probability = predict_action(agent,explore_start, explore_stop, decay_rate, decay_step, state, possible_actions)
        #Perform the action and get the next_state, reward, and done information
        next_state, reward, done, _ = env.step(action)
        # Add the reward to total reward
        episode_rewards.append(reward)
        if done:
                # The episode ends so no next state
                next_state = np.zeros((110,84), dtype=np.int)
                next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
                # Set step = max_steps to end the episode
                step = max_steps
                # Get the total reward of the episode
                total_reward = np.sum(episode_rewards)

                print('Episode: {}'.format(episode),
                              'Total reward: {}'.format(total_reward),
                              'Explore P: {:.4f}'.format(explore_probability))

                rewards_list.append((episode, total_reward))
                # Store transition <st,at,rt+1,st+1> in memory D
                memory.append((state, action, reward, next_state, done))

        else:
                # Stack the frame of the next_state
                next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)
                # Add experience to memory
                memory.append((state, action, reward, next_state, done))
                # st+1 is now our current state
                state = next_state
    # train the agent with the experience of the episode
    replay(agent,32,memory,explore_start,explore_stop,decay_rate)

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 20, 32)        8224      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 9, 64)         32832     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 5, 4, 128)         73856     
_________________________________________________________________
flatten_1 (Flatten)          (None, 2560)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               1311232   
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 1539      
Total params: 1,427,683
Trainable params: 1,427,683
Non-trainable para

  warn("The default mode, 'constant', will be changed to 'reflect' in "


Episode: 0 Total reward: 185.0 Explore P: 0.9715
Instructions for updating:
Use tf.cast instead.
Episode: 1 Total reward: 105.0 Explore P: 0.9822
Episode: 2 Total reward: 185.0 Explore P: 0.9609
Episode: 3 Total reward: 110.0 Explore P: 0.9779
Episode: 4 Total reward: 105.0 Explore P: 0.9810
Episode: 5 Total reward: 285.0 Explore P: 0.9713
Episode: 6 Total reward: 240.0 Explore P: 0.9731
Episode: 7 Total reward: 380.0 Explore P: 0.9756
Episode: 8 Total reward: 190.0 Explore P: 0.9718
Episode: 9 Total reward: 460.0 Explore P: 0.9726
Episode: 10 Total reward: 55.0 Explore P: 0.9819
Episode: 11 Total reward: 225.0 Explore P: 0.9725
Episode: 12 Total reward: 210.0 Explore P: 0.9762
Episode: 13 Total reward: 260.0 Explore P: 0.9718
Episode: 14 Total reward: 260.0 Explore P: 0.9725
Episode: 15 Total reward: 465.0 Explore P: 0.9718
Episode: 16 Total reward: 410.0 Explore P: 0.9755
Episode: 17 Total reward: 415.0 Explore P: 0.9743
Episode: 18 Total reward: 80.0 Explore P: 0.9822
Episode: 19 To

In [0]:
env.close()