## Imports

In [1]:
import vizdoom as vzd
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import cv2
from collections import deque
import random

## Variables and helper functions

In [2]:
# global variables
# Scenario config file name; initialize_game() loads it from "../../scenarios/".
config_file = "defend_the_center.cfg"

#dqn parameters
l_r = 0.001  # learning rate handed to the Adam optimizer inside DQN
batch_size = 50  # not referenced in the cells shown here; presumably the training minibatch size
episodes_per_epoch = 100  # not referenced in the cells shown here; presumably used by the training loop
epochs = 50  # not referenced in the cells shown here; presumably used by the training loop

# input parameters
# Size frames are resized to by process_raw_input(); also the spatial size of the DQN input placeholder.
input_shape = (100, 100)
replay_buffer_size = 500  # not referenced in the cells shown here; presumably the replay memory capacity

#game parameters
frames_to_skip = 10  # passed as the second argument of game.make_action() in play_optimal()
discount = 0.9  # not referenced in the cells shown here; presumably the reward discount factor

# helper functions
def display_image(image):
    """Draw `image` on the current matplotlib axes using the 'gray' colormap."""
    plt.imshow(image, cmap='gray')

def process_raw_input(image):
    """Resize a raw frame to `input_shape` and append a channel axis.

    Parameters
    ----------
    image : array
        Frame to preprocess. Assumed to be a single-channel 2-D image (the game
        is configured with the GRAY8 screen format) -- confirm for other callers.

    Returns
    -------
    array
        The resized frame with shape ``(input_shape[0], input_shape[1], 1)``.
    """
    rows, cols = input_shape
    # cv2.resize expects dsize as (width, height) but returns an array shaped
    # (height, width). Passing the dimensions swapped keeps the reshape below a
    # pure "add channel axis" operation; with the old call a non-square
    # input_shape would have reinterpreted the pixel buffer and scrambled the
    # image. For the square shape currently configured the result is unchanged.
    resized = cv2.resize(image, (cols, rows))
    return resized.reshape((rows, cols, 1))

## Initialize Doom

In [3]:
def initialize_game(visible=False):
    """Create, configure and start a ViZDoom game.

    Loads the scenario named by `config_file` from "../../scenarios/", then sets
    window visibility, PLAYER mode, the GRAY8 screen format and the 1280x1024
    resolution before calling init().

    Parameters
    ----------
    visible : bool
        Value forwarded to set_window_visible().

    Returns
    -------
    vzd.DoomGame
        The initialised game instance.
    """
    scenario_path = "../../scenarios/" + config_file

    doom = vzd.DoomGame()
    doom.load_config(scenario_path)

    # These setters run after load_config(), in the same order as before.
    doom.set_window_visible(visible)
    doom.set_mode(vzd.Mode.PLAYER)
    doom.set_screen_format(vzd.ScreenFormat.GRAY8)
    doom.set_screen_resolution(vzd.ScreenResolution.RES_1280X1024)

    doom.init()
    return doom

## DQN

In [6]:
class DQN():
    """Convolutional Q-value network built with the TensorFlow 1.x graph API.

    Each instance owns its own ``tf.Graph`` and ``tf.Session``.

    Architecture (in order):
    conv(8 filters, 9x9, stride 2, relu) -> max-pool(2x2, stride 2) ->
    conv(32 filters, 5x5, stride 2, relu) -> max-pool(2x2, stride 2) ->
    flatten -> dense(512, relu) -> dropout(rate 0.5) -> dense(num_actions).

    The network is trained with mean squared error between its outputs and the
    supplied targets, using Adam with learning rate `l_r`.
    """

    def __init__(self, num_actions):
        """Build the graph, create the session and initialise all variables.

        Parameters
        ----------
        num_actions : int
            Number of outputs of the final dense layer (one Q-value per action).
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            self._X = tf.placeholder(tf.float32, [None] + list(input_shape) + [1], name="X")
            self._y = tf.placeholder(tf.float32, [None, num_actions], name="y_true")
            # tf.layers.dropout only drops units when `training` is true, and it
            # defaults to False. Without this flag the dropout layer was a no-op
            # even inside train(). The default of False means inference calls do
            # not need to feed it; train() feeds True.
            self._is_training = tf.placeholder_with_default(False, shape=[], name="is_training")

            # convolutional layers
            self._conv1 = tf.layers.conv2d(
                                inputs=self._X,
                                filters=8,
                                kernel_size=[9, 9],
                                strides=[2, 2],
                                activation=tf.nn.relu,
                                name="conv1")
            self._pool1 = tf.layers.max_pooling2d(inputs=self._conv1, pool_size=[2, 2], strides=2, name="pool1")

            self._conv2 = tf.layers.conv2d(
                                inputs=self._pool1,
                                filters=32,
                                kernel_size=[5, 5],
                                strides=[2, 2],
                                activation=tf.nn.relu,
                                name="conv2", )
            self._pool2 = tf.layers.max_pooling2d(inputs=self._conv2, pool_size=[2, 2], strides=2, name="pool2")

            #fcn
            self._flat = tf.layers.flatten(self._pool2, name="flat")
            self._fc1 = tf.layers.dense(inputs=self._flat, units=512, activation=tf.nn.relu, name="fc1", )
            self._dropout = tf.layers.dropout(
                                inputs=self._fc1,
                                rate=0.5,
                                training=self._is_training,
                                name="dropout")

            self._y_pred = tf.layers.dense(inputs=self._dropout, units=num_actions, name="y_pred", )

            #train
            self._loss = tf.losses.mean_squared_error(labels=self._y, predictions=self._y_pred)
            self._optimizer = tf.train.AdamOptimizer(learning_rate=l_r).minimize(self._loss)

            # A single Saver, created after the optimizer so that it covers the
            # same variables a Saver built at save time would. Building a new
            # Saver on every save/restore call added fresh save/restore ops to
            # the graph each time.
            self._saver = tf.train.Saver()

            self._session = tf.Session(graph=self.graph)
            self._session.run(tf.global_variables_initializer())

    def train(self, X, y):
        """Run one optimisation step and return the loss for this batch.

        Parameters
        ----------
        X : array, shape (batch, *input_shape, 1)
            Input states.
        y : array, shape (batch, num_actions)
            Target values for every output.
        """
        feed = {self._X: X, self._y: y, self._is_training: True}
        loss, _ = self._session.run([self._loss, self._optimizer], feed_dict=feed)
        return loss

    def get_q_values(self, state):
        """Return the network outputs for `state` (shape (batch, *input_shape, 1)).

        Dropout is disabled here because the training flag keeps its default.
        """
        return self._session.run(self._y_pred, feed_dict={self._X: state})

    def get_best_action(self, state):
        """Return the index of the largest Q-value for `state`.

        The argmax is taken over the flattened output, so the result is an
        action index only when `state` holds a single sample.
        """
        return self.get_q_values(state).argmax()

    def finish(self):
        """Close the TensorFlow session owned by this instance."""
        self._session.close()

    def save_model(self, path):
        """Save the graph's variables to a checkpoint at `path`."""
        self._saver.save(self._session, path)

    def restore_model(self, path):
        """Load variables from the checkpoint at `path` into the session."""
        self._saver.restore(self._session, path)

## LET'S PLAY!

In [17]:
# Builds the discrete action set; `max_ones` controls how many buttons may be pressed simultaneously.
def create_actions(listy, n, max_ones=1):
    '''
    Return every 0/1 vector of length n that starts with the prefix `listy`
    and contains at most `max_ones` ones, as a list of lists.

    Vectors are produced in depth-first order with the 0 branch first, e.g.
    create_actions([], 3, 1) -> [[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]].

    Parameters
    ----------
    listy : list of int
        Prefix already chosen (use [] to enumerate everything). It is not modified.
    n : int
        Length of each action vector.
    max_ones : int
        Maximum number of ones allowed in a vector.

    Returns
    -------
    list of list of int
        Always a list of vectors, including when only one vector exists
        (previously a bare, flat vector was returned in that case and the
        caller's `listy` could be padded in place).
    '''
    prefix = list(listy)  # work on a copy so the caller's list is never mutated

    # Quota of ones reached: the only completion is padding with zeros.
    # A non-positive pad length yields [], which leaves over-long prefixes as is.
    if sum(prefix) >= max_ones:
        return [prefix + [0] * (n - len(prefix))]

    # Vector complete.
    if len(prefix) >= n:
        return [prefix]

    # Branch on the next position, 0 first so the original ordering is kept.
    return (create_actions(prefix + [0], n, max_ones)
            + create_actions(prefix + [1], n, max_ones))

def put_it_in_a_single_list(listy):
    """Flatten arbitrarily nested lists of vectors into one flat list of vectors.

    Walks `listy` depth-first. Leading elements that are lists are descended
    into; as soon as a container yields an element that is not a list, that
    container itself is emitted and the rest of it is skipped. Emitted items
    are the original list objects, not copies.
    """
    def _walk(container):
        for element in container:
            if type(element) is list:
                yield from _walk(element)
            else:
                # First non-list element: the container is itself a vector.
                yield container
                return

    return list(_walk(listy))

In [20]:
# Build the action list first so the network's output size can be derived from
# it instead of being hard-coded (it was a literal 4).
game = initialize_game()
#actions
n = game.get_available_buttons_size()
# Third argument is max_ones (maximum number of buttons pressed at once);
# experiment with values in range(1, n).
actions = create_actions([], n, 1)

# One output per action. The checkpoint must have been saved from a network
# with the same number of outputs, otherwise the restore fails.
model = DQN(len(actions))
model.restore_model("model/main_save")
print("Actions: ", actions)

INFO:tensorflow:Restoring parameters from model/main_save
Actions:  [[0, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]


In [28]:
# Plays one episode, always taking the model's best action, and returns the episode's total reward.

def play_optimal(visible=False):
    """Play one episode with the greedy policy of the global `model`.

    A fresh game is started for the episode. At every step the current screen
    buffer is preprocessed, the action with the highest predicted Q-value is
    looked up in the global `actions` list and executed for `frames_to_skip`
    tics.

    Parameters
    ----------
    visible : bool
        Show the game window and pause 0.2 s between actions so the episode
        can be watched.

    Returns
    -------
    The total reward reported by the game for the episode.
    """
    import time  # only needed to slow playback down when the window is shown

    game = initialize_game(visible)
    try:
        game.new_episode()
        while not game.is_episode_finished():
            state = process_raw_input(game.get_state().screen_buffer)
            # The network expects a leading batch axis.
            batch = state.reshape((1, *state.shape))
            game.make_action(actions[model.get_best_action(batch)], frames_to_skip)
            if visible:
                time.sleep(0.2)
        # Read the reward before the game is closed in `finally`.
        return game.get_total_reward()
    finally:
        # Every call starts a new DoomGame; without an explicit close each
        # episode left an engine instance behind. Closing also removes the
        # window, which the former set_window_visible(False) call on an
        # already running game presumably did not do (see ViZDoom docs).
        game.close()

In [35]:
# Evaluate the greedy policy over a fixed number of episodes and summarise the rewards.
episodes = 100
reward_list = []
for epi in range(episodes):
    episode_reward = play_optimal(visible=False)
    reward_list.append(episode_reward)
    if epi % 10:
        continue
    # Reached on episodes 1, 11, 21, ...: report the running mean so far.
    played = len(reward_list)
    print("Average reward for {} episodes: {}".format(played, sum(reward_list)/played))

pd.DataFrame(reward_list).describe()

Average reward for 1 episodes: 3.0
Average reward for 11 episodes: 4.454545454545454
Average reward for 21 episodes: 4.523809523809524
Average reward for 31 episodes: 4.967741935483871
Average reward for 41 episodes: 4.926829268292683
Average reward for 51 episodes: 4.745098039215686
Average reward for 61 episodes: 4.754098360655738
Average reward for 71 episodes: 4.619718309859155
Average reward for 81 episodes: 4.617283950617284
Average reward for 91 episodes: 4.725274725274725


Unnamed: 0,0
count,100.0
mean,4.75
std,1.860678
min,1.0
25%,3.0
50%,4.5
75%,6.0
max,13.0
