In [403]:
from pprint import pprint

import numpy as np

import sys
np.set_printoptions(threshold=sys.maxsize)

import tensorflow as tf      # Deep Learning library
import numpy as np           # Handle matrices
from vizdoom import *        # Doom Environment

import random                # Handling random number generation
import time                  # Handling time calculation
from skimage import transform# Help us to preprocess the frames

from collections import deque# Ordered collection with ends
import matplotlib.pyplot as plt # Display graphs

import warnings # This ignore all the warning messages that are normally printed during the training because of skiimage
warnings.filterwarnings('ignore')

In [404]:
"""
Here we create our environment
"""
def create_environment():
    """Create, configure and start the ViZDoom game used by this notebook.

    Returns:
        tuple: ``(game, possible_actions)`` where ``game`` is the initialised
        ``DoomGame`` and ``possible_actions`` is a list of three one-hot
        action vectors (``[1, 0, 0]``, ``[0, 1, 0]``, ``[0, 0, 1]``).
    """
    doom = DoomGame()

    # Configuration first, then the scenario file it should run.
    doom.load_config("basic.cfg")
    doom.set_doom_scenario_path("basic.wad")

    # The labels buffer is what the rest of the notebook reads object names
    # and positions from, so it must be switched on before init().
    doom.set_labels_buffer_enabled(True)

    doom.init()

    # One one-hot row per button; presumably MOVE_LEFT, MOVE_RIGHT, ATTACK as
    # declared in basic.cfg (not visible here — verify against the config).
    one_hot_actions = np.identity(3, dtype=int).tolist()

    return doom, one_hot_actions

In [405]:
def get_label(labels, label_name):
    """Return the first label whose ``object_name`` equals ``label_name``.

    Args:
        labels: Iterable of objects exposing an ``object_name`` attribute.
        label_name: Name to look for.

    Returns:
        The first matching label, or ``None`` when nothing matches.
    """
    matches = (entry for entry in labels if entry.object_name == label_name)
    return next(matches, None)

In [406]:
"""
Here we perform random actions to test the environment
"""
def test_environment(game, actions):
    """Smoke-test the environment by playing ten episodes of random actions.

    For every step in which both the Cacodemon and the player appear in the
    labels buffer, their names and x/y positions are printed. The total reward
    is printed at the end of each episode.

    Args:
        game: An initialised ``DoomGame``.
        actions: List of action vectors to sample from.

    Note:
        The game is closed before returning.
    """
    episodes = 10
    for _ in range(episodes):
        game.new_episode()
        time.sleep(1)
        while not game.is_episode_finished():
            state = game.get_state()

            action = random.choice(actions)
            game.make_action(action)

            # Positions come from the state observed *before* the action.
            demon = get_label(state.labels, "Cacodemon")
            player = get_label(state.labels, "DoomPlayer")
            # Either object can be missing from the labels buffer; only print
            # when both are available instead of crashing on a None player.
            if demon is not None and player is not None:
                print(f"{demon.object_name} x: {demon.object_position_x}, y: {demon.object_position_y} {player.object_name} x: {player.object_position_x}, y: {player.object_position_y}")
        print ("Result:", game.get_total_reward())
        time.sleep(0.05)
    game.close()

In [416]:
# Create the game and the list of one-hot action vectors used by the cells below.
game, actions = create_environment()

# Optional random-action smoke test. Note that test_environment() finishes by
# calling game.close().
# test_environment(game, actions)

In [417]:
# --- State space -----------------------------------------------------------
# A state is the integer difference (enemy y - player y), shifted so that it
# can be used directly as a row index into the Q-table.
MAX_Y_LEFT = -200 # Max left coordinate
MAX_Y_RIGHT = 240 # Max right coordinate

MIN_STATE = MAX_Y_LEFT - MAX_Y_RIGHT # Smallest difference: enemy far left, player far right
MAX_STATE = MAX_Y_RIGHT - MAX_Y_LEFT # Largest difference: enemy far right, player far left

STATE_SIZE = MAX_STATE - MIN_STATE + 1 # Number of distinct integer differences (both ends included)

# Shift that maps MIN_STATE to row 0. Derived from the bounds so it cannot
# drift out of sync if MAX_Y_LEFT / MAX_Y_RIGHT are changed.
STATE_OFFSET = -MIN_STATE

# --- Action space ----------------------------------------------------------
ACTION_SIZE = len(actions) # The number of actions

# --- Q-learning hyper-parameters -------------------------------------------
MAX_EPISODES = 500            # Number of training episodes

LEARNING_RATE = 0.4           # Step size of each Q-value update

GAMMA = 0.6                   # Discount factor applied to the next state's best Q-value

# --- Epsilon-greedy exploration ---------------------------------------------
epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability
decay_rate = 0.09             # Exponential decay rate of epsilon per episode


In [418]:
def create_q_table(state_size, action_size):
    """Allocate a zero-initialised Q-table.

    Args:
        state_size: Number of rows (one per state).
        action_size: Number of columns (one per action).

    Returns:
        numpy.ndarray: Array of zeros with shape ``(state_size, action_size)``.
    """
    table_shape = (state_size, action_size)
    return np.zeros(table_shape)

In [419]:
# Module-level Q-table: one row per state index, one column per action.
q_table = create_q_table(STATE_SIZE, ACTION_SIZE)

In [420]:
def create_q(q_table):
    """Build a bounds-checked accessor over a 2-D Q-table.

    The number of rows is printed once when the accessor is created.

    Args:
        q_table: 2-D array indexed as ``q_table[state][action]``.

    Returns:
        callable: ``Q(state, action)`` returning the stored value, or the
        sentinel ``-100000000`` when either index is outside the table.
    """
    rows, columns = q_table.shape
    print(rows)
    def Q(state, action):
        # Valid indices are 0..rows-1 and 0..columns-1. An index equal to the
        # dimension is already out of range and must get the sentinel too.
        if not (0 <= state < rows and 0 <= action < columns):
            return -100000000
        return q_table[state][action]
    return Q

In [421]:
def explore():
    """Decide between exploration and exploitation for one step.

    Draws a uniform random number in [0, 1] and compares it with the
    module-level ``epsilon``.

    Returns:
        int: ``1`` to explore (draw below ``epsilon``), ``0`` to exploit.
    """
    draw = random.uniform(0, 1)
    if draw < epsilon:
        return 1
    return 0

In [422]:
# Wrap the table in the accessor returned by create_q (which also prints the
# row count), then spot-check the entry for state 0 / action 0.
Q = create_q(q_table)
print(Q(0, 0))

881
0.0


In [423]:
def _observe_state(game_state):
    """Convert a game state into a Q-table row index, or None if unobservable.

    The index is ``int(demon_y - player_y) + STATE_OFFSET``, clamped to the
    rows of ``q_table``.

    Args:
        game_state: Object returned by ``game.get_state()``; may be ``None``.

    Returns:
        int or None: Row index, or ``None`` when the state is ``None`` or the
        Cacodemon / player label is missing from the labels buffer.
    """
    if game_state is None:
        return None

    demon = get_label(game_state.labels, "Cacodemon")
    player = get_label(game_state.labels, "DoomPlayer")
    if demon is None or player is None:
        return None

    row = int(demon.object_position_y - player.object_position_y) + STATE_OFFSET
    # Clamp so an unexpected coordinate can neither raise IndexError nor
    # silently wrap around through a negative index.
    return min(max(row, 0), q_table.shape[0] - 1)


def train(game):
    """Train the module-level ``q_table`` with epsilon-greedy Q-learning.

    Runs ``MAX_EPISODES`` episodes. Each step picks an action (random when
    ``explore()`` says so, greedy otherwise), applies it, and updates the table::

        Q[s, a] += LEARNING_RATE * (reward + GAMMA * max(Q[s', :]) - Q[s, a])

    On a terminal transition (no next state) the bootstrap term is dropped.
    After each episode the module-level ``epsilon`` is decayed exponentially
    towards ``min_epsilon``.

    Args:
        game: An initialised ``DoomGame`` with the labels buffer enabled.

    Raises:
        RuntimeError: If the first observation of an episode does not contain
            both the Cacodemon and the player labels.

    Note:
        The game is closed before returning.
    """
    # explore() reads the module-level epsilon. Without this declaration the
    # assignment below would create a local and exploration would never decay.
    global epsilon

    episode = 0
    while episode < MAX_EPISODES:
        game.new_episode()

        # Calculate the initial state
        state = _observe_state(game.get_state())
        if state is None:
            raise RuntimeError(
                "Initial game state has no Cacodemon/DoomPlayer label; "
                "cannot compute the starting state."
            )

        while not game.is_episode_finished():
            # Work with the action *index*: the one-hot vector is what the
            # game expects, the index is what addresses the Q-table column.
            if explore():
                # randomly choose an action from the current state --> exploration
                action_index = random.randrange(len(actions))
            else:
                # choose the best action --> exploitation
                action_index = int(np.argmax(q_table[state, :]))
            action = actions[action_index]

            reward = game.make_action(action)

            # Observe the state *after* the action; it is expected to be None
            # once the episode has finished.
            new_game_state = game.get_state()

            if new_game_state is None:
                # Terminal transition: no next state to bootstrap from.
                print(f"{state} {action}")
                q_table[state, action_index] += LEARNING_RATE * (
                    reward - q_table[state, action_index]
                )
                break

            new_state = _observe_state(new_game_state)
            if new_state is None:
                # Next state cannot be located in the table; skip this update.
                continue

            td_target = reward + GAMMA * np.max(q_table[new_state, :])
            q_table[state, action_index] += LEARNING_RATE * (
                td_target - q_table[state, action_index]
            )
            state = new_state
            if reward > 0:
                break
        print ("End of one episode")
        epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)
        print(f"epsilon : {epsilon}")
        episode = episode + 1
        time.sleep(0.5)
    game.close()

In [424]:
# Run the training loop on the game created above.
# NOTE(review): the traceback recorded below mentions get_game_variable(...,
# 'AMMO2'), which train() as defined in this notebook never calls. The output
# looks stale (from an earlier version of the cell) — re-run to refresh.
train(game)

TypeError: get_game_variable(): incompatible function arguments. The following argument types are supported:
    1. (self: vizdoom.vizdoom.DoomGame, arg0: vizdoom.vizdoom.GameVariable) -> float

Invoked with: <vizdoom.vizdoom.DoomGame object at 0x7f0ed9699b70>, 'AMMO2'

In [370]:
# Dump the Q-table. With np.set_printoptions(threshold=sys.maxsize) from the
# imports cell, every row is printed rather than a summary.
print(q_table)

[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+

In [338]:
# Pretty-print the Q-table (shows the array repr).
pprint(q_table)

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       ...,
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])
