In [43]:
from pprint import pprint

import numpy as np

import sys
# Raise NumPy's summarisation threshold to the largest int so arrays are printed in full.
np.set_printoptions(threshold=sys.maxsize)

import tensorflow as tf      # Deep Learning library
import numpy as np           # Handle matrices
from vizdoom import *        # Doom Environment

import random                # Handling random number generation
import time                  # Handling time calculation
from skimage import transform# Help us to preprocess the frames

from collections import deque# Ordered collection with ends
import matplotlib.pyplot as plt # Display graphs

import warnings # Used to silence all warning messages that are otherwise printed during training because of skimage
warnings.filterwarnings('ignore')

In [44]:
"""
Here we create our environment
"""
def create_environment(config_path="basic.cfg", scenario_path="basic.wad"):
    """Create and initialise the ViZDoom game used by this notebook.

    Args:
        config_path: Path of the ViZDoom configuration file to load.
            Defaults to ``"basic.cfg"``.
        scenario_path: Path of the scenario file to play.
            Defaults to ``"basic.wad"``.

    Returns:
        A ``(game, possible_actions)`` tuple: the initialised ``DoomGame`` and
        the list of one-hot action vectors ``[[1, 0, 0], [0, 1, 0], [0, 0, 1]]``.
    """
    game = DoomGame()

    # Load the configuration first, then point the game at the scenario file.
    game.load_config(config_path)
    game.set_doom_scenario_path(scenario_path)

    # The labels buffer is what the rest of the notebook reads (via
    # state.labels) to locate the enemy and the player.
    game.set_labels_buffer_enabled(True)

    game.init()

    # One one-hot vector per available button.
    # NOTE(review): presumably MOVE_LEFT, MOVE_RIGHT, ATTACK in that order, as
    # configured in the config file -- confirm against basic.cfg.
    possible_actions = np.identity(3, dtype=int).tolist()

    return game, possible_actions

In [45]:
def get_label(labels, label_name):
    """Return the first label whose ``object_name`` equals ``label_name``.

    ``None`` is returned when no label in ``labels`` matches.
    """
    matching = (candidate for candidate in labels
                if candidate.object_name == label_name)
    return next(matching, None)

In [46]:
"""
Here we perform random actions to test the environment
"""
def test_environment(game, actions):
    episodes = 10
    for i in range(episodes):
        game.new_episode()
        time.sleep(1)
        while not game.is_episode_finished():
            state = game.get_state()
            
            img = state.screen_buffer
            misc = state.game_variables
            action = random.choice(actions)
            reward = game.make_action(action)
#             game.advance_action()
#             reward = game.get_last_reward()
            demon = get_label(state.labels, "Cacodemon")
            player = get_label(state.labels, "DoomPlayer")
            if demon is not None:
                print(f"{demon.object_name} x: {demon.object_position_x}, y: {demon.object_position_x} {player.object_name} x: {player.object_position_x}, y: {player.object_position_y}")
        print ("Result:", game.get_total_reward())
        time.sleep(0.05)
    game.close()

In [47]:
# Build the game and the list of one-hot actions used by the cells below.
game, actions = create_environment()

# Uncomment to watch random play. test_environment() closes the game when it
# finishes, so create_environment() must be called again afterwards.
# test_environment(game, actions)

In [48]:
# Coordinate bounds used to size the state space.
# NOTE(review): these are applied to object_position_y differences in train()
# and test_q_table() -- confirm they match the scenario's actual geometry.
MAX_Y_LEFT = -200 # Leftmost coordinate
MAX_Y_RIGHT = 240 # Rightmost coordinate

# A state is (enemy y - player y), so it ranges over [MIN_STATE, MAX_STATE].
MIN_STATE = MAX_Y_LEFT - MAX_Y_RIGHT # Enemy at the far left, player at the far right
MAX_STATE = MAX_Y_RIGHT - MAX_Y_LEFT # Enemy at the far right, player at the far left

STATE_SIZE = abs(MAX_STATE - MIN_STATE) + 1 # Number of distinct integer states (Q-table rows)

# Added to a raw difference so that MIN_STATE maps to row 0. Derived from
# MIN_STATE (currently 440) so it cannot drift if the bounds above change.
STATE_OFFSET = -MIN_STATE

ACTION_SIZE = len(actions) # The number of actions (Q-table columns)

MAX_EPISODES = 1000           # Number of training episodes

LEARNING_RATE = 0.7           # Step size of each Q-value update

GAMMA = 0.7                   # Discount applied to the best next-state value

epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability
decay_rate = 0.005            # Exponential decay rate of epsilon per episode



In [49]:
def create_q_table(state_size, action_size):
    """Return a zero-filled Q-table with one row per state and one column per action."""
    table_shape = (state_size, action_size)
    return np.zeros(table_shape)

In [50]:
# Module-level Q-table: updated in place by train() and passed to test_q_table().
q_table = create_q_table(STATE_SIZE, ACTION_SIZE)

In [51]:
def create_q(q_table):
    """Build a bounds-checked lookup function over ``q_table``.

    Args:
        q_table: 2-D array indexed as ``q_table[state][action]``.

    Returns:
        A function ``Q(state, action)`` that returns the stored value, or the
        sentinel ``-100000000`` when either index lies outside the table.

    Side effects:
        Prints the number of rows of ``q_table``.
    """
    rows, columns = q_table.shape
    print(rows)
    def Q(state, action):
        # Valid indices are 0..rows-1 and 0..columns-1. The upper bounds are
        # exclusive: state == rows or action == columns is already out of range.
        if not (0 <= state < rows and 0 <= action < columns):
            return -100000000
        return q_table[state][action]
    return Q

In [52]:
def explore():
    """Decide between exploring and exploiting.

    Draws one uniform sample from [0, 1] and returns 1 (explore) when it falls
    below the module-level ``epsilon``, otherwise 0 (exploit).
    """
    draw = random.uniform(0, 1)
    if draw < epsilon:
        return 1
    return 0

In [53]:
# Wrap the shared table in the lookup function and sanity-check one entry
# (create_q also prints the table's row count).
Q = create_q(q_table)
print(Q(0, 0))

881
0.0


In [54]:
def train(game):
    """Train the module-level ``q_table`` with tabular Q-learning.

    Runs ``MAX_EPISODES`` episodes. The state is the integer difference between
    the enemy's and the player's ``object_position_y`` shifted by
    ``STATE_OFFSET``; actions are chosen epsilon-greedily via ``explore()``,
    and ``epsilon`` is decayed exponentially after every episode.

    Args:
        game: An initialised game object (as returned by create_environment).

    Side effects:
        Updates the global ``q_table`` in place, rebinds the global
        ``epsilon``, prints progress, and closes ``game`` before returning.
    """
    # explore() reads the module-level epsilon, so the decayed value has to be
    # written back to that same global. Without this declaration the
    # assignment at the end of each episode only creates an unused local and
    # the agent keeps exploring with the initial epsilon forever.
    global epsilon

    last_row = q_table.shape[0] - 1

    def _to_state(demon, player):
        """Map the enemy/player y difference to a valid Q-table row index."""
        raw = int(demon.object_position_y - player.object_position_y) + STATE_OFFSET
        # Clamp: a negative index would silently wrap around to the end of the
        # table and an index past the last row would raise IndexError.
        return min(max(raw, 0), last_row)

    for episode in range(MAX_EPISODES):
        game.new_episode()

        # Calculate the initial state
        game_state = game.get_state()
        demon = get_label(game_state.labels, "Cacodemon")
        player = get_label(game_state.labels, "DoomPlayer")
        state = _to_state(demon, player)

        while not game.is_episode_finished():
            if explore():
                # Exploration: pick a random action
                action = random.choice(actions)
            else:
                # Exploitation: pick the best known action for this state
                action = actions[np.argmax(q_table[state, :])]
            # Actions are one-hot vectors; the hot position is the Q-table column.
            action_index = np.argmax(action)

            # Perform the action for 10 tics before reading the reward
            reward = game.make_action(action, 10)
            game_state = game.get_state()

            if game_state is None:
                # No state is available any more: the episode has ended. There
                # is no successor state, so the target is the reward alone.
                print(f"{state} {action} {reward}")
                q_table[state, action_index] += LEARNING_RATE * (reward - q_table[state, action_index])
                break

            # NOTE(review): game_variables[0] is treated as the ammo count --
            # confirm against the variables declared in the config file.
            ammo = game_state.game_variables[0]
            # Without ammo there is no point in continuing this episode
            if ammo == 0:
                break

            demon = get_label(game_state.labels, "Cacodemon")
            player = get_label(game_state.labels, "DoomPlayer")

            # Without both labels the next state cannot be computed; skip the update.
            if demon is None or player is None:
                continue

            new_state = _to_state(demon, player)

            # Q-learning update: move Q(s, a) towards r + gamma * max_a' Q(s', a')
            td_target = reward + GAMMA * np.max(q_table[new_state, :])
            q_table[state, action_index] += LEARNING_RATE * (td_target - q_table[state, action_index])
            state = new_state

        print (f"End of episode {episode}")
        epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)
    game.close()

In [55]:
# Run the full training loop; train() closes the game when it finishes.
train(game)

479 [0, 0, 1] 101.0
End of episode 0
441 [0, 0, 1] 101.0
End of episode 1
466 [0, 0, 1] 101.0
End of episode 2
416 [0, 0, 1] 101.0
End of episode 3
475 [0, 0, 1] 101.0
End of episode 4
419 [0, 0, 1] 101.0
End of episode 5
473 [0, 0, 1] 101.0
End of episode 6
483 [0, 0, 1] 101.0
End of episode 7
496 [0, 0, 1] 101.0
End of episode 8
397 [0, 0, 1] 101.0
End of episode 9
412 [0, 0, 1] 101.0
End of episode 10
461 [0, 0, 1] 101.0
End of episode 11
459 [0, 0, 1] 101.0
End of episode 12
412 [0, 0, 1] 101.0
End of episode 13
413 [0, 0, 1] 101.0
End of episode 14
431 [0, 0, 1] 101.0
End of episode 15
440 [0, 0, 1] 101.0
End of episode 16
408 [0, 0, 1] 101.0
End of episode 17
443 [0, 0, 1] 101.0
End of episode 18
492 [0, 0, 1] 101.0
End of episode 19
461 [0, 0, 1] 101.0
End of episode 20
462 [0, 0, 1] 101.0
End of episode 21
375 [0, 0, 1] 0.0
End of episode 22
461 [0, 0, 1] 101.0
End of episode 23
458 [0, 0, 1] 101.0
End of episode 24
418 [0, 0, 1] 101.0
End of episode 25
407 [0, 0, 1] 101.0
End 

405 [0, 0, 1] 101.0
End of episode 215
404 [0, 0, 1] 101.0
End of episode 216
441 [0, 0, 1] 101.0
End of episode 217
418 [0, 0, 1] 101.0
End of episode 218
489 [0, 0, 1] 101.0
End of episode 219
414 [0, 0, 1] 101.0
End of episode 220
458 [0, 0, 1] 101.0
End of episode 221
440 [0, 0, 1] 101.0
End of episode 222
465 [0, 0, 1] 101.0
End of episode 223
453 [0, 0, 1] 101.0
End of episode 224
415 [0, 0, 1] 101.0
End of episode 225
483 [0, 0, 1] 101.0
End of episode 226
452 [0, 0, 1] 101.0
End of episode 227
401 [0, 0, 1] 101.0
End of episode 228
467 [0, 0, 1] 101.0
End of episode 229
441 [0, 0, 1] 101.0
End of episode 230
474 [0, 0, 1] 101.0
End of episode 231
418 [0, 0, 1] 101.0
End of episode 232
408 [0, 0, 1] 101.0
End of episode 233
387 [0, 0, 1] 101.0
End of episode 234
448 [0, 0, 1] 101.0
End of episode 235
416 [0, 0, 1] 101.0
End of episode 236
464 [0, 0, 1] 101.0
End of episode 237
442 [0, 0, 1] 101.0
End of episode 238
439 [0, 0, 1] 101.0
End of episode 239
431 [0, 0, 1] 101.0
End o

398 [0, 0, 1] 101.0
End of episode 429
477 [0, 0, 1] 101.0
End of episode 430
483 [0, 0, 1] 101.0
End of episode 431
470 [0, 0, 1] 101.0
End of episode 432
456 [0, 0, 1] 101.0
End of episode 433
459 [1, 0, 0] 101.0
End of episode 434
446 [0, 0, 1] 101.0
End of episode 435
444 [0, 0, 1] 101.0
End of episode 436
392 [0, 0, 1] 101.0
End of episode 437
474 [0, 0, 1] 101.0
End of episode 438
414 [0, 0, 1] 101.0
End of episode 439
466 [0, 0, 1] 101.0
End of episode 440
409 [0, 0, 1] 101.0
End of episode 441
435 [0, 0, 1] 101.0
End of episode 442
410 [0, 0, 1] 101.0
End of episode 443
450 [0, 0, 1] 101.0
End of episode 444
469 [0, 0, 1] 101.0
End of episode 445
404 [0, 0, 1] 101.0
End of episode 446
458 [0, 0, 1] 101.0
End of episode 447
435 [0, 0, 1] 101.0
End of episode 448
415 [0, 0, 1] 101.0
End of episode 449
402 [0, 0, 1] 101.0
End of episode 450
420 [0, 0, 1] 101.0
End of episode 451
452 [0, 0, 1] 101.0
End of episode 452
395 [0, 0, 1] 101.0
End of episode 453
393 [0, 0, 1] 101.0
End o

428 [0, 0, 1] 101.0
End of episode 643
433 [0, 1, 0] 101.0
End of episode 644
463 [0, 0, 1] 101.0
End of episode 645
414 [0, 0, 1] 101.0
End of episode 646
432 [0, 0, 1] 101.0
End of episode 647
431 [0, 0, 1] 101.0
End of episode 648
466 [0, 0, 1] 101.0
End of episode 649
441 [0, 0, 1] 101.0
End of episode 650
End of episode 651
434 [0, 0, 1] 101.0
End of episode 652
419 [0, 0, 1] 101.0
End of episode 653
419 [0, 0, 1] 101.0
End of episode 654
411 [0, 0, 1] 101.0
End of episode 655
End of episode 656
461 [0, 0, 1] 101.0
End of episode 657
427 [0, 0, 1] 101.0
End of episode 658
426 [0, 0, 1] 101.0
End of episode 659
450 [0, 0, 1] 101.0
End of episode 660
411 [0, 0, 1] 101.0
End of episode 661
454 [0, 0, 1] 101.0
End of episode 662
445 [0, 0, 1] 101.0
End of episode 663
452 [0, 0, 1] 101.0
End of episode 664
427 [0, 0, 1] 101.0
End of episode 665
425 [0, 0, 1] 101.0
End of episode 666
456 [0, 0, 1] 101.0
End of episode 667
430 [0, 0, 1] 101.0
End of episode 668
420 [0, 0, 1] 101.0
End of

473 [0, 0, 1] 101.0
End of episode 857
480 [0, 0, 1] 101.0
End of episode 858
460 [0, 0, 1] 101.0
End of episode 859
393 [0, 0, 1] 101.0
End of episode 860
433 [0, 0, 1] 101.0
End of episode 861
421 [0, 0, 1] 101.0
End of episode 862
443 [0, 0, 1] 101.0
End of episode 863
444 [0, 0, 1] 101.0
End of episode 864
446 [0, 0, 1] 101.0
End of episode 865
413 [0, 0, 1] 101.0
End of episode 866
472 [0, 0, 1] 101.0
End of episode 867
436 [0, 0, 1] 101.0
End of episode 868
409 [0, 0, 1] 101.0
End of episode 869
485 [0, 0, 1] 101.0
End of episode 870
449 [0, 0, 1] 101.0
End of episode 871
440 [0, 0, 1] 101.0
End of episode 872
441 [0, 0, 1] 101.0
End of episode 873
End of episode 874
End of episode 875
431 [0, 0, 1] 101.0
End of episode 876
481 [0, 0, 1] 101.0
End of episode 877
468 [0, 0, 1] 101.0
End of episode 878
393 [0, 0, 1] 101.0
End of episode 879
423 [0, 0, 1] 101.0
End of episode 880
474 [0, 0, 1] 101.0
End of episode 881
417 [0, 0, 1] 101.0
End of episode 882
480 [0, 0, 1] 101.0
End of

In [58]:
# Dump the learned table; rows that training never updated are still all zeros.
print(q_table)

[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+

In [57]:
# Same table through pprint, which shows the array's repr form.
pprint(q_table)

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00

In [80]:
def test_q_table(game, q_table):
    while 1:
        game.new_episode()
        while not game.is_episode_finished():
            game_state = game.get_state()
            demon = get_label(game_state.labels, "Cacodemon")
            player = get_label(game_state.labels, "DoomPlayer")
            if demon is None or player is None:
                print("error")
                break
            state = int(demon.object_position_y - player.object_position_y)+STATE_OFFSET
            action = np.argmax(q_table[state, :])
            game_action = actions[action]
            reward = game.make_action(game_action)
            time.sleep(0.05)
        print(game.get_total_reward())

In [81]:
# train() closed the previous game, so build a fresh one for evaluation.
game, actions = create_environment()

In [None]:
# Play with the learned greedy policy. Called like this, test_q_table() loops
# until the kernel is interrupted.
test_q_table(game, q_table)

error
101.0
error
96.0
error
101.0
error
96.0
error
101.0
error
96.0
error
101.0
error
101.0
error
91.0
error
101.0
error
96.0
error
96.0
error
101.0
error
96.0
error
96.0
error
96.0
error
96.0
error
96.0
error
101.0
