In [7]:
import random
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from collections import deque
from vizdoom import *

In [8]:
def create_environment(
    config_path="C:/Python36/Lib/site-packages/vizdoom/scenarios/defend_the_center.cfg",
    scenario_path="C:/Python36/Lib/site-packages/vizdoom/scenarios/defend_the_center.wad",
):
    """Create and initialise the Doom game together with its action list.

    Args:
        config_path: Path of the scenario ``.cfg`` file handed to
            ``game.load_config``. Defaults to the original hard-coded
            ``defend_the_center.cfg`` location.
        scenario_path: Path of the scenario ``.wad`` file handed to
            ``game.set_doom_scenario_path``. Defaults to the original
            hard-coded ``defend_the_center.wad`` location.

    Returns:
        A ``(game, possible_actions)`` tuple: the initialised ``DoomGame``
        and the 3x3 identity matrix as nested lists, i.e.
        ``[[1, 0, 0], [0, 1, 0], [0, 0, 1]]``.
    """
    game = DoomGame()
    game.load_config(config_path)
    game.set_doom_scenario_path(scenario_path)
    game.init()

    # One one-hot row per action.
    # NOTE(review): 3 presumably matches the number of buttons declared in
    # the scenario config -- confirm if a different scenario is passed in.
    possible_actions = np.identity(3, dtype=int).tolist()

    return game, possible_actions
    
def preprocess_frame(frame):
    """Turn one raw frame into a cropped, rescaled 100x160 image.

    Steps, in order:
      1. If the first axis has length 3, average over it (collapses what is
         presumably a channels-first RGB frame to a single plane).
      2. Drop the first 40 rows.
      3. Divide every value by 255.
      4. Resize the result to 100 rows by 160 columns.

    NOTE(review): ``transform`` is not imported in the visible part of this
    file; it is presumably ``skimage.transform`` -- confirm it is imported
    in an earlier cell.
    """
    grey = np.mean(frame, axis=0) if frame.shape[0] == 3 else frame
    scaled = np.divide(grey[40:, :], 255)

    return transform.resize(scaled, [100, 160])

# Number of consecutive frames that make up one stacked state.
stack_size = 4

# Initialise the frame buffer with blank 100x160 frames. The deque is bounded
# by stack_size so appending a new frame automatically evicts the oldest one.
# The builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
stacked_frames = deque(
    [np.zeros((100, 160), dtype=int) for _ in range(stack_size)],
    maxlen=stack_size,
)

def stack_frames(stacked_frames, state, is_new_episode):
    """Preprocess ``state`` and fold it into the rolling frame buffer.

    Args:
        stacked_frames: Deque holding the most recent preprocessed frames.
            It is appended to in place when ``is_new_episode`` is false and
            ignored (replaced by a fresh deque) when it is true.
        state: Raw frame, passed straight to ``preprocess_frame``.
        is_new_episode: When true, the buffer is restarted and filled with
            ``stack_size`` copies of the first frame so the stacked state is
            full from the very first step.

    Returns:
        A ``(stacked_state, stacked_frames)`` tuple: the buffered frames
        stacked along a new last axis (``axis=2``), and the deque that must
        be passed to the next call.
    """
    frame = preprocess_frame(state)

    if is_new_episode:
        # Build the buffer directly from the first frame. The previous
        # zero-filled placeholder (created with the removed ``np.int`` alias)
        # was overwritten immediately, so it is dropped entirely.
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # The bounded deque evicts the oldest frame on append.
        stacked_frames.append(frame)

    stacked_state = np.stack(stacked_frames, axis=2)

    return stacked_state, stacked_frames

def discount_and_normalize_rewards(episode_rewards, discount_rate=None):
    """Compute discounted returns for one episode and standardise them.

    For each step ``i`` the discounted return is
    ``r[i] + d * r[i+1] + d**2 * r[i+2] + ...`` where ``d`` is the discount
    rate. The returns are then shifted to zero mean and, when they are not
    all identical, scaled to unit standard deviation.

    Args:
        episode_rewards: 1-D sequence (list or array) of per-step rewards.
        discount_rate: Discount factor. When ``None`` (the default) the
            module-level ``gamma`` is used, as before.

    Returns:
        A floating-point NumPy array with one entry per reward. An empty
        input yields an empty array; when every discounted return is equal
        (e.g. a single-step episode) the result is all zeros rather than NaN.
    """
    if discount_rate is None:
        # Fall back to the global hyperparameter callers have always relied on.
        discount_rate = gamma

    rewards = np.asarray(episode_rewards)
    if not np.issubdtype(rewards.dtype, np.floating):
        # Integer rewards would otherwise make the output buffer integer-typed
        # and silently truncate every discounted return.
        rewards = rewards.astype(np.float64)

    discounted_episode_rewards = np.zeros_like(rewards)
    if rewards.size == 0:
        # Nothing to discount; avoids mean/std of an empty array (NaN + warning).
        return discounted_episode_rewards

    # Accumulate from the last step backwards so each return reuses the next.
    cumulative = 0.0
    for i in reversed(range(len(rewards))):
        cumulative = cumulative * discount_rate + rewards[i]
        discounted_episode_rewards[i] = cumulative

    centered = discounted_episode_rewards - np.mean(discounted_episode_rewards)
    std = np.std(discounted_episode_rewards)
    if std > 0:
        return centered / std

    # All returns identical: dividing by zero would produce NaN, so return the
    # centered values (all zeros) instead.
    return centered