In [5]:
from __future__ import print_function
import vizdoom as vzd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random

from vizdoom import GameVariable
from time import sleep
from matplotlib import pyplot as plt
from collections import deque

In [6]:
# Sets time that will pause the engine after each action (in seconds)
# Without this everything would go too fast for you to keep track of what's happening.
sleep_time = 1.0 / vzd.DEFAULT_TICRATE  # = 0.028

In [7]:
def create_game():
    # Create DoomGame instance. It will run the game and communicate with you.
    game = vzd.DoomGame()

    # Now it's time for configuration!
    # load_config could be used to load configuration instead of doing it here with code.
    # If load_config is used in-code configuration will also work - most recent changes will add to previous ones.
    # game.load_config("my_basic.cfg") TODO

    # Sets path to additional resources wad file which is basically your scenario wad.
    # If not specified default maps will be used and it's pretty much useless... unless you want to play good old Doom.
    game.set_doom_scenario_path("../scenarios/basic.wad")

    # Sets map to start (scenario .wad files can contain many maps).
    game.set_doom_map("map01")

    # Sets resolution. Default is 320X240
    game.set_screen_resolution(vzd.ScreenResolution.RES_640X480)

    # Sets the screen buffer format. Not used here but now you can change it. Default is CRCGCB.
    game.set_screen_format(vzd.ScreenFormat.RGB24)

    # Enables depth buffer.
    game.set_depth_buffer_enabled(True)

    # Enables labeling of in game objects labeling.
    game.set_labels_buffer_enabled(True)

    # Enables buffer with top down map of the current episode/level.
    game.set_automap_buffer_enabled(True)

    # Enables information about all objects present in the current episode/level.
    game.set_objects_info_enabled(True)

    # Enables information about all sectors (map layout).
    game.set_sectors_info_enabled(True)

    # Sets other rendering options (all of these options except crosshair are enabled (set to True) by default)
    game.set_render_hud(False)
    game.set_render_minimal_hud(False)  # If hud is enabled
    game.set_render_crosshair(True)
    game.set_render_weapon(True)
    game.set_render_decals(False)  # Bullet holes and blood on the walls
    game.set_render_particles(False)
    game.set_render_effects_sprites(False)  # Smoke and blood
    game.set_render_messages(False)  # In-game messages
    game.set_render_corpses(False)
    game.set_render_screen_flashes(True)  # Effect upon taking damage or picking up items

    # Adds buttons that will be allowed.
    game.add_available_button(vzd.Button.MOVE_LEFT)
    game.add_available_button(vzd.Button.MOVE_RIGHT)
    game.add_available_button(vzd.Button.ATTACK)

    # Adds game variables that will be included in state.
    game.add_available_game_variable(vzd.GameVariable.AMMO2)

    # Causes episodes to finish after 200 tics (actions)
    game.set_episode_timeout(200)

    # Makes episodes start after 10 tics (~after raising the weapon)
    game.set_episode_start_time(10)

    # Makes the window appear (turned on by default)
    game.set_window_visible(True)

    # Turns on the sound. (turned off by default)
    game.set_sound_enabled(False)

    # Sets the living reward (for each move) to -1
    game.set_living_reward(-1)

    # Sets ViZDoom mode (PLAYER, ASYNC_PLAYER, SPECTATOR, ASYNC_SPECTATOR, PLAYER mode is default)
    game.set_mode(vzd.Mode.PLAYER)

    # Define some actions. Each list entry corresponds to declared buttons:
    # MOVE_LEFT, MOVE_RIGHT, ATTACK
    # game.get_available_buttons_size() can be used to check the number of available buttons.
    # 5 more combinations are naturally possible but only 3 are included for transparency when watching.
    actions = [[True, False, False], [False, True, False], [False, False, True]]
    
    return game, actions

In [26]:
def run(game, agent, actions, episodes, verbose=True, print_step_info=False):
    game.init()

    for i in range(episodes):
        game.new_episode()
        global_step = 0
        done = False
        print("Episode #" + str(i + 1))
        
        stack_size = 4
        stacked_frames = deque([torch.zeros((299 , 399)) for i in range(stack_size)], maxlen=stack_size)
        observation = preprocess_stacked_frames(stacked_frames)
        # fill the initial deque with zeros of the same shape the frame is after preprocessing

        while not game.is_episode_finished():
            old_observation = observation
            state = game.get_state()
            frame = preprocess_frame(state.screen_buffer)
            stacked_frames.append(frame)
                
            observation = preprocess_stacked_frames(stacked_frames)
            action = agent.get_action(observation)
            reward = game.make_action(action)
            done = game.is_episode_finished()
            agent.append_memory(old_observation, torch.tensor(action),
                                torch.tensor(reward), observation, torch.tensor(done))
            
            if global_step > agent.batch_size:
                agent.train()
            
            if print_step_info:
                print("State #" + str(state.number))
                print("Reward:", reward)
                print("=====================")

            if sleep_time > 0:
                sleep(sleep_time)
            global_step += 1
        if verbose:        
            print("Episode finished.")
            print("Total reward:", game.get_total_reward())
            print("************************")

    game.close()

In [27]:
def preprocess_frame(img):
    rgb_weights = [0.2989, 0.5870, 0.1140]
    img = img @ rgb_weights
    img = img[181:,121:-120]
    
    return torch.tensor(img).float()

In [28]:
def preprocess_stacked_frames(stacked_frames):
    stack = torch.stack(tuple(stacked_frames))
    return stack

In [29]:
def plot_state(state, gray):
    plt.figure(figsize=(12,8))
    if gray:
        plt.imshow(preprocess(screen), "gray");
    else:
        plt.imshow(screen)

In [30]:
def to_categorical(x, action_size):
    return torch.eye(action_size)[x].squeeze().bool()

In [31]:
class Qnet(nn.Module):
    def __init__(self, action_size):
        super(Qnet, self).__init__()
        self.action_size = action_size
        
        self.conv1 = nn.Sequential(
            nn.Conv2d(4, 16, 3, padding=0, stride=2, bias=False), # (16, 149, 199)
            nn.BatchNorm2d(16),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, padding=(1,0), stride=2, bias=False), # (32, 75, 99)
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=0, stride=2, bias=False), # (64, 37, 49)
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=0, stride=2, bias=False), # (128, 18, 24)
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.fc1 = nn.Sequential(
            nn.Linear(128, 100),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(100, self.action_size),
            nn.Softmax(0)
        )
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = x.mean((2, 3)) # global average pool
        x = x.view(-1, 128)
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x

In [1]:
class DQNAgent:
    def __init__(self, action_size, actions, epsilon=0.99, memory_size=1000, 
                 batch_size=64, discount_factor=0.99, lr=1e-4, epsilon_decay=0.99): # TOdo use eps decay
        self.action_size = action_size
        self.q_net = Qnet(action_size)
        self.epsilon = epsilon
        self.actions = actions
        self.batch_size = batch_size
        self.discount = discount_factor
        self.lr = lr
        self.memory = deque(maxlen=memory_size)
        self.opt = optim.Adam(self.q_net.parameters(), lr=self.lr)
    
    def get_action(self, state):
        if np.random.uniform() < self.epsilon:
            return random.choice(self.actions)
        else:
            action = torch.argmax(self.q_net(state.unsqueeze(0))).item()
            return self.actions[action]
        
    def append_memory(self, state, action, reward, next_state, done):
        # state is the last 4 frames stacked here
        # make multiple memory for performance optimization
        self.memory.append((state, action, reward, next_state, done))
    
    def train(self):
        self.opt.zero_grad()
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = [], [], [], [], []
        
        for sample in batch:
            states.append(sample[0])
            actions.append(sample[1])
            rewards.append(sample[2])
            next_states.append(sample[3])
            dones.append(sample[4])
        
        states = torch.stack(states)
        actions = torch.stack(actions).bool()
        actions = to_categorical(actions, self.action_size)
        rewards = torch.stack(rewards)
        next_states = torch.stack(next_states)
        dones = torch.stack(dones).bool()
        not_dones = ~dones
        
        state_values = self.q_net(states)[actions]
        next_state_values = torch.max(self.q_net(next_states), 0)
        next_state_values = next_state_values.values[not_dones.squeeze()]
        
        Y = rewards.detach().clone() # is detaching correct?
        Y[not_dones] += self.discount * next_state_values
        
        loss = torch.sum((Y - state_values) ** 2) / self.batch_size
        loss.backward()
        self.opt.step()
        

In [2]:
game, actions = create_game()
agent = DQNAgent(3, actions)

NameError: name 'create_game' is not defined

In [34]:
game.close()

In [35]:
run(game, agent, actions, 10, print_step_info=False)
game.close()

Episode #1


IndexError: The shape of the mask [64, 3] at index 0does not match the shape of the indexed tensor [3, 3] at index 0

In [None]:
frames = preprocess_stacked_frames(screen)

In [247]:
q = Qnet(action_size=3)

In [241]:
mem = deque([(frames, torch.randn(1), torch.randn(1), frames, torch.randn(1)) for _ in range(10)])

In [242]:
batch = random.sample(mem, 3)

In [323]:
st, ac, r, nst, d = [], [], [], [], []
for x in batch:
    st.append(x[0])
    ac.append(x[1])
    r.append(x[2])
    nst.append(x[3])
    d.append(x[4])
    
st = torch.stack(st)
ac = torch.stack(ac).long()
r = torch.stack(r)
nst = torch.stack(nst)
d = torch.stack(d).bool()
d[1] = False

Y = r.detach().clone()

In [324]:
Y

tensor([[ 0.4415],
        [-0.6384],
        [ 0.3228]])

In [325]:
nd = ~d

In [326]:
torch.max(q(nst), 0).values[nd.squeeze()]

tensor([0.3333], grad_fn=<IndexBackward>)

In [327]:
Y[nd] += 0.99 * torch.max(q(nst), 0).values[nd.squeeze()]

In [328]:
Y

tensor([[ 0.4415],
        [-0.3084],
        [ 0.3228]], grad_fn=<IndexPutBackward>)

In [419]:
foo[to_categorical(ac, 3)]

tensor([ 0.6510, -0.8645, -0.5376])