Deep Convolutional Q-Learning

Eligibility Trace - takes into account more than one step at a time; it knows the 'end' state and the cumulative reward.
It lets us keep track of which step in the chain is 'eligible' to be updated, based on the cumulative reward.

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as Variable

In [2]:
import gym
from gym.wrappers import SkipWrapper
from ppaquette_gym_doom.wrappers.action_space import ToDiscrete

ModuleNotFoundError: No module named 'gym'

In [5]:
# Making the Brain

class CNN(nn.Module):
    """Convolutional network producing one output score per action.

    Three convolution -> max-pool -> ReLU stages are followed by two fully
    connected layers. The input size of the first fully connected layer is
    measured at construction time by pushing a dummy image through the
    convolutional stages.
    """

    def __init__(self, number_actions, image_dim=(1, 80, 80)):
        """Create the layers.

        Args:
            number_actions: Number of output features of the final layer.
            image_dim: ``(channels, height, width)`` of a single input image.
                Defaults to the ``(1, 80, 80)`` shape the network was
                originally hard-coded for.
        """
        super().__init__()
        self.convolution1 = nn.Conv2d(in_channels=image_dim[0], out_channels=32, kernel_size=5)
        self.convolution2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.convolution3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=2)
        # The flattened feature count depends on the image size, so measure it
        # instead of computing it by hand.
        self.fc1 = nn.Linear(in_features=self.count_neurons(image_dim), out_features=40)
        self.fc2 = nn.Linear(in_features=40, out_features=number_actions)

    def _extract_features(self, x):
        """Apply the three convolution -> max-pool(3, stride 2) -> ReLU stages."""
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2))
        return x

    def count_neurons(self, image_dim):
        """Return the number of features the convolutional stages emit.

        Args:
            image_dim: ``(channels, height, width)`` of a single input image.

        Returns:
            The flattened feature count for one image, as an ``int``.
        """
        # A plain tensor is used as the probe: this file binds the name
        # `Variable` to the torch.autograd *module*, which is not callable.
        # no_grad() avoids building an autograd graph for a throwaway pass.
        with torch.no_grad():
            probe = self._extract_features(torch.rand(1, *image_dim))
        return probe.view(1, -1).size(1)

    def forward(self, x):
        """Map a batch of images to a batch of per-action outputs.

        Args:
            x: Tensor laid out as ``(batch, channels, height, width)``.

        Returns:
            Tensor of shape ``(batch, number_actions)``.
        """
        x = self._extract_features(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
 

In [6]:
# Making the Body
class SoftmaxBody(nn.Module):
    """Turns the brain's outputs into a sampled action index.

    The outputs are scaled by ``T``, converted to probabilities with a
    softmax, and one action is drawn at random from that distribution.
    """

    def __init__(self, T):
        """Store the scaling factor.

        Args:
            T: Multiplier applied to the outputs before the softmax. Larger
                values make the resulting distribution more peaked.
        """
        # Must name this class (not CNN): super(CNN, self) raises TypeError
        # because a SoftmaxBody instance is not a CNN.
        super().__init__()
        self.T = T

    def forward(self, outputs):
        """Sample one action per row of ``outputs``.

        Args:
            outputs: 1-D or 2-D tensor whose last axis indexes the actions.

        Returns:
            Integer tensor of sampled action indices with a trailing axis of
            size 1 (``(batch, 1)`` for 2-D input).
        """
        # Normalise over the action axis explicitly; the implicit-dim form of
        # softmax is deprecated.
        probs = F.softmax(outputs * self.T, dim=-1)
        # num_samples is a required argument of multinomial.
        actions = probs.multinomial(num_samples=1)
        return actions
    

In [7]:
# Making the AI

class AI:
    """Combines a brain and a body into a single callable.

    Calling the instance runs the inputs through ``brain`` and then ``body``
    and returns the result as a NumPy array.
    """

    def __init__(self, brain, body):
        """Store the two stages.

        Args:
            brain: Callable applied to the float32 input tensor.
            body: Callable applied to the brain's output; must return a
                tensor.
        """
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        """Run ``inputs`` through the brain and the body.

        Args:
            inputs: Array-like data convertible to a float32 NumPy array.

        Returns:
            The body's result converted to a ``numpy.ndarray``.
        """
        # Tensors are fed to the brain directly: this file binds the name
        # `Variable` to the torch.autograd *module*, which is not callable.
        batch = torch.from_numpy(np.array(inputs, dtype=np.float32))
        output = self.brain(batch)
        actions = self.body(output)
        # detach() drops any autograd history so numpy() is always permitted.
        return actions.detach().numpy()

In [None]:
# Implementing deep-convolutional Q-learning