In [4]:
import os
import random
import numpy as np
import torch 
import torch.nn as nn # neural network module
import torch.optim as optim # optimizers
import torch.nn.functional as F 
import torch.autograd as autograd # automatic differentiation (computes the gradients the optimizers use)
from torch.autograd import variable
from collections import deque, namedtuple
import gymnasium as gym
from PIL import Image
from torchvision import transforms

Network Architecture

In [5]:
class Network(nn.Module):
    """Convolutional network mapping a batch of RGB frames to one value per action.

    Four Conv2d + BatchNorm2d + ReLU stages extract image features, which are
    flattened and passed through three fully connected layers.

    Args:
        action_size: Width of the output layer (number of actions).
        seed: Value passed to ``torch.manual_seed`` when the network is built.
    """

    def __init__(self, action_size, seed=42):
        super(Network, self).__init__()
        self.seed = torch.manual_seed(seed)
        # Giving eyes to the agent in the form of a CNN.
        # The stride is how many pixels the filter shifts at each step.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=8, stride=4)  # 3 input channels = RGB
        # BatchNorm2d's num_features must equal the preceding conv's out_channels.
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1)
        self.bn4 = nn.BatchNorm2d(128)
        # Giving a brain to the agent in the form of a fully connected network.
        # Flattened size: with no padding each conv yields
        #   out = floor((in - kernel_size) / stride) + 1
        # so a 128x128 input goes 128 -> 31 -> 14 -> 12 -> 10, leaving 128
        # feature maps of 10x10. fc1 therefore only fits 128x128 inputs.
        self.fc1 = nn.Linear(10*10*128, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, action_size)

    def forward(self, state):
        """Run a forward pass.

        Args:
            state: Float tensor of shape (batch, 3, 128, 128).

        Returns:
            Tensor of shape (batch, action_size).
        """
        # Propagate the images through the CNN.
        x = F.relu(self.bn1(self.conv1(state)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = x.view(x.size(0), -1)  # keep the batch dimension, flatten the rest
        # Each activation must be assigned back to x; otherwise the hidden
        # layers are skipped and the next layer receives the wrong width.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        return self.fc3(x)

Setting up the environment

In [6]:
# Create the Ms. Pac-Man environment with the reduced action set
# (full_action_space=False) and record its dimensions for later cells.
env = gym.make('MsPacmanDeterministic-v4', full_action_space=False)
state_shape = env.observation_space.shape
state_size = state_shape[0]  # first entry of the observation shape
number_actions = env.action_space.n

# Show the three values, one per line.
print(state_shape, state_size, number_actions, sep='\n')

(210, 160, 3)
210
9


A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Initialize Hyperparameters

In [7]:
# Training hyperparameters, collected in one place so they are easy to tune.
discount_factor = 0.99  # presumably the reward discount (gamma) - confirm at use site
minibatch_size = 64     # presumably the number of samples per learning step - confirm at use site
learning_rate = 5e-4    # presumably the optimizer step size - confirm at use site
# Experience replay is not implemented here: the inputs are now images
# rather than vectors, and storing them would need a lot of memory.

Preprocess Images from frames

In [10]:
def preprocess_frame(frame):
    """Turn a raw frame into a batched 128x128 image tensor.

    Args:
        frame: Image as a NumPy array, in a layout accepted by
            ``PIL.Image.fromarray``.

    Returns:
        The tensor produced by ``transforms.ToTensor`` after resizing to
        128x128, with an extra leading batch dimension of size 1
        (i.e. (1, 3, 128, 128) for an RGB frame).
    """
    image = Image.fromarray(frame)
    # Resize takes the target size as ONE argument: a (height, width) tuple
    # gives an exact 128x128 output. Resize(128, 128) would instead pass the
    # second 128 as the interpolation mode and not fix both dimensions.
    pipeline = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ])

    return pipeline(image).unsqueeze(0)  # add the leading batch dimension