In [1]:
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [2]:
!pip install ALE
!pip install gym[atari,accept-rom-license]==0.21.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ALE
  Downloading Ale-0.8.4.tar.gz (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.4/53.4 KB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ALE
  Building wheel for ALE (setup.py) ... [?25l[?25hdone
  Created wheel for ALE: filename=Ale-0.8.4-py3-none-any.whl size=70176 sha256=aa67f6bc0d0c473c5a5fa4661f3252741bef72f8faec184ac06e11982f7d0b10
  Stored in directory: /root/.cache/pip/wheels/90/6e/89/be043555e2e48a57e1797b91174868898b7545a305178016cb
Successfully built ALE
Installing collected packages: ALE
Successfully installed ALE-0.8.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gym[accept-rom-license,atari]==0.21.0
  Downloading gym-0.21.0.tar.gz (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

## Packages

In [7]:
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import torch
import torch.utils
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset, random_split
import gym
import numpy as np
import pickle

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## Pytorch Dataset

In [4]:
class ExpertDataSet(Dataset):
    def __init__(self, expert_observations, expert_actions):
        self.observations = expert_observations
        self.actions = expert_actions
        self.img_transforms=transforms.Compose([transforms.ToTensor()])

    def __getitem__(self, index):
        return (self.img_transforms(self.observations[index]), self.actions[index])

    def __len__(self):
        return len(self.observations)

## Neural Network

In [45]:
class ImitationAgent(nn.Module):
  def __init__(self, num_actions):
    super(ImitationAgent, self).__init__()
    self.gs = transforms.Grayscale()
    self.rs = transforms.Resize((64,64))
    
    ## Activation functions
    self.relu = nn.ReLU()

    ## Convo Layers
    self.c1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7)
    

    self.c2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)
    

    ## FC Layers
    self.fc1 = nn.Linear(in_features=32, out_features=32)
    self.fc2 = nn.Linear(in_features=32, out_features=num_actions)

  def forward(self, x):
    ## 1st Convo Layer
    x = self.c1(x)
    x = self.relu(x)
    

    ## 2nd Convo Layer
    x = self.c2(x)
    x = self.relu(x)
    
     
    ## 1st FC Layer
    batch_size = x.shape[0]
    x = x.reshape(batch_size, 32, -1).max(axis=2).values
    x = self.fc1(x)
    x = self.relu(x)

    ## 2nd FC Layer
    x = self.fc2(x)
    p = F.softmax(x, dim=1)

    return x,p

  def act(self, state):
    # Stack 4 states
    #state = torch.vstack([self.preproc_state(state) for state in states]).unsqueeze(0).to(self.device)
    
    # Get Action Probabilities
    probs = self.forward(state).cpu()
    
    # Return Action and LogProb
    action = probs.argmax(-1)
    return action.item()
    
  def preproc_state(self, state):
    # State Preprocessing
    state = state.transpose(2,0,1) #Torch wants images in format (channels, height, width)
    state = torch.from_numpy(state)
    
    return state # normalize

## Train

In [46]:
objects = []
with (open("/content/drive/MyDrive/Colab Notebooks/RL/Project/expert_trace.pkl", "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break

obs = objects[0][0]
act = objects[0][1]
expert_dataset = ExpertDataSet(obs, act)

env = gym.make('MontezumaRevenge-v4', render_mode='rgb_array')
# Define relevant variables for the ML task
learning_rate = 0.005
num_epochs = 200
num_workers = 2
batch_size = 64
train_prop = 0.8
train_size = int(train_prop * len(expert_dataset))
test_size = len(expert_dataset) - train_size

train_expert_dataset, test_expert_dataset = random_split(expert_dataset, [train_size, test_size])
train_loader = torch.utils.data.DataLoader(  dataset=train_expert_dataset, batch_size=batch_size, shuffle=True,num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(  dataset=test_expert_dataset, batch_size=batch_size, shuffle=True,num_workers=num_workers)

student = ImitationAgent(env.action_space.n)
student.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(student.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)  


for epoch in range(num_epochs):
    for batch, (images,labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        results,_ = student(images)
        loss = criterion(results, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('Epoch {}/{} | Batch {}/{} | Training Loss: {:.4f}'.format(epoch+1, num_epochs, batch+1, batch_size, loss.item()))

        if epoch % 100 == 0:
          net_checkpoint = "/content/drive/MyDrive/Colab Notebooks/RL/Project/imit"+str(epoch+1)+".pt"
          torch.save(student.state_dict(), net_checkpoint)

net_checkpoint = "/content/drive/MyDrive/Colab Notebooks/RL/Project/imit_final.pt"
torch.save(student.state_dict(), net_checkpoint)

Epoch 1/200 | Batch 1/64 | Training Loss: 2.9698
Epoch 1/200 | Batch 2/64 | Training Loss: 2.9398
Epoch 1/200 | Batch 3/64 | Training Loss: 2.9680
Epoch 1/200 | Batch 4/64 | Training Loss: 2.9399
Epoch 1/200 | Batch 5/64 | Training Loss: 2.9247
Epoch 1/200 | Batch 6/64 | Training Loss: 2.8951
Epoch 1/200 | Batch 7/64 | Training Loss: 2.8846
Epoch 1/200 | Batch 8/64 | Training Loss: 2.8708
Epoch 2/200 | Batch 1/64 | Training Loss: 2.8574
Epoch 2/200 | Batch 2/64 | Training Loss: 2.8171
Epoch 2/200 | Batch 3/64 | Training Loss: 2.7967
Epoch 2/200 | Batch 4/64 | Training Loss: 2.7722
Epoch 2/200 | Batch 5/64 | Training Loss: 2.7199
Epoch 2/200 | Batch 6/64 | Training Loss: 2.6986
Epoch 2/200 | Batch 7/64 | Training Loss: 2.6642
Epoch 2/200 | Batch 8/64 | Training Loss: 2.5632
Epoch 3/200 | Batch 1/64 | Training Loss: 2.5027
Epoch 3/200 | Batch 2/64 | Training Loss: 2.4962
Epoch 3/200 | Batch 3/64 | Training Loss: 2.2782
Epoch 3/200 | Batch 4/64 | Training Loss: 2.4069
Epoch 3/200 | Batch 