In [42]:
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [43]:
"""
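Seeds the NumPy and PyTorch random number generators for reproducibility and selects the GPU (when one is available) for faster training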
"""

seed = 0
torch.manual_seed(seed)
np.random.seed(seed)

# Pick the compute device once; everything below keys off this choice.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # Seed the CUDA generator as well and request deterministic cuDNN kernels.
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [44]:
"""
Defines a custom dataset so the rover log can be used with PyTorch's built-in data loading utilities
"""

class RoverDataset(Dataset):
    """Dataset pairing rover camera frames with the logged driving commands.

    Every row of the log yields one sample: the image referenced by the
    "Path" column together with the "SteerAngle" and "Throttle" values of
    the same row.

    Args:
        csv_file: Path of the ';'-separated log file.
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded PIL image.
    """

    # Logged paths are cut at this marker to recover the image file name.
    IMAGE_NAME_MARKER = "robocam"

    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file, sep=";")
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the log."""
        return len(self.data)

    def _image_path(self, logged_path):
        """Map a path stored in the log to the matching file inside ``root_dir``.

        The part of ``logged_path`` starting at ``IMAGE_NAME_MARKER`` is kept.
        When the marker is absent, the last path component is used instead.
        """
        import os  # local import: the notebook's import cell does not provide os

        marker_pos = logged_path.find(self.IMAGE_NAME_MARKER)
        if marker_pos >= 0:
            relative = logged_path[marker_pos:]
        else:
            # str.find returned -1; slicing with it would keep only the last
            # character of the path, so fall back to the plain file name.
            relative = logged_path.replace("\\", "/").rsplit("/", 1)[-1]
        return os.path.join(self.root_dir, relative)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Row label of the sample; a tensor index is converted with
                ``tolist()`` first.

        Returns:
            dict with the keys 'image' (RGB image, passed through
            ``transform`` when one was given), 'SteerAngle' and 'Throttle'
            (float32 tensors).
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.data["Path"][idx]
        with Image.open(self._image_path(img_name)) as img_file:
            # convert() reads the pixel data before the file is closed and
            # guarantees the three channels the normalization expects.
            image = img_file.convert("RGB")

        steering_angle = torch.tensor(self.data["SteerAngle"][idx], dtype=torch.float32)
        throttle = torch.tensor(self.data["Throttle"][idx], dtype=torch.float32)

        if self.transform is not None:
            image = self.transform(image)

        return {'image': image, 'SteerAngle': steering_angle, 'Throttle': throttle}
        

In [45]:
"""
Defines the transformation applied to the collected images. The mean and std are the ImageNet channel statistics that torchvision's pretrained models expect, but they can be experimented with.
"""

transform = transforms.Compose(
    [
        # Bring every frame to a fixed 224x224 size.
        transforms.Resize(size=(224, 224)),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel normalization.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [46]:
# Build the full dataset from the driving log and the image folder.
sample_dataset = RoverDataset(
    csv_file='robot_log.csv',
    root_dir='RoverImages',
    transform=transform,
)

In [47]:
"""
Split dataset into 90% train and 10% test
"""

train_size = int(0.9 * len(sample_dataset))
test_size = len(sample_dataset) - train_size

# A dedicated, freshly seeded generator makes the split identical every time
# this cell runs. Drawing from the global RNG instead would give a different
# split on each re-run and could move training samples into the test set.
split_generator = torch.Generator().manual_seed(seed)
train_set, test_set = random_split(
    sample_dataset, [train_size, test_size], generator=split_generator
)

In [48]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
# Evaluation only averages the loss over all samples, so order is irrelevant;
# not shuffling keeps the evaluation pass from consuming global RNG state.
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

In [49]:
"""
The decision to use a pretrained ResNet-18 was somewhat arbitrary; it would be good to experiment with other architectures
"""

# Load ResNet-18 with pretrained weights, then replace its final fully
# connected layer with a fresh 2-output linear head.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)

In [50]:
"""
Opted to do a regression model. Both the loss function and optimizer would be good hyperparameters to experiment with
"""

# Adam updates every parameter of the model; mean-squared error is the
# regression loss it minimizes.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

In [51]:
"""
The model is trained on about 1500 images which is somewhat small. Didn't use a large number of epochs since the resnet model is pretrained.
"""

def _unpack_batch(batch):
    """Split a collated batch into model inputs and regression targets.

    Args:
        batch: dict from the DataLoader with the keys 'image', 'SteerAngle'
            and 'Throttle'.

    Returns:
        (images, targets), both on `device`. `targets` stacks SteerAngle and
        Throttle along dim 1 and is cast to float32.
    """
    images = batch['image'].to(device)
    targets = torch.stack((batch['SteerAngle'], batch['Throttle']), dim=1).to(device)
    return images, targets.float()


# Module.to() moves the parameters in place, so no per-parameter transfer is
# needed (Tensor.to() would only return a copy that is thrown away).
model.to(device)

num_epochs = 20
for epoch in range(num_epochs):
    # Training pass.
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        images, targets = _unpack_batch(batch)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weighting by the batch size
        # turns the epoch value into a per-sample average even when the last
        # batch is smaller.
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}")

    # Evaluation pass on the held-out split, without gradient tracking.
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for batch in test_loader:
            images, targets = _unpack_batch(batch)
            outputs = model(images)
            loss = criterion(outputs, targets)
            test_loss += loss.item() * images.size(0)
    test_loss /= len(test_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Test Loss: {test_loss:.4f}")


Epoch 1/20, Training Loss: 18.9514
Epoch 1/20, Test Loss: 27.0057
Epoch 2/20, Training Loss: 12.2563
Epoch 2/20, Test Loss: 9.8107
Epoch 3/20, Training Loss: 9.0329
Epoch 3/20, Test Loss: 8.9149
Epoch 4/20, Training Loss: 5.7581
Epoch 4/20, Test Loss: 3.3495
Epoch 5/20, Training Loss: 4.3156
Epoch 5/20, Test Loss: 8.1436
Epoch 6/20, Training Loss: 3.1382
Epoch 6/20, Test Loss: 4.9490
Epoch 7/20, Training Loss: 2.9653
Epoch 7/20, Test Loss: 3.8451
Epoch 8/20, Training Loss: 2.7402
Epoch 8/20, Test Loss: 6.5799
Epoch 9/20, Training Loss: 3.5730
Epoch 9/20, Test Loss: 2.5921
Epoch 10/20, Training Loss: 2.2101
Epoch 10/20, Test Loss: 5.1011
Epoch 11/20, Training Loss: 2.1842
Epoch 11/20, Test Loss: 2.0936
Epoch 12/20, Training Loss: 1.9842
Epoch 12/20, Test Loss: 3.9776
Epoch 13/20, Training Loss: 1.6017
Epoch 13/20, Test Loss: 1.4471
Epoch 14/20, Training Loss: 1.4761
Epoch 14/20, Test Loss: 1.4639
Epoch 15/20, Training Loss: 1.3899
Epoch 15/20, Test Loss: 1.3192
Epoch 16/20, Training Los