# Jetracer Convolutional Neural Network Model Training Notebook
Author: George Gorospe (updated 1/12/2024)



About: In this notebook we'll use the data we previously collected to train a Convolutional Neural Network (CNN). This network will take as input an image from the racer's camera, and output the inferred driving directions in the form of a steering angle.

In [None]:
# Importing required libraries
import torch
import torchvision
import torchvision.transforms as transforms
from xy_dataset import XYDataset
import numpy as np

from torch.utils.data import Subset
from sklearn.model_selection import train_test_split
from torchvision.transforms import Compose, ToTensor, Resize
from torch.utils.data import DataLoader

from matplotlib import pyplot as plt
import cv2, glob, os, fnmatch, time
from datetime import datetime
from matplotlib.patches import Circle


# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Locating Dataset for Training

################# [REQUIRED ACTION] Select the Directory with your Dataset ##########################
DATASET_DIR = "/Datasets"
# Name of the dataset folder (last path component); it is written to the training log.
# normpath() drops any trailing slash first, so "/Datasets" and "/Datasets/" both give "Datasets".
dataset_folder_name = os.path.basename(os.path.normpath(DATASET_DIR))

# Information about the dataset, number of data points and a listing of the data points.
num_files = len(glob.glob(os.path.join(DATASET_DIR, '*.jpg')))
# Only list the directory when it exists, so a wrong path reaches the
# "No data" message below instead of raising from os.listdir().
if os.path.isdir(DATASET_DIR):
  file_list = fnmatch.filter(os.listdir(DATASET_DIR), '*.jpg')
else:
  file_list = []

if num_files > 0:
  print("Dataset found!")
  print("Number of files found in datadset: " + str(num_files))
else:
  print("No data in selected directory, choose again?")

In [None]:
# Build the dataset object. Per the original note, XYDataset parses the file names
# to get the label for each datapoint.

# Preprocessing applied to every image, in order.
# Optional steps that are currently disabled (they originally sat before ToTensor):
#   transforms.ColorJitter(0.2, 0.2, 0.2, 0.2)
#   transforms.ColorJitter(brightness=.5, hue=.3)
#   transforms.Resize((224, 224))
channel_means = [0.485, 0.456, 0.406]
channel_stds = [0.229, 0.224, 0.225]
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(channel_means, channel_stds),
]
TRANSFORMS = transforms.Compose(preprocessing_steps)

Sample_Dataset = XYDataset(DATASET_DIR, TRANSFORMS)

In [None]:
# Using sklearn to split dataset into training and evaluation subsets

def train_val_dataset(dataset, val_split=0.25, random_state=None):
    """Split ``dataset`` into random training and evaluation subsets.

    Args:
        dataset: Object supporting ``len()``; it is wrapped (not copied) by ``Subset``.
        val_split: Forwarded to ``train_test_split`` as ``test_size``, i.e. the
            share of the data reserved for evaluation.
        random_state: Optional seed forwarded to ``train_test_split`` so the split
            can be reproduced between runs. The default ``None`` keeps the
            previous unseeded behaviour.

    Returns:
        dict with the keys ``'train'`` and ``'evaluate'``, each a ``Subset`` of
        ``dataset`` selected by the corresponding index list.
    """
    all_indices = list(range(len(dataset)))
    train_idx, val_idx = train_test_split(
        all_indices, test_size=val_split, random_state=random_state
    )
    return {
        'train': Subset(dataset, train_idx),
        'evaluate': Subset(dataset, val_idx),
    }

In [None]:
# Split the full dataset and report how many data points ended up in each subset
datasets = train_val_dataset(Sample_Dataset)
num_train_points = len(datasets['train'])
num_eval_points = len(datasets['evaluate'])
print(f"Training Dataset: {num_train_points} data points.")
print(f"Evaluation Dataset: {num_eval_points} data points.")

In [None]:
# Creating the Dataloaders for both the 'train' and the 'evaluate' datasets
# DataLoaders deliver the data to the training algorithm when requested,
# in 'minibatches' of 64 samples.
# The training loader reshuffles the data for each epoch.
train_dataloader = DataLoader(datasets['train'], batch_size=64, shuffle=True)
# Evaluation only measures the loss, so sample order is irrelevant; keeping it
# fixed avoids needless reshuffling and makes evaluation passes repeatable.
test_dataloader = DataLoader(datasets['evaluate'], batch_size=64, shuffle=False)

In [None]:
# Checking GPU Status

# Pull a single minibatch (images, annotations, xy labels) from the training DataLoader
images, ann, xy = next(iter(train_dataloader))
# send data to device
images = images.to(device)
xy = xy.to(device)

# GPU Test
print(f"Images on GPU: {images.is_cuda}")
print(f"Labels on GPU: {xy.is_cuda}")

# torch.cuda.current_device() raises when CUDA is unavailable, and this notebook
# supports falling back to the CPU, so only query the index when a GPU exists.
if torch.cuda.is_available():
  print(f"Index for the GPU device: {torch.cuda.current_device()}")
else:
  print("No CUDA device available; running on CPU.")


In [None]:
# Plot Image, Label, and File Name

# Get a batch of images, associated annotations, and training labels from the training dataloader
train_image, ann, train_labels = next(iter(train_dataloader))

# Image and label must come from the SAME sample of the batch
sample_idx = 0

# Converting the image from a (C, H, W) tensor to an (H, W, C) numpy array
img_out = train_image[sample_idx].numpy()
img_out = np.moveaxis(img_out, 0, -1)
# Undo the Normalize step from TRANSFORMS so the picture shows with natural colors,
# then clip to the [0, 1] range expected by imshow for float images
img_out = img_out * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
img_out = np.clip(img_out, 0.0, 1.0)

# Converting the label to a pixel x coordinate
x = float(train_labels[sample_idx][0]) # first label component, converted from tensor
x = int(224 * (x / 2.0 + 0.5)) # mapping the [-1,1] range to [0,224] range
print(f"X Label: {x}")

fig = plt.figure(figsize= (7, 7))
# add_axes() requires a rect argument; a single subplot gives one full-figure Axes
ax = fig.add_subplot(1, 1, 1)
# Marker at the labelled x position, drawn on the vertical midline (y = 112)
circ = Circle((x,112),15)
ax.add_patch(circ)

# imshow is an Axes method (Figure has no imshow)
ax.imshow(img_out)


In [None]:
# Plot a grid of images with their labels
fig = plt.figure(figsize= (14, 14))

# Load ONE batch and show its first samples; fetching a new batch per tile
# would load a full minibatch for every single image displayed.
train_images, ann, train_labels = next(iter(train_dataloader))
num_tiles = min(12, len(train_images))  # the batch may hold fewer than 12 samples

for i in range(num_tiles):
  # Create a grid for our images
  ax = fig.add_subplot(4, 4, i+1)

  # Image i of the batch: (C, H, W) tensor -> (H, W, C) numpy array
  train_image = train_images[i].numpy()
  train_image = np.moveaxis(train_image, 0, -1)
  # Undo the Normalize step from TRANSFORMS and clip to imshow's [0, 1] float range
  train_image = train_image * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])
  train_image = np.clip(train_image, 0.0, 1.0)

  # Label of the SAME sample i. Keep it as a float here: truncating the [-1,1]
  # value to int first would collapse every label to 0 (the image center).
  x = float(train_labels[i][0])
  x = int(224 * (x / 2.0 + 0.5)) # mapping the [-1,1] range to [0,224] range

  # From the annotations we get the solution and plot it
  circ = Circle((x,112),15)
  ax.add_patch(circ)
  # NOTE(review): characters 26..29 of the image path are shown as a short tag;
  # this slice depends on the dataset path length - confirm it still fits.
  ax.text(10,20,ann['image_path'][i][26:30])
  ax.text(10,40, str(x))
  ax.imshow(train_image)

In [None]:
# Model Setup
# RESNET 18
# Number of values the network predicts per image
output_dim = 2

# Loading a pretrained ResNet18 model.
# Note: we're going to retrain all layers of this model
# This decision was made based on the amount of data available and the complexity of the task
model = torchvision.models.resnet18(pretrained=True)

# RESNET 34
#model = torchvision.models.resnet34(pretrained=True)


# If you wanted to train fewer of the layers (freeze some layers)
#Freeze all of the weights in ResNet18
#for param in model.parameters():
#  param.requires_grad = False

# Replacing the fully connected head of the model with a new output_dim-wide layer.
# The input width is read from the existing head instead of hard-coding 512, so
# swapping in a different ResNet variant above needs no further change here.
model.fc = torch.nn.Linear(model.fc.in_features, output_dim)

model.to(device)

In [None]:
# Training Setup
# BATCH_SIZE is only recorded in the training log. Read it from the DataLoader
# that actually feeds the training loop so the logged value cannot drift from
# the batch size really used.
BATCH_SIZE = train_dataloader.batch_size

# Adam optimizer over all model parameters, with its default settings
optimizer = torch.optim.Adam(model.parameters())

In [None]:
# Run configuration for the training cell

# Turn on cuDNN benchmark mode (see the torch.backends.cudnn documentation)
torch.backends.cudnn.benchmark = True

# Path the trained weights are saved to once training has finished
model_file_name = 'Models/my_new_model.pth'

# Number of passes over the training data
epochs = 1

def _train_one_epoch():
  """Run one optimisation pass over train_dataloader and return the mean MSE per sample."""
  # Re-enter training mode on every epoch: the evaluation pass leaves the model
  # in eval mode, which would otherwise persist into all later epochs.
  model.train()

  samples_seen = 0
  sum_loss = 0.0
  # Process each batch of data points in the train loader
  for images, _, xy in train_dataloader:
    # send data to device
    images = images.to(device)
    xy = xy.to(device)

    # zero gradients of parameters
    optimizer.zero_grad()

    # execute model to get outputs
    outputs = model(images)

    # compute MSE loss between the predictions and the xy labels
    loss = torch.mean((outputs - xy)**2)

    # run backpropagation to accumulate gradients
    loss.backward()

    # step optimizer to adjust parameters
    optimizer.step()

    # NO TRAINING ACCURACY: no correct answer for regression, only loss.
    # `loss` is already a batch mean, so weight it by the batch size before
    # dividing by the number of samples seen (otherwise the value is too small
    # by roughly a factor of the batch size).
    samples_seen += len(xy)
    sum_loss += float(loss) * len(xy)
    print("Loss: " + str(sum_loss/samples_seen))

  return sum_loss / samples_seen if samples_seen else float("nan")


def _evaluate():
  """Return the mean MSE per sample over test_dataloader without updating the model."""
  # Put the model into evaluation mode
  model.eval()

  samples_seen = 0
  sum_loss = 0.0
  # No gradients are needed for evaluation; skipping them saves memory and time
  with torch.no_grad():
    for images, _, xy in test_dataloader:
      # send data to device
      images = images.to(device)
      xy = xy.to(device)

      # execute model to get outputs
      outputs = model(images)

      # same MSE loss as in training, weighted by batch size (see _train_one_epoch)
      loss = torch.mean((outputs - xy)**2)
      samples_seen += len(xy)
      sum_loss += float(loss) * len(xy)

  # nan (instead of ZeroDivisionError) when the evaluation set is empty
  return sum_loss / samples_seen if samples_seen else float("nan")


def training_and_evaluation(epochs):
  """Train and evaluate the notebook-level `model` for `epochs` epochs.

  Uses the notebook globals `model`, `optimizer`, `train_dataloader`,
  `test_dataloader` and `device`. A report is appended to "training_log.txt"
  using `dataset_folder_name`, `num_files`, `BATCH_SIZE` and `model_file_name`.

  Args:
    epochs: Number of epochs to run (integer). Each epoch is one training pass
      followed by one evaluation pass.

  Returns:
    The trained `model` (the same object as the global).
  """
  # Training Timing
  start_time = datetime.now()

  # Writing training details to training log. The file is opened in short
  # `with` blocks so the handle is always closed, even if training raises.
  dt_string = start_time.strftime("%m/%d/%Y %H:%M:%S")
  with open("training_log.txt", "a") as f:
    f.write("\n")
    f.write(f"Training Report: {dt_string} \n")
    f.write(f"Selected Dataset: {dataset_folder_name}, Number Data Points: {num_files}\n")
    f.write(f"Model: {model.__class__.__name__}{18}, Epochs: {epochs}, Batch Size: {BATCH_SIZE}\n")
  # Remaining details will be written at the end of the training

  ############# Initiating Training Process ##############
  print("Starting training process ...")

  # Stays nan when no epoch runs, so the final report never hits an undefined name
  evaluation_loss = float("nan")

  # Epochs are numbered upwards from 1 in the progress banner
  for epoch in range(1, epochs + 1):
    print("######### Epoch: " + str(epoch) + " #########")

    # Training Loop
    _train_one_epoch()

    # Evaluation Loop (the reported number is a loss, not an accuracy)
    evaluation_loss = _evaluate()
    print(f"Validation Loss: {evaluation_loss}")

  end_time = datetime.now()

  # get the execution time
  elapsed_time = end_time - start_time
  training_duration_time_formatted = str(elapsed_time)
  print('Execution time:', training_duration_time_formatted)

  with open("training_log.txt", "a") as f:
    f.write(f"Final model evaluation loss: {evaluation_loss}\n")
    f.write(f"Total training & evaluation time: {training_duration_time_formatted}\n")
    f.write(f"Model File Name: {model_file_name}")
    f.write("\n")

  return model



# Make sure the output folder exists BEFORE training starts, so a missing
# directory cannot make torch.save fail after the whole training run finished.
model_dir = os.path.dirname(model_file_name)
if model_dir:
  os.makedirs(model_dir, exist_ok=True)

# Train, then persist the learned weights (state_dict) to model_file_name
model = training_and_evaluation(epochs)
torch.save(model.state_dict(), model_file_name)


In [None]:
# Model output test

# Take one minibatch (images, annotations, labels) from the training DataLoader
train_images, ann, train_labels = next(iter(train_dataloader))

# send data to device
images = train_images.to(device)
xy = train_labels.to(device)

# This is inference only: use eval mode and skip gradient tracking so the
# forward pass does not build an autograd graph.
model.eval()
with torch.no_grad():
  outputs = model(images)

# Check