### 03 - Convolutional Neural Network Model Training Notebook
Author: George Gorospe, george.gorospe@nmaia.net (updated 1/3/2026)

# In this third notebook, we'll use the data we previously collected to train an AI Pilot for our racer. 

Technically, we're using the data to train a convolutional neural network. This network uses convolutions, or special filters that are adjusted to promote the part of the image useful for driving. The training process adjusts the filters repeatedly until the model accurately drives just like you did when you drove the racer manually. In the next notebook, we'll use the model as our AI pilot for our self-driving car.


<font color='red' size='6'>IMPORTANT: Use the AC adaptor (wall power) when running this notebook.</font>

Training our machine learning model is power intensive; the robot's battery can't supply enough current for the process. This occasionally causes the computer to shut down during training. For best results, always use wall power during training.


In [None]:
# Importing required libraries

### Machine Learning Libraries
import torch # Import the PyTorch library
from torch.utils.data import DataLoader
from torch.utils.data import Subset
import torchvision # Import the TorchVision library from PyTorch
import torchvision.transforms as transforms
from torchvision.models import ResNet18_Weights
from torchvision.transforms import Compose, ToTensor, Resize
from sklearn.model_selection import train_test_split



# IPython Libraries for display and widgets
import traitlets
import ipywidgets
import ipywidgets.widgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
from IPython.display import clear_output, display


# Custom dataset object library
from xy_dataset import XYDataset

# General Libraries 
import cv2, glob, os, fnmatch, collections, random
from datetime import datetime
from matplotlib import pyplot as plt
from matplotlib.patches import Circle
import numpy as np
from ipyfilechooser import FileChooser


# Nvidia library for images
from jetcam.utils import bgr8_to_jpeg

# Custom plot function
def live_plot(data_dict, model_file_name="model", figsize=(7,5), title='Model Error Chart:'):
    """Redraw the loss chart in the current output area and save it to disk.

    Every series in ``data_dict`` is drawn with the same legend entry
    (``model_file_name`` + ".pth"); the dictionary keys are not shown.
    The image is written to the notebook-level ``training_chart_file_path``,
    which must already be defined when this function is called.
    """
    legend_entry = model_file_name + ".pth"

    # Replace the previous chart instead of stacking a new one below it
    clear_output(wait=True)
    chart = plt.figure(figsize=figsize)
    for series in data_dict.values():
        plt.plot(series, label=legend_entry)

    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel('Model Loss x1000')
    plt.grid(True)
    plt.legend(loc='upper right')  # the curve grows to the right
    plt.show()
    chart.savefig(training_chart_file_path)

# Loss history for the training chart: maps a series name to a list of values.
# defaultdict(list) lets the training loop append to a key without creating it first.
data = collections.defaultdict(list)


# Empty the cuda cache from previous runs, helps to free memory  
torch.cuda.empty_cache()

### Selecting a Dataset for Training
Use the following folder chooser to select the folder where your dataset is located.

In [None]:
# Create and display a FileChooser widget that starts in the datasets folder
fc = FileChooser('/home/student/Datasets')
display(fc)
# Only folders can be picked: a dataset is selected as a whole directory
fc.show_only_dirs = True
# Change the title (use '' to hide)
fc.title = '<b>Choose Dataset for Training</b>'

# Sample callback function
def change_title(chooser):
    """Relabel ``chooser`` to confirm that a directory has been picked."""
    confirmation = '<b>Directory Selected.</b>'
    chooser.title = confirmation

# Register callback function: change_title is handed to the chooser so it can be
# invoked by the widget (presumably when a selection is made - see ipyfilechooser docs)
fc.register_callback(change_title)

In [None]:
# Inspecting Dataset

# Output from file chooser.
# Fail early with a readable message when no folder has been chosen yet,
# instead of crashing later on a None path.
DATASET_DIR = fc.selected_path
if DATASET_DIR is None:
    raise RuntimeError(
        "No dataset folder selected. Choose a folder in the file chooser above, then run this cell again."
    )

# normpath removes any trailing "/" first, so the folder name is never an empty string
dataset_folder_name = os.path.basename(os.path.normpath(DATASET_DIR))


# Information about the dataset, number of data points and a listing of the data points.
num_files = len(glob.glob(os.path.join(DATASET_DIR, '*.jpg')))
file_list = fnmatch.filter(os.listdir(DATASET_DIR), '*.jpg')
if num_files > 0:
    print("Dataset found!")
    print("Number of files found in datadset: " + str(num_files))
else:
    print("No data in selected directory, choose again?")

In [None]:
# Creating our dataset object. This object parses the file names to get the labels for each datapoint

# The image pipeline is assembled from two groups of steps.

# Group 1 - augmentations that vary the images to promote robust performance.
# The commented-out entries are examples of alternative settings to try in the future.
augmentation_steps = [
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),  # Color Jitter #1
    #transforms.ColorJitter(brightness=1.3, hue=.3), # Color Jitter #2
    transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), # Gaussian Blur #1
    #transforms.GaussianBlur(kernel_size=(7), sigma=(0.8)),  # Gaussian Blur #2
]

# Group 2 - steps that MUST ALWAYS be used, in this order.
required_steps = [
    transforms.Resize((224, 224)),  # fixed 224x224 input size
    transforms.ToTensor(),  # image -> tensor
    transforms.Lambda(lambda tensor: tensor[[2, 1, 0], ...]),  # reverse the order of the three channels
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

TRANSFORMS = transforms.Compose(augmentation_steps + required_steps)

Sample_Dataset = XYDataset(DATASET_DIR, TRANSFORMS)

In [None]:
# Using sklearn to split dataset into training and evaluation subsets

def train_val_dataset(dataset, val_split=0.20, random_state=None):
    """Randomly split ``dataset`` into 'train' and 'evaluate' subsets.

    Args:
        dataset: Object supporting ``len()`` and integer indexing.
        val_split: Forwarded to ``train_test_split`` as ``test_size``;
            the share of data points placed in the 'evaluate' subset.
        random_state: Optional seed forwarded to ``train_test_split``.
            The default ``None`` keeps the split different on every call;
            pass an int to get the same split again on a later run.

    Returns:
        dict with the keys 'train' and 'evaluate', each holding a
        ``Subset`` view of ``dataset``.
    """
    all_indices = list(range(len(dataset)))
    train_idx, val_idx = train_test_split(
        all_indices, test_size=val_split, random_state=random_state
    )
    return {
        'train': Subset(dataset, train_idx),
        'evaluate': Subset(dataset, val_idx),
    }

# The dictionary returned here holds both the 'train' and the 'evaluate' subsets
datasets = train_val_dataset(Sample_Dataset)
train_count = len(datasets['train'])
evaluate_count = len(datasets['evaluate'])
print(f"Number of Data Points in Training Dataset: {train_count}")
print(f"Number of Data Points in Evaluate Dataset: {evaluate_count}")

In [None]:
# DataLoaders for the 'train' and the 'evaluate' subsets.
# A DataLoader hands the data to the training algorithm one mini-batch at a time
# and, because shuffle=True, re-orders the data points for each epoch.
BATCH_SIZE = 32 # DEFAULT = 32, if you get a 'malloc' error during training, try reducing this to 16
NUM_WORKERS = 2 # the number of CPU cores to use in supplying the next batch

# Both loaders are configured identically, so the settings are written once
loader_settings = {
    'batch_size': BATCH_SIZE,
    'shuffle': True,
    'num_workers': NUM_WORKERS,
    'pin_memory': True,
}

train_dataloader = DataLoader(datasets['train'], **loader_settings)
test_dataloader = DataLoader(datasets['evaluate'], **loader_settings)

## Next, we'll visualize a few of the data points from our dataset. Run the cell several times to see more data points.

In [None]:
### Verification of data points by visualization
# Use this cell to visualize several of your data points.
# The images are displayed with a green dot marking the label location.
# Question: do the labels make sense? Are they where you expect them to be?
# If yes, then you can go ahead and perform the training.
# If not, then consider collecting data again. Focus on careful, slow driving in the center of the lane.


# 1. Define the transform for human eye verification (BGR -> RGB, No Normalization)
verification_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[[2, 1, 0], ...]) 
])

# 2. Load the dataset into the special verify_dataset we'll use just to view the datapoints
verify_dataset = XYDataset(DATASET_DIR, verification_transform)

# 3. Get an image and its label selected at random from the dataset
# verify_dataset[index] returns (image, category_index, xy_value)
# The index is bounded by the dataset's own length, not by the *.jpg count of the
# folder, so it is always valid even if the two numbers ever differ.
if len(verify_dataset) == 0:
    raise RuntimeError("The selected dataset contains no data points to display.")
index = random.randrange(len(verify_dataset))
image_tensor, ann, xy = verify_dataset[index]

# 4. Prepare Image for Plotting
# matplotlib expects (height, width, channels); the tensor is (channels, height, width)
image_to_plot = image_tensor.permute(1, 2, 0).numpy()

# 5. Prepare Label (Calculate Pixel Coordinates)
# We assume the label x is normalized [-1, 1]. We map it to [0, 224].
# xy is a tensor, usually [x, y]. We take the first element for x.
if isinstance(xy, torch.Tensor):
    raw_x = xy[0].item()
else:
    raw_x = xy[0]

# Formula: pixel = width * (normalized_val / 2.0 + 0.5)

pixel_x = int(224 * (raw_x / 2.0 + 0.5))
pixel_y = 112 # We assume a fixed center height for the lane marker (half of 224)

# 6. Plot Image with Label
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.imshow(image_to_plot)

# Draw a Green Circle at the target location
circ = Circle((pixel_x, pixel_y), 5, color='lime', fill=True, linewidth=3)
ax.add_patch(circ)

ax.set_title("Data Point Verification")
ax.axis('off')
plt.show()

## Training the CNN on the Selected Dataset
Next, we'll setup the training algorithm for our machine learning model.
As we prepare to train our model we need to make choices about the way we'll train it.
These choices can impact how long it takes to train the model and the overall accuracy of the model.

The user-set parameters of the training algorithm are often called "Hyper-Parameters"
You can set your hyper parameters below, make sure to track which setting you used for your training!
## [ACTION REQUIRED] change the name for your machine learning model, "choose_a_new_name_model"

In [None]:
# Set Training Hyper Parameters:

########## [ACTION REQUIRED] Set name for new machine learning model #################
model_file_name = "choose_a_new_name_model" # Change this, leave the ""
training_notes = "write a good description of the model you're training" # leave the ""
# Example training notes: testing new dataset collected on Friday 1/10/25 at the library

# Number of training epochs (training cycles): 15 to 25 is a great range to start with
epochs = 25 # Hyperparameter (has a direct effect on the accuracy of the model)

# Models have many layers, as you saw above. We can train all layers or just the final layer.
#   train_all_layers = True --> Training takes longer, but may produce a better model. Do this if you have time to train
#   train_all_layers = False --> Do this first, takes less time. Try out the model, if it already has good accuracy, try training all layers next.
train_all_layers = False

#####################################################################################
# Output locations: the model weights (.pth) and the training chart (.png) share one base name
model_folder = "/home/student/Models/"
model_file_path = f"{model_folder}{model_file_name}.pth"
training_chart_file_path = f"{model_folder}{model_file_name}.png"

# Model Name check: refuse to continue if a model with this name was already saved
if os.path.isfile(model_file_path):
    raise Exception('Sorry, model with same name already exists, choose a different model file name.')

# Model Output: the number of values the network predicts
output_dim = 2
# Total number of epochs, kept separately because `epochs` is passed to the training function
total_epochs = epochs

######### Select a Machine learning model structure (Neural Network) ###########
######### Uncomment both the model and the fully connected layer "model.fc"

# Resnet 18
model_name = "Resnet 18"
weights = ResNet18_Weights.DEFAULT 
model = torchvision.models.resnet18(weights=weights)


# Resnet 34
# model_name = "Resnet 34"
# model = torchvision.models.resnet34(pretrained=True)

# Resnet 50
# model_name = "Resnet 50"
# model = torch.hub.load("pytorch/vision", "resnet50", weights="IMAGENET1K_V2")

# MobileNet V2
#model_name = "MobileNet_V2"
#model = torchvision.models.mobilenet_v2(pretrained=True)
#model.fc = torch.nn.Linear(2048, output_dim)

# MobileNet V3
#model_name = "MobileNet_V3"
#model = torchvision.models.mobilenet_v3_large(pretrained=True)
#model.classifier[-1] = torch.nn.Linear(1280, output_dim)

# ALEXNET
#model_name = "ALEXNET"
#model = torchvision.models.alexnet(pretrained=True)
#model.classifier[-1] = torch.nn.Linear(4096, output_dim)

# Keep a second name for the model to use later during optimization.
# NOTE: this is a plain assignment, so model_structure and model are the SAME object;
# every later change to `model` (new head, training) is visible through both names.
model_structure = model

# Control which part of the model is trained, all layers or just the final layer.
# Freezing happens BEFORE the new head is attached, so the head below stays trainable.
if not train_all_layers:
    for param in model.parameters():
        param.requires_grad = False

# Adding a fully connected layer to the top/head of the model.
# The input width is read from the layer being replaced (512 for ResNet 18/34,
# 2048 for ResNet 50), so switching ResNet variants needs no manual edit.
# Models without an `fc` layer fall back to the previous fixed width of 512.
head_in_features = model.fc.in_features if hasattr(model, "fc") else 512
model.fc = torch.nn.Linear(head_in_features, output_dim)

# Model optimizer:
optimizer = torch.optim.Adam(model.parameters())
# Multiply the learning rate by 0.1 after every 10 scheduler steps
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

#Loading a GPU if available and otherwise a CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
def _train_one_epoch(progress_bar):
    """Run one optimisation pass over the notebook-level ``train_dataloader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``device``.
    ``progress_bar`` is a widget whose ``value`` is set to the running batch
    count every 10 batches.
    """
    model.train()
    for batch_number, (images, _category_idx, xy) in enumerate(train_dataloader, start=1):
        # send data to device
        images = images.to(device)
        xy = xy.to(device)

        # zero gradients of parameters
        optimizer.zero_grad()

        # execute model to get outputs
        outputs = model(images)

        # mean squared error between prediction and label, then backpropagation
        loss = torch.mean((outputs - xy) ** 2)
        loss.backward()

        # step optimizer to adjust parameters
        optimizer.step()

        # Update the epoch progress meter every 10 batches
        if batch_number % 10 == 0:
            progress_bar.value = batch_number


def _evaluate_model():
    """Return the mean squared error (x1000) over the notebook-level ``test_dataloader``.

    Each batch's mean loss is weighted by the number of data points in that
    batch before dividing by the total count, so the result is a true average
    that does not change with BATCH_SIZE.

    Raises:
        RuntimeError: if the evaluation DataLoader yields no data points.
    """
    model.eval()
    weighted_loss_sum = 0.0
    sample_count = 0
    with torch.no_grad():
        for images, _category_idx, xy in test_dataloader:
            # send data to device
            images = images.to(device)
            xy = xy.to(device)

            # execute model to get outputs
            outputs = model(images)

            batch_loss = torch.mean((outputs - xy) ** 2)
            weighted_loss_sum += batch_loss.item() * len(xy)
            sample_count += len(xy)

    if sample_count == 0:
        raise RuntimeError("Evaluation dataset is empty; cannot compute the evaluation loss.")
    return 1000 * weighted_loss_sum / sample_count


def training_and_evaluation(epochs):
    """Train the notebook-level ``model`` and evaluate it after every epoch.

    Shows progress widgets, appends a report to
    /home/student/Models/training_log.txt, appends each epoch's evaluation
    loss to the notebook-level ``data['MSE']`` list and redraws the loss chart
    with ``live_plot``.

    Relies on these notebook-level names: ``model``, ``optimizer``,
    ``scheduler``, ``device``, ``train_dataloader``, ``test_dataloader``,
    ``datasets``, ``BATCH_SIZE``, ``total_epochs``, ``data``,
    ``model_file_name``, ``model_name``, ``dataset_folder_name``,
    ``num_files`` and ``training_notes``.

    Args:
        epochs: Number of epochs to run. The loop counts this value down to 0.

    Returns:
        The notebook-level ``model`` after training.
    """
    # Setting up a simple user interface for the training process
    epoch_index_display = widgets.IntText(value=epochs, description='Epoch: ', disabled=False)
    epoch_progress_display = widgets.FloatProgress(
        value=0.0,
        max=len(datasets['train']) // BATCH_SIZE + 1,
        description='Epoch Progress:',
        bar_style='info',
        style={'bar_color': '#808080'},
        orientation='horizontal',
    )
    training_error_display = widgets.FloatText(value=0.0, description='Mean Square Error (MSE):', disabled=False)
    progress_meter = widgets.FloatProgress(
        value=0.0,
        min=0,
        max=total_epochs,
        description='Progress:',
        bar_style='info',
        style={'bar_color': '#40E0D0'},
        orientation='horizontal',
    )
    system_status = widgets.Text(value='system_status', placeholder='system_status', description='Status:', disabled=False)
    epoch_display = widgets.HBox([epoch_index_display, epoch_progress_display])
    training_display = widgets.HBox([training_error_display, progress_meter])
    display(epoch_display)
    display(training_display)
    display(system_status)
    out1 = widgets.Output()

    display(widgets.HBox([out1]))

    # Training Timing
    start_time = datetime.now()
    dt_string = start_time.strftime("%m/%d/%Y %H:%M:%S")

    # Stays None (and is logged as such) when no epoch runs, i.e. epochs <= 0
    final_evaluation_loss = None

    # The `with` block closes the log even if training is interrupted or raises,
    # so the report header is never lost in an unflushed buffer.
    with open("/home/student/Models/training_log.txt", "a") as f:
        # Writing training details to training log
        f.write("\n")
        f.write(f"Training Report: {dt_string} \n")
        f.write(f"Output Model File: {model_file_name}\n")
        f.write(f"Selected Dataset: {dataset_folder_name}, Number Data Points: {num_files}\n")
        f.write(f"Model: {model_name}, Epochs: {epochs}, Batch Size: {BATCH_SIZE}\n")
        f.write(f"Training Notes: {training_notes}\n")

        ############# Initiating Training Process ##############
        system_status.value = "Starting training process ..."

        # Enable the cuDNN autotuner which looks for the most efficient algorithms for our training process
        torch.backends.cudnn.benchmark = True

        # EPOCH LOOP: `epochs` counts down, so the printed epoch number is the number still to run
        while epochs > 0:
            # Training phase
            system_status.value = "Training ..."
            epoch_progress_display.value = 0  # start each epoch with an empty bar
            _train_one_epoch(epoch_progress_display)

            scheduler.step()

            # Evaluation phase
            system_status.value = "Evaluating ..."
            final_evaluation_loss = _evaluate_model()

            print(f"Epoch: {epochs}, Evaluation Loss: {final_evaluation_loss}")
            training_error_display.value = final_evaluation_loss

            # Update Plot
            with out1:
                data['MSE'].append(final_evaluation_loss)
                live_plot(data, model_file_name=model_file_name, title='Model Error Chart: ' + model_file_name + ".pth")

            # End of the current epoch
            epochs = epochs - 1
            epoch_index_display.value = epochs
            progress_meter.value = (total_epochs - epochs)

        # get the execution time
        end_time = datetime.now()
        elapsed_time = end_time - start_time
        training_duration_time_formatted = str(elapsed_time)
        print('Execution time:', training_duration_time_formatted)

        # Finish writing to model training log
        f.write(f"Final model evaluation loss: {final_evaluation_loss}\n")
        f.write(f"Total training & evaluation time: {training_duration_time_formatted}\n")
        f.write("\n")

    system_status.value = "Training complete."
    return model

# START TRAINING
model = training_and_evaluation(epochs)

# SAVE THE MODEL TO FILE
# Only the weights (state dict) are written, not the whole model object
trained_weights = model.state_dict()
torch.save(trained_weights, model_file_path)
print(f"Saved new model as: {model_file_path}")

## Visualizing our new model's predictions
### Our model uses what it learned from the data to infer or predict what the desired turning angle should be for new situations.
### Run the next cell to see the accuracy of your model.

In [None]:
def visualize_model_predictions(model, dataloader, num_images=5):
    """Plot model predictions next to the true labels for one batch.

    Takes the first batch that ``dataloader`` yields, runs ``model`` on it on
    the notebook-level ``device`` and shows up to ``num_images`` images side
    by side with the labelled and the predicted x position marked.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Yields ``(images, category_index, xy)`` batches.
        num_images: Maximum number of images to show. Fewer are shown when
            the batch is smaller.

    Returns:
        None. Prints a message and returns early when there is nothing to show.
    """
    # 1. Set model to evaluation mode
    model.eval()
    
    # 2. Get one batch of data
    try:
        images, _, xy = next(iter(dataloader))
    except StopIteration:
        print("DataLoader is empty.")
        return

    # Never ask for more panels than the batch contains (a small evaluation
    # set or a small batch size would otherwise cause an IndexError below)
    num_images = min(num_images, len(images))
    if num_images < 1:
        print("No images to display.")
        return

    # 3. Send to Device
    images = images.to(device)
    
    # 4. Run Inference
    with torch.no_grad():
        outputs = model(images)
        
    # 5. Move data back to CPU
    images = images.cpu()
    xy = xy.cpu()
    outputs = outputs.cpu()
    
    # 6. Setup Plot
    # squeeze=False always returns a 2-D array of axes, so indexing also works
    # when only a single image is shown
    fig, axes = plt.subplots(1, num_images, figsize=(20, 5), squeeze=False)
    
    # Un-normalization constants (ImageNet defaults)
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    print("Visualization Legend:")
    print("---------------------")
    print("Green Circle = Ground Truth (Manual Label)")
    print("Red X       = Model Prediction")
    print("Values displayed are in the new 0-1000 range.\n")

    for i in range(num_images):
        ax = axes[0][i]
        
        # A. Un-normalize the image so it looks like a photo
        img_tensor = images[i] * std + mean
        img_np = torch.clamp(img_tensor, 0, 1).permute(1, 2, 0).numpy()
        
        ax.imshow(img_np)
        
        # B. Get Normalized Values (-1 to 1)
        norm_true_x = xy[i][0].item()
        norm_pred_x = outputs[i][0].item()
        
        # C. Calculate Display Values (Range 0-1000)
        # This tells you the actual value in your new coordinate system
        val_true_1000 = int((norm_true_x / 2.0 + 0.5) * 1000)
        val_pred_1000 = int((norm_pred_x / 2.0 + 0.5) * 1000)
        
        # D. Calculate Pixel Coordinates (Range 0-224)
        # We must scale this to 224 so it lands on the image correctly
        pixel_true_x = (norm_true_x / 2.0 + 0.5) * 224
        pixel_pred_x = (norm_pred_x / 2.0 + 0.5) * 224
        
        # Assume Y is centered for visualization (or use xy[i][1] if Y is trained)
        pixel_y = 112 

        # E. Draw Markers
        # Ground Truth (Green Circle)
        ax.add_patch(plt.Circle((pixel_true_x, pixel_y), 8, color='lime', fill=False, linewidth=3))
        # Prediction (Red X)
        ax.plot(pixel_pred_x, pixel_y, 'rx', markersize=15, markeredgewidth=3)
        
        # Title with 0-1000 values
        ax.set_title(f"True: {val_true_1000} | Pred: {val_pred_1000}")
        ax.axis('off')

    plt.show()

# Run the sanity check: plot predictions for one batch taken from the evaluation DataLoader
visualize_model_predictions(model, test_dataloader)


### Optimizing the Machine Learning Model to Run on the Robot
In this final step to the training process, we'll optimize the model.
We optimize the model so that it will run as fast as our camera collects data.


In [None]:
# Path of the weights file that was saved after training
model_file_path = model_folder + model_file_name + ".pth"

# First Optimization Cell
# Warm starting the model to be optimized - loading the saved weights from disk into it
# NOTE(review): model_structure was bound with a plain `model_structure = model` assignment
# in the model-setup cell, so it presumably refers to the same (already trained) model
# object rather than to an untrained copy - confirm.
model = model_structure # model object kept from the model-setup cell
# Move to the GPU, switch to evaluation mode and convert the weights to half precision
model = model.cuda().eval().half()
# Keep this as the last statement of the cell so its result is displayed
model.load_state_dict(torch.load(model_file_path, weights_only=True))

# When executed, we should see, "<All keys matched successfully>" 

In [None]:
######## Optimization of the Network ############
### This step can take a few minutes or longer depending on the size of the model

# Custom library from Nvidia to accelerate inference
from torch2trt import torch2trt

# Example structure of the input data: one 3-channel 224x224 image in half precision on the GPU.
# It is deliberately NOT named `data`: that name holds the loss history used by the
# training cell, and overwriting it would break any later training run in this session.
example_input = torch.zeros((1, 3, 224, 224)).cuda().half()

# Model optimization via quantization, or the reduction of overall model size by reducing the representation of model weights.
model_trt = torch2trt(model, [example_input], fp16_mode=True)

# Saving our new optimized model to disk
optimized_model_folder = "/home/student/Models/trt/"
# Create the folder if it is missing so the save cannot fail after the long optimization step
os.makedirs(optimized_model_folder, exist_ok=True)
optimized_model_file_path = optimized_model_folder + model_file_name + "_TRT.pth"
torch.save(model_trt.state_dict(), optimized_model_file_path)

# NOTES: If the kernel quits during optimization of the network
1. take note of the model name and structure (example: resnet 18)
2. Go to U2: Model Optimization notebook and select your model for optimization
3. Run all cells in U2: Model Optimization notebook.
4. If this process fails, reboot the computer and go directly to the U2 notebook to try again.
