In [1]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision.models import alexnet
from torch.optim import Adam
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import numpy as np

In [2]:
mpl.rcParams['figure.dpi'] = 300
sns.set_theme()
sns.set_style("ticks")

# Training

## Setup

In [3]:
# Inputs

df_train_path = 'df_train.csv'
df_val_path  = 'df_val.csv'
df_test_path  = 'df_test.csv'
distort_img_path = 'distorted_crosswalk_images_crop'
num_epochs_for_today = 10
checkpoint_path = 'AlexNet_cuda_weights/AlexNet_Reg_Checkpoint.pth.tar'

In [4]:
df_train_full = pd.read_csv(df_train_path)
df_val_full = pd.read_csv(df_val_path)
df_test_full = pd.read_csv(df_test_path)

df_train = df_train_full[["id2","k"]]
df_val = df_val_full[["id2","k"]]
df_test = df_test_full[["id2","k"]]

In [5]:
# Define the Data Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create the Custom Dataset Class
class CrosswayDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, str(self.dataframe.iloc[idx, 0]) + '.jpg')
        image = Image.open(img_name).convert('RGB')
        k_gt = self.dataframe.iloc[idx, 1] 
        
        if self.transform:
            image = self.transform(image)
        
        return image, k_gt
    
def load_checkpoint(filename):
    checkpoint = torch.load(filename)
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    start_epoch = checkpoint['epoch']
    return start_epoch

In [6]:
# Create Dataset Objects and DataLoaders
train_dataset = CrosswayDataset(dataframe=df_train, root_dir=distort_img_path, transform=transform)
val_dataset = CrosswayDataset(dataframe=df_val, root_dir=distort_img_path, transform=transform)
test_dataset = CrosswayDataset(dataframe=df_test, root_dir=distort_img_path, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

## Begin training

In [13]:
# Load the pretrained AlexNet model
model = alexnet(pretrained=True)

# Modify the classifier to output a single value for regression
model.classifier[-1] = nn.Linear(in_features=4096, out_features=1)

# Move the model to the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")  # Add this line to check if GPU is being used

model.to(device)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=1e-6)

# Load checkpoint if it exists
if os.path.exists(checkpoint_path):
    start_epoch = load_checkpoint(checkpoint_path)
    print(f"Resuming training from epoch {start_epoch + 1}")
else:
    start_epoch = 0

# Training loop
num_epochs = start_epoch + num_epochs_for_today

training_loss_list = []
validation_loss_list = []

for epoch in range(start_epoch, num_epochs):
    model.train()
    running_loss = 0.0
    
    # Training phase
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Training]", leave=False)
    for images, k_gt in progress_bar:
        images = images.to(device)
        k_gt = k_gt.to(device).float().view(-1, 1)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, k_gt)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        
        # Update the progress bar
        progress_bar.set_postfix(loss=loss.item())

    epoch_loss = running_loss / len(train_dataset)
    training_loss_list.append(epoch_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}] Training Loss: {epoch_loss}')
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        progress_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Validation]", leave=False)
        for images, k_gt in progress_bar:
            images = images.to(device)
            k_gt = k_gt.to(device).float().view(-1, 1)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, k_gt)

            val_loss += loss.item() * images.size(0)
            
            # Update the progress bar
            progress_bar.set_postfix(loss=loss.item())
    
    val_loss /= len(val_dataset)
    validation_loss_list.append(val_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}] Validation Loss: {val_loss}')
    
    # Save the model and optimizer states for this epoch
    checkpoint = {
        'epoch': epoch + 1,
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict()
    }
    torch.save(checkpoint, checkpoint_path)
    print(f"Checkpoint saved at epoch {epoch+1}")

    # Save the model weights separately for this epoch
    model_save_path = os.path.join(f'AlexNet_cuda_weights/reg/alexnet_reg_epoch_{epoch}.pth') #############################################
    torch.save(model.state_dict(), model_save_path)
    print(f'Model saved to {model_save_path}')

Using device: cuda
Resuming training from epoch 26


                                                                                         

Epoch [26/30] Training Loss: 2.7095536089653578e-08


                                                                                          

Epoch [26/30] Validation Loss: 9.306827857426441e-10
Checkpoint saved at epoch 26
Model saved to AlexNet_weights/reg/alexnet_reg_epoch_26.pth


                                                                                         

Epoch [27/30] Training Loss: 2.316914962108285e-08


                                                                                         

Epoch [27/30] Validation Loss: 2.627140891004526e-09
Checkpoint saved at epoch 27
Model saved to AlexNet_weights/reg/alexnet_reg_epoch_27.pth


                                                                                         

Epoch [28/30] Training Loss: 2.0363378996133252e-08


                                                                                         

Epoch [28/30] Validation Loss: 1.1511624263299828e-08
Checkpoint saved at epoch 28
Model saved to AlexNet_weights/reg/alexnet_reg_epoch_28.pth


                                                                                         

Epoch [29/30] Training Loss: 1.798281058302999e-08


                                                                                          

Epoch [29/30] Validation Loss: 1.6408434716003677e-09
Checkpoint saved at epoch 29
Model saved to AlexNet_weights/reg/alexnet_reg_epoch_29.pth


                                                                                         

Epoch [30/30] Training Loss: 1.5409533868354587e-08


                                                                                         

Epoch [30/30] Validation Loss: 1.1308701296430015e-08
Checkpoint saved at epoch 30
Model saved to AlexNet_weights/reg/alexnet_reg_epoch_30.pth


In [14]:
training_loss_list

[2.7095536089653578e-08,
 2.316914962108285e-08,
 2.0363378996133252e-08,
 1.798281058302999e-08,
 1.5409533868354587e-08]

In [15]:
validation_loss_list

[9.306827857426441e-10,
 2.627140891004526e-09,
 1.1511624263299828e-08,
 1.6408434716003677e-09,
 1.1308701296430015e-08]

# Inference

In [7]:
# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load the pretrained AlexNet model
model = alexnet(pretrained=True)

# Modify the classifier to output 18 classes
model.classifier[-1] = nn.Linear(in_features=4096, out_features=1)

# Load the model weights
model_path = 'AlexNet_cuda_weights/reg/alexnet_reg_epoch_14.pth'  ############# Select your trained model weights
model.load_state_dict(torch.load(model_path))
model.to(device)
model.eval()

Using device: cuda


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

## Testing on 1 image

In [8]:
# Define the image transformations (same as used during training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Function to perform inference and get probabilities
def predict_image(image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)  # Add batch dimension
    image = image.to(device)

    # Perform inference
    with torch.no_grad():
        output = model(image)

    # Get the prediction (regression output)
    prediction = output.item()

    return prediction

In [9]:
# Example usage
test_img_path = os.path.join(distort_img_path, str(df_test.iloc[0, 0]) + '.jpg')
prediction = predict_image(test_img_path)
prediction = "{:.5e}".format(prediction)

ground_truth = df_test.iloc[0,1]
ground_truth = "{:.5e}".format(ground_truth)

print(f'Predicted k: {prediction}')
print(f'Ground truth k: {ground_truth}')

Predicted k: 5.65106e-05
Ground truth k: 7.77023e-05


## Testing on `test_loader`, Classification Report, Confusion Matrix

In [10]:
# Assuming the model is already loaded and test_dataset is defined
model.eval()

# DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Initialize lists to store true labels and predictions
all_k_gts = []
all_k_preds = []

# Inference on the test dataset
with torch.no_grad():
    for images, k_gt in test_loader:
        images = images.to(device)
        k_gts = k_gt.to(device)

        k_preds = model(images)

        all_k_gts.extend(k_gt.cpu().numpy())
        all_k_preds.extend(k_preds.cpu().numpy())

In [11]:
# Convert lists to numpy arrays
all_k_gts = np.array(all_k_gts)
all_k_preds = np.array(all_k_preds)

In [12]:
df_test_report = df_test.copy(deep=True)
df_test_report["k_preds"] = all_k_preds
df_test_report

Unnamed: 0,id2,k,k_preds
0,2066703,0.000078,0.000057
1,2074905,0.000062,0.000057
2,2022038,0.000061,0.000057
3,2060686,0.000100,0.000057
4,2004094,0.000036,0.000057
...,...,...,...
9895,2021815,0.000037,0.000057
9896,2065463,0.000025,0.000057
9897,2011739,0.000058,0.000057
9898,2018046,0.000030,0.000057


In [13]:
percent_mae = np.mean(100 * np.abs(all_k_preds - all_k_gts) / all_k_gts)

print(f"% Mean Absolute Error: {percent_mae:.2f}%")

% Mean Absolute Error: 190.49%


In [14]:
# Mean Absolute Error
mae = mean_absolute_error(all_k_gts, all_k_preds)

# Root Mean Squared Error
rmse = np.sqrt(mean_squared_error(all_k_gts, all_k_preds))

# R-squared
r2 = r2_score(all_k_gts, all_k_preds)

# Print the results
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R²): {r2}")

Mean Absolute Error (MAE): 2.5751273412329176e-05
Root Mean Squared Error (RMSE): 3.0787421069709145e-05
R-squared (R²): -0.1601533067405283


# All at once

In [25]:
# Define the image transformations (same as used during training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Function to perform inference and get probabilities
def predict_image(image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)  # Add batch dimension
    image = image.to(device)

    # Perform inference
    with torch.no_grad():
        output = model(image)

    # Get the prediction (regression output)
    prediction = output.item()

    return prediction

def all_at_once(model_path):
    
    # Define the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Load the pretrained AlexNet model
    model = alexnet(pretrained=True)

    # Modify the classifier to output 18 classes
    model.classifier[-1] = nn.Linear(in_features=4096, out_features=1)

    # Load the model weights
    model.load_state_dict(torch.load(model_path))
    model.to(device)
    model.eval()
    
    # Initialize lists to store true labels and predictions
    all_k_gts = []
    all_k_preds = []

    # Inference on the test dataset
    with torch.no_grad():
        for images, k_gt in test_loader:
            images = images.to(device)
            k_gts = k_gt.to(device)

            k_preds = model(images)

            all_k_gts.extend(k_gt.cpu().numpy())
            all_k_preds.extend(k_preds.cpu().numpy())
            
    # Convert lists to numpy arrays
    all_k_gts = np.array(all_k_gts)
    all_k_preds = np.array(all_k_preds)
    df_test_report = df_test.copy(deep=True)
    df_test_report["k_preds"] = all_k_preds
    average_percentage_error = np.mean(100 * np.abs(all_k_preds - all_k_gts) / all_k_gts)
    return round(average_percentage_error,4)

In [26]:
average_percentage_error_list = []

for i in range(16,31):
    model_path = f"AlexNet_cuda_weights/reg/alexnet_reg_epoch_{i}.pth"
    average_percentage_error = all_at_once(model_path)
    average_percentage_error_list.append(average_percentage_error)
    print(f"Epoch {i}: average_percentage_error = {average_percentage_error}")

average_percentage_error_list

Using device: cuda
Epoch 16: average_percentage_error = 115.3778
Using device: cuda
Epoch 17: average_percentage_error = 667.0475
Using device: cuda
Epoch 18: average_percentage_error = 138.4431
Using device: cuda
Epoch 19: average_percentage_error = 476.3731
Using device: cuda
Epoch 20: average_percentage_error = 156.2892
Using device: cuda
Epoch 21: average_percentage_error = 1017.9783
Using device: cuda
Epoch 22: average_percentage_error = 484.4738
Using device: cuda
Epoch 23: average_percentage_error = 314.8451
Using device: cuda
Epoch 24: average_percentage_error = 625.2054
Using device: cuda
Epoch 25: average_percentage_error = 138.0645
Using device: cuda
Epoch 26: average_percentage_error = 202.4918
Using device: cuda
Epoch 27: average_percentage_error = 333.7178
Using device: cuda
Epoch 28: average_percentage_error = 616.6683
Using device: cuda
Epoch 29: average_percentage_error = 273.45
Using device: cuda
Epoch 30: average_percentage_error = 612.038


[115.3778,
 667.0475,
 138.4431,
 476.3731,
 156.2892,
 1017.9783,
 484.4738,
 314.8451,
 625.2054,
 138.0645,
 202.4918,
 333.7178,
 616.6683,
 273.45,
 612.038]