#   Imports and Set to GPU

In [48]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnext101_32x8d
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import StepLR
from PIL import Image
import pandas as pd
import os
import shutil

# Pick the compute device once: the first CUDA GPU when one is visible,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.set_device(0)  # GPU index 0; change this to target another card
    print("set to GPU")
else:
    device = torch.device("cpu")

# CUDA version this PyTorch build was compiled against
print(torch.version.cuda)

set to GPU
11.7


In [49]:
# Record the installed PyTorch build so the run can be reproduced later
print(torch.__version__)

2.0.1+cu117


# Import Data and split into train/val/test

In [50]:
# Note: After you run this cell, the training and test data will be available in
# the file browser. (Click the folder icon on the left to view it)
#
# If you don't see the data after the cell completes, click the refresh button
# in the file browser (folder icon with circular arrow)

# First, let's download and unzip the data
#!echo "Downloading files..."
#!wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training1.zip
#!wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training2.zip
#!wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/test.zip
#!wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/test_partial.zip
#!wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/test_classes_partial.csv

#!echo "Unzipping files..."
#!tar -xf C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training1.zip
#!tar -xf C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training2.zip
#!tar -xf C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\test.zip
#!tar -xf C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\test_partial.zip

# Combine the two training directories
#!echo "Merging training data..."
#!mkdir C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training
#!mv C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training1\* C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training
#!mv C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training2\* C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training

# Cleanup
#!echo "Cleaning up..."
#!rmdir C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training1
#!rmdir C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\training2
#!rm training1.zip
#!rm training2.zip
#!rm test.zip
#!rm test_partial.zip

#!echo "Data ready."

In [51]:
####################################### Splitting Folders #######################################

#main_folder_path = r'C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\training'

# Get the list of subfolder names (1 - 42)
#subfolders = [f.name for f in os.scandir(main_folder_path) if f.is_dir()]

# Create empty lists to hold the file paths for train and test data
#train_files = []
#test_files = []
#train_labels = []
#test_labels = []

# Iterate over each subfolder
#for subfolder in subfolders:
#    subfolder_path = os.path.join(main_folder_path, subfolder)
#    
    # Get the list of file names in the current subfolder
#    file_names = [f.name for f in os.scandir(subfolder_path) if f.is_file()]
    
    # Create labels corresponding to the subfolder
#    labels = [subfolder] * len(file_names)
    
    # Split the file names and labels into train and test sets
#    train_files_subfolder, test_files_subfolder, train_labels_subfolder, test_labels_subfolder = \
#        train_test_split(file_names, labels, test_size=0.2, random_state=42)
    
    # Append the full file paths and labels to the respective train and test lists
#    train_files += [os.path.join(subfolder_path, file_name) for file_name in train_files_subfolder]
#    test_files += [os.path.join(subfolder_path, file_name) for file_name in test_files_subfolder]
#    train_labels += train_labels_subfolder
#    test_labels += test_labels_subfolder

# Create train and test directories to store the split files
#train_dir = r'C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\train'
#test_dir = r'C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\val'

#os.makedirs(train_dir, exist_ok=True)
#os.makedirs(test_dir, exist_ok=True)


# Copy the train files to the train directory
#for train_file, train_label in zip(train_files, train_labels):
#    label_dir = os.path.join(train_dir, train_label)
#    os.makedirs(label_dir, exist_ok=True)
#    shutil.copy2(train_file, label_dir)

# Copy the test files to the test directory
#for test_file, test_label in zip(test_files, test_labels):
#    label_dir = os.path.join(test_dir, test_label)
#    os.makedirs(label_dir, exist_ok=True)
#    shutil.copy2(test_file, label_dir)

In [52]:
####################################### Train Val Split #######################################

# Folders holding the pre-split training and validation photos
# (one sub-folder per class, as ImageFolder expects).
training_dir = r'C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\train'
val_dir = r'C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\val'

# Only PIL -> tensor conversion is applied; there is no resizing or normalisation.
transform = ToTensor()

train_dataset = ImageFolder(root=training_dir, transform=transform)
val_dataset = ImageFolder(root=val_dir, transform=transform)

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Validation is batched like training: the DataLoader default of batch_size=1
# forces one forward pass per image on every per-epoch validation sweep.
# Order is irrelevant for metrics, so shuffling stays off.
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [53]:
# Sanity check: show the dataset summary (size, root folder, transform)
print (train_dataset)

Dataset ImageFolder
    Number of datapoints: 31367
    Root location: C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\train
    StandardTransform
Transform: ToTensor()


# Train the Model

In [54]:
# Use the pretrained model but add the last layer to our target classes
class StreetSignClassifier(nn.Module):
    """ResNeXt-101 (32x8d) backbone whose final fully connected layer is
    replaced by a fresh ``nn.Linear`` with ``num_classes`` outputs.

    Args:
        num_classes: Number of output units of the new classification head.
        weights: Passed straight to ``resnext101_32x8d(weights=...)``. The
            default ``"IMAGENET1K_V1"`` selects the same checkpoint the
            deprecated ``pretrained=True`` flag used to load; pass ``None``
            to start from random initialisation (no download).
    """

    def __init__(self, num_classes, weights="IMAGENET1K_V1"):
        super().__init__()
        # `pretrained=True` is deprecated since torchvision 0.13 and emits a
        # warning; the `weights` argument is the supported replacement.
        self.resnext = resnext101_32x8d(weights=weights)
        # Keep the backbone's feature width, swap only the output layer.
        in_features = self.resnext.fc.in_features
        self.resnext.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the backbone's output (per-class scores) for batch ``x``."""
        return self.resnext(x)

def evaluate_model(model, dataloader, device):
    """Score ``model`` on every batch of ``dataloader`` without training it.

    The model is switched to eval mode, gradient tracking is disabled, and
    the arg-max over the class dimension is taken as the prediction.

    Args:
        model: Network mapping an image batch to per-class scores.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, macro_f1)`` computed with scikit-learn.
    """
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images.to(device))
            predicted = outputs.argmax(dim=1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    accuracy = accuracy_score(y_true, y_pred)
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return accuracy, macro_f1


# Initialize the new model.
# The replacement head has 43 outputs (an earlier note here said 42).
# NOTE(review): this should equal len(train_dataset.classes) — confirm.
model = StreetSignClassifier(num_classes=43)
train_accuracy_list = []
val_accuracy_list = []

# Set Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Multiply the learning rate by 0.1 after every 3 epochs.
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

# Train Model
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    # ---- optimisation pass over the training set ----
    model.train()
    running_loss = 0.0

    for images, labels in train_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    train_loss = running_loss / len(train_dataloader)

    # ---- metrics in eval mode on both splits ----
    train_accuracy, train_f1 = evaluate_model(model, train_dataloader, device)
    train_accuracy_list.append(train_accuracy)

    val_accuracy, val_f1 = evaluate_model(model, val_dataloader, device)
    val_accuracy_list.append(val_accuracy)

    # Read the learning rate used during this epoch before the scheduler changes it.
    current_lr = optimizer.param_groups[0]['lr']
    scheduler.step()  # Update the learning rate

    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f} Train Accuracy: {train_accuracy:.4f} Train F1 Score: {train_f1:.4f} Val Accuracy: {val_accuracy:.4f} Val F1 Score: {val_f1:.4f} Learning Rate: {current_lr}")
    



Epoch 1/10: Train Loss: 2.5119 Train Accuracy: 0.6486 Train F1 Score: 0.5491 Val Accuracy: 0.6446 Val F1 Score: 0.5436 Learning Rate: 0.01
Epoch 2/10: Train Loss: 0.4868 Train Accuracy: 0.9291 Train F1 Score: 0.9098 Val Accuracy: 0.9208 Val F1 Score: 0.9008 Learning Rate: 0.01
Epoch 3/10: Train Loss: 0.2157 Train Accuracy: 0.9430 Train F1 Score: 0.9370 Val Accuracy: 0.9368 Val F1 Score: 0.9268 Learning Rate: 0.01
Epoch 4/10: Train Loss: 0.0620 Train Accuracy: 0.9930 Train F1 Score: 0.9926 Val Accuracy: 0.9839 Val F1 Score: 0.9830 Learning Rate: 0.001
Epoch 5/10: Train Loss: 0.0361 Train Accuracy: 0.9964 Train F1 Score: 0.9958 Val Accuracy: 0.9876 Val F1 Score: 0.9847 Learning Rate: 0.001
Epoch 6/10: Train Loss: 0.0232 Train Accuracy: 0.9982 Train F1 Score: 0.9980 Val Accuracy: 0.9866 Val F1 Score: 0.9855 Learning Rate: 0.001
Epoch 7/10: Train Loss: 0.0131 Train Accuracy: 0.9991 Train F1 Score: 0.9989 Val Accuracy: 0.9892 Val F1 Score: 0.9886 Learning Rate: 0.0001
Epoch 8/10: Train Loss

# Validate Model

In [55]:
#model.eval()  # Set the model to evaluation mode

#y_true = []
#y_pred = []

#with torch.no_grad():
#    for images, labels in val_dataloader:
#        images = images.to(device)
#        labels = labels.to(device)

#        outputs = model(images)
#        _, predicted = torch.max(outputs, 1)

#        y_true.extend(labels.cpu().numpy())
#        y_pred.extend(predicted.cpu().numpy())

# Calculate accuracy and F1 score
#accuracy = accuracy_score(y_true, y_pred)
#f1 = f1_score(y_true, y_pred, average='macro')

#print(f"Accuracy: {accuracy:.4f}")
#print(f"F1 Score: {f1:.4f}")

# Test Model

In [56]:
class TestDataset(torch.utils.data.Dataset):
    """Dataset backed by a CSV of ``(file name, label)`` rows and an image folder.

    Column 0 of the CSV is joined onto ``root_dir`` to locate the image and
    column 1 is returned as its label.

    Args:
        csv_file: Path to the CSV file listing image names and labels.
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for CSV row ``idx``."""
        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 0])
        # Force 3-channel RGB, as torchvision's ImageFolder loader does, so
        # grayscale / palette / RGBA files reach the model with the same
        # channel layout as the training data. The context manager closes the
        # file handle; convert() returns a fully loaded image that remains
        # usable afterwards.
        with Image.open(img_name) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        label = self.data_frame.iloc[idx, 1]

        return image, label

# Where the partial test labels (CSV) and their images live
csv_file = r'C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\Spencer_Work\test_classes_partial.csv'
test_dir = r'C:\Users\dogeb\Documents\GitHub\CSE450-Team\Project_4\test_partial'
# Tensor conversion only, no further preprocessing
transform = ToTensor()

# Wrap CSV + images in the custom dataset and iterate it in file order
test_dataset = TestDataset(csv_file=csv_file, root_dir=test_dir, transform=transform)
test_dataloader = DataLoader(dataset=test_dataset, shuffle=False)

# ... Rest of the code ...



# Metrics

In [57]:


# Score the trained model on the labelled test subset.
# Switch to inference behaviour explicitly rather than relying on whichever
# mode an earlier cell happened to leave the model in.
model.eval()

y_true = []
y_pred = []

with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Index of the highest score per row is the predicted class
        _, predicted = torch.max(outputs, 1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

# Calculate accuracy and F1 score
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='macro')

print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")

print(y_pred)
print(y_true)

Accuracy: 0.9502
F1 Score: 0.9255
[16, 1, 38, 33, 11, 38, 18, 12, 25, 35, 12, 7, 23, 7, 4, 9, 21, 20, 27, 38, 4, 33, 9, 3, 1, 11, 13, 10, 9, 11, 5, 17, 34, 23, 2, 17, 3, 12, 16, 8, 7, 30, 18, 12, 24, 25, 3, 10, 18, 1, 7, 13, 15, 9, 13, 35, 5, 26, 9, 16, 38, 10, 4, 9, 15, 9, 26, 2, 5, 28, 11, 25, 30, 33, 5, 12, 1, 10, 25, 21, 21, 33, 25, 7, 10, 35, 3, 7, 22, 13, 3, 1, 2, 14, 12, 32, 3, 38, 9, 33, 1, 10, 5, 11, 33, 4, 35, 25, 33, 4, 1, 14, 16, 10, 23, 3, 27, 29, 1, 17, 13, 7, 1, 8, 2, 10, 10, 31, 1, 6, 36, 3, 14, 13, 11, 10, 18, 40, 2, 38, 41, 4, 6, 18, 17, 25, 2, 9, 11, 21, 7, 24, 11, 25, 17, 3, 6, 9, 7, 4, 13, 16, 4, 27, 18, 9, 13, 14, 29, 17, 13, 38, 26, 25, 33, 1, 3, 40, 13, 2, 8, 4, 36, 25, 20, 25, 18, 1, 10, 8, 10, 29, 12, 38, 31, 2, 8, 40, 24, 28, 17]
[16, 1, 38, 33, 11, 38, 18, 12, 25, 35, 12, 7, 23, 7, 4, 9, 21, 20, 27, 38, 4, 33, 9, 3, 1, 11, 13, 10, 9, 11, 5, 17, 34, 23, 2, 17, 3, 12, 16, 8, 7, 30, 18, 12, 24, 25, 3, 10, 18, 8, 25, 13, 15, 9, 13, 35, 5, 26, 9, 16, 38, 10, 4, 9