## Google Colab setup (don't run locally)

In [1]:
from google.colab import drive
import os
import shutil

# Mount drive to colab
drive.mount('/content/drive', force_remount=True)

# setting up paths
path_to_project_files = '/content/drive/MyDrive/School/Homework/Spring2025/DL/Project/'
existing = os.path.join(path_to_project_files, 'kaggle.json')
path_to_colab_utils = '/root/.kaggle'
target = os.path.join(path_to_colab_utils, 'kaggle.json')

# move the key to the colab root
os.makedirs(path_to_colab_utils, exist_ok=True)
shutil.copy(existing, target)
os.chmod(target, 600)

# download the data into /content (which is temporary)
#!kaggle datasets download -d xhlulu/leafsnap-dataset -p /content --unzip

Mounted at /content/drive


In [None]:
import sys

# Edit this path to where you've uploaded the repo files, so the imports work.
sys.path.append('/content/drive/MyDrive/School/Homework/Spring2025/DL/Project/')

## Library Imports

In [None]:
from autoencoder import *
from dataloader import *
from cnn import *

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import v2
import os

In [None]:
import matplotlib.pyplot as plt

def showTensorInNotebook(tensor):
    """
    This takes a (3[RGB], H, W) tensor in R[0.0, 1.0] and displays it with matplotlib.
    """
    image = tensor.detach().cpu().numpy().transpose(1,2,0) # move the channel axis to the end, because PIL and matplotlib hate each other
    plt.imshow(image)
    plt.axis('off')
    plt.show()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


## Building the data loader

These are transforms that allow us to ingest the image tensors with some extra confusion at training time. `processor` makes the data loader spit out tensors, and `noiser` adds Gaussian noise.

In [None]:
# This just processes the images.
NOISE_RATIO = 0.1
H, W = 256, 256

processor = v2.Compose([
    v2.PILToTensor(), # the LeafsnapDataset class gives PIL Images, convert to torch Tensor
    lambda x: v2.functional.crop(x, 0, 0, 600, 600),
    v2.Resize((H, W)), # resize
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomRotation(degrees=(-45, 45)),
    lambda x: x / 255.0, # convert N[0, 255] to R[0.0, 1.0]
    #lambda x: torch.clip(x + NOISE_RATIO*torch.randn_like(x), 0.0, 1.0), # add noise
])

lab_processor = v2.Compose([
    v2.PILToTensor(), # the LeafsnapDataset class gives PIL Images, convert to torch Tensor
    lambda x: v2.functional.crop(x, 0, 0, 600, 600),
    v2.Resize((H, W)), # resize
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomRotation(degrees=(-45, 45)),
    lambda x: x / 255.0, # convert N[0, 255] to R[0.0, 1.0]
    #lambda x: torch.clip(x + NOISE_RATIO*torch.randn_like(x), 0.0, 1.0), # add noise
])

test_processor = v2.Compose([
    v2.PILToTensor(), # the LeafsnapDataset class gives PIL Images, convert to torch Tensor
    lambda x: v2.functional.crop(x, 0, 0, 600, 600),
    v2.Resize((H, W)), # resize
    lambda x: x / 255.0, # convert N[0, 255] to R[0.0, 1.0]
])


In [None]:
BATCH_SIZE = 64

root_directory = os.path.join(os.getcwd(), 'leafsnap-dataset') # you make need to edit this path to work, though, it works on Colab by default and works locally if you keep the dataset at the root of the repo
train_image_paths_file = os.path.join(path_to_project_files, "train.txt")
train_dataset = LeafsnapDataset(train_image_paths_file, root_directory, use_segmented=False, source="both", expand_lab=True, transform=processor, lab_transform=lab_processor)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

validation_image_paths_file = os.path.join(path_to_project_files, "validation.txt")
validation_dataset = LeafsnapDataset(validation_image_paths_file, root_directory, use_segmented=False, source="both", expand_lab=True, transform=processor, lab_transform=lab_processor)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_image_paths_file = os.path.join(path_to_project_files, "test.txt")
test_dataset = LeafsnapDataset(test_image_paths_file, root_directory, use_segmented=False, source="both", transform=test_processor, lab_transform=test_processor)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

## Training the Convolutional Neural Network (CNN)

This model uses a modified version of ResNet from Homework 2. It specifically is a version of ResNet34, with kernel size raised to 5, and skip layers at sizes 32, 64, and 128. Images have been downscaled to 128x128, and the segmentation image is used as a 4th layer, resulting in an input size of 4x128x128.

In [None]:
model = resnet(3, 185, device=device)
#model.load_state_dict(torch.load(os.path.join(path_to_project_files, 'cnn_model.pth'), weights_only=True))

train_resnet_model(model, train_dataloader, validation_dataloader, 25, .001, device=device)

Epoch 1/25: 100%|██████████| 385/385 [09:46<00:00,  1.52s/it, batch=385/385, loss=5.06]
Test Epoch 1/25: 100%|██████████| 48/48 [00:45<00:00,  1.06it/s, batch=48/48, test_loss=5.17]


Epoch 1 loss: 5.252708208406126, val loss: 5.104244937499364


Epoch 2/25: 100%|██████████| 385/385 [09:42<00:00,  1.51s/it, batch=385/385, loss=4.84]
Test Epoch 2/25: 100%|██████████| 48/48 [00:45<00:00,  1.07it/s, batch=48/48, test_loss=5.12]


Epoch 2 loss: 5.073941355866271, val loss: 4.962892522414525


Epoch 3/25: 100%|██████████| 385/385 [09:42<00:00,  1.51s/it, batch=385/385, loss=4.82]
Test Epoch 3/25: 100%|██████████| 48/48 [00:44<00:00,  1.07it/s, batch=48/48, test_loss=4.86]


Epoch 3 loss: 4.926149082183838, val loss: 4.766477127869924


Epoch 4/25: 100%|██████████| 385/385 [09:45<00:00,  1.52s/it, batch=385/385, loss=4.77]
Test Epoch 4/25: 100%|██████████| 48/48 [00:45<00:00,  1.07it/s, batch=48/48, test_loss=4.81]


Epoch 4 loss: 4.778993746522185, val loss: 4.674857834974925


Epoch 5/25: 100%|██████████| 385/385 [09:43<00:00,  1.51s/it, batch=385/385, loss=4.53]
Test Epoch 5/25: 100%|██████████| 48/48 [00:44<00:00,  1.08it/s, batch=48/48, test_loss=4.37]


Epoch 5 loss: 4.641987433990875, val loss: 4.496349374453227


Epoch 6/25: 100%|██████████| 385/385 [09:42<00:00,  1.51s/it, batch=385/385, loss=4.47]
Test Epoch 6/25: 100%|██████████| 48/48 [00:45<00:00,  1.06it/s, batch=48/48, test_loss=4.8]


Epoch 6 loss: 4.513016284595836, val loss: 4.672406435012817


Epoch 7/25: 100%|██████████| 385/385 [09:47<00:00,  1.53s/it, batch=385/385, loss=4.78]
Test Epoch 7/25: 100%|██████████| 48/48 [00:44<00:00,  1.08it/s, batch=48/48, test_loss=4.17]


Epoch 7 loss: 4.384509207985618, val loss: 4.335564414660136


Epoch 8/25: 100%|██████████| 385/385 [09:42<00:00,  1.51s/it, batch=385/385, loss=4.57]
Test Epoch 8/25: 100%|██████████| 48/48 [00:45<00:00,  1.05it/s, batch=48/48, test_loss=4.28]


Epoch 8 loss: 4.268460428559935, val loss: 4.307053565979004


Epoch 9/25: 100%|██████████| 385/385 [09:42<00:00,  1.51s/it, batch=385/385, loss=4.3]
Test Epoch 9/25: 100%|██████████| 48/48 [00:45<00:00,  1.07it/s, batch=48/48, test_loss=4.09]


Epoch 9 loss: 4.154192702801197, val loss: 4.419865717490514


Epoch 10/25: 100%|██████████| 385/385 [09:41<00:00,  1.51s/it, batch=385/385, loss=4.27]
Test Epoch 10/25: 100%|██████████| 48/48 [00:44<00:00,  1.08it/s, batch=48/48, test_loss=4.37]


Epoch 10 loss: 4.059745127194888, val loss: 4.121545831362407


Epoch 11/25: 100%|██████████| 385/385 [09:41<00:00,  1.51s/it, batch=385/385, loss=4.28]
Test Epoch 11/25: 100%|██████████| 48/48 [00:44<00:00,  1.08it/s, batch=48/48, test_loss=17.1]


Epoch 11 loss: 3.953886775846605, val loss: 17.64417628447215


Epoch 12/25: 100%|██████████| 385/385 [09:42<00:00,  1.51s/it, batch=385/385, loss=3.77]
Test Epoch 12/25: 100%|██████████| 48/48 [00:45<00:00,  1.06it/s, batch=48/48, test_loss=4.33]


Epoch 12 loss: 3.8649850083636, val loss: 4.511027892430623


Epoch 13/25: 100%|██████████| 385/385 [09:34<00:00,  1.49s/it, batch=385/385, loss=4.23]
Test Epoch 13/25: 100%|██████████| 48/48 [00:45<00:00,  1.05it/s, batch=48/48, test_loss=3.88]


Epoch 13 loss: 3.7754504996460754, val loss: 3.6631623804569244


Epoch 14/25: 100%|██████████| 385/385 [09:34<00:00,  1.49s/it, batch=385/385, loss=4.16]
Test Epoch 14/25: 100%|██████████| 48/48 [00:46<00:00,  1.04it/s, batch=48/48, test_loss=5.21]


Epoch 14 loss: 3.6826872757502964, val loss: 5.017384548981984


Epoch 15/25: 100%|██████████| 385/385 [09:39<00:00,  1.50s/it, batch=385/385, loss=3.53]
Test Epoch 15/25: 100%|██████████| 48/48 [00:44<00:00,  1.07it/s, batch=48/48, test_loss=6.76]


Epoch 15 loss: 3.602580077926834, val loss: 6.8920773069063825


Epoch 16/25: 100%|██████████| 385/385 [09:42<00:00,  1.51s/it, batch=385/385, loss=3.42]
Test Epoch 16/25: 100%|██████████| 48/48 [00:44<00:00,  1.08it/s, batch=48/48, test_loss=3.48]


Epoch 16 loss: 3.530696221760341, val loss: 3.9027149279912314


Epoch 17/25: 100%|██████████| 385/385 [09:40<00:00,  1.51s/it, batch=385/385, loss=3.96]
Test Epoch 17/25: 100%|██████████| 48/48 [00:45<00:00,  1.06it/s, batch=48/48, test_loss=34.7]


Epoch 17 loss: 3.4655457143659714, val loss: 36.44069425264994


Epoch 18/25: 100%|██████████| 385/385 [09:44<00:00,  1.52s/it, batch=385/385, loss=3.23]
Test Epoch 18/25: 100%|██████████| 48/48 [00:46<00:00,  1.04it/s, batch=48/48, test_loss=3.7]


Epoch 18 loss: 3.3772970273897247, val loss: 3.991088777780533


Epoch 19/25: 100%|██████████| 385/385 [09:43<00:00,  1.51s/it, batch=385/385, loss=3.32]
Test Epoch 19/25: 100%|██████████| 48/48 [00:45<00:00,  1.05it/s, batch=48/48, test_loss=10.5]


Epoch 19 loss: 3.299822368869534, val loss: 11.23257178068161


Epoch 20/25: 100%|██████████| 385/385 [09:48<00:00,  1.53s/it, batch=385/385, loss=4.41]
Test Epoch 20/25: 100%|██████████| 48/48 [00:46<00:00,  1.04it/s, batch=48/48, test_loss=10.5]


Epoch 20 loss: 3.2548357412412567, val loss: 10.823185880978903


Epoch 21/25: 100%|██████████| 385/385 [09:48<00:00,  1.53s/it, batch=385/385, loss=3.49]
Test Epoch 21/25: 100%|██████████| 48/48 [00:44<00:00,  1.07it/s, batch=48/48, test_loss=3.48]


Epoch 21 loss: 3.174153251771803, val loss: 3.75618477165699


Epoch 22/25: 100%|██████████| 385/385 [09:44<00:00,  1.52s/it, batch=385/385, loss=3.22]
Test Epoch 22/25: 100%|██████████| 48/48 [00:43<00:00,  1.10it/s, batch=48/48, test_loss=22.6]


Epoch 22 loss: 3.121947391931113, val loss: 25.97456153233846


Epoch 23/25: 100%|██████████| 385/385 [09:45<00:00,  1.52s/it, batch=385/385, loss=3.45]
Test Epoch 23/25: 100%|██████████| 48/48 [00:45<00:00,  1.06it/s, batch=48/48, test_loss=11.9]


Epoch 23 loss: 3.0694427217755997, val loss: 10.002659122149149


Epoch 24/25: 100%|██████████| 385/385 [09:48<00:00,  1.53s/it, batch=385/385, loss=2.86]
Test Epoch 24/25: 100%|██████████| 48/48 [00:45<00:00,  1.04it/s, batch=48/48, test_loss=3.86]


Epoch 24 loss: 3.0100897962396793, val loss: 3.488509694735209


Epoch 25/25: 100%|██████████| 385/385 [09:47<00:00,  1.53s/it, batch=385/385, loss=2.7]
Test Epoch 25/25: 100%|██████████| 48/48 [00:44<00:00,  1.09it/s, batch=48/48, test_loss=3.58]

Epoch 25 loss: 2.970027363145506, val loss: 3.711844985683759





In [None]:
torch.save(model.state_dict(), path_to_project_files + "cnn_model.pth")

## Testing the CNN

In [None]:
correct_top1 = 0
correct_top5 = 0
total = 0

model.to(device)
with torch.no_grad(): # No gradients needed for evaluation
    model.eval()
    for inputs, labels in test_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        # Top-1 Accuracy
        _, predicted = torch.max(outputs, 1)
        correct_top1 += (predicted == labels).sum().item()

        # Top-5 Accuracy
        top5_preds = torch.topk(outputs, 5, dim=1).indices
        correct_top5 += torch.sum(top5_preds.eq(labels.view(-1, 1))).item()

        total += labels.size(0)

# Compute accuracies
top1_accuracy = 100 * correct_top1 / total
top5_accuracy = 100 * correct_top5 / total

print(f"Top-1 Accuracy: {top1_accuracy:.2f}%")
print(f"Top-5 Accuracy: {top5_accuracy:.2f}%")


Top-1 Accuracy: 17.19%
Top-5 Accuracy: 45.50%


After various tweaks, I'm very happy with the current training accuracy of the CNN model, especially for the first check-in. Running at a 90% Top-5 accuracy is excellent, although there is certainly some more hyperparameter tweaking to be done. I may also test changing the model's structure, adding techniques such as dropout that have been used in other models for similar purposes. I would like to reach 80% Top-1 accuracy by the end.