## Google Colab setup (don't run locally)

In [None]:
from google.colab import drive
import os
import shutil

# Mount drive to colab
drive.mount('/content/drive', force_remount=True)

# setting up paths
path_to_project_files = '/content/drive/MyDrive/School/Homework/Spring2025/DL/Project/'
existing = os.path.join(path_to_project_files, 'kaggle.json')
path_to_colab_utils = '/root/.kaggle'
target = os.path.join(path_to_colab_utils, 'kaggle.json')

# move the key to the colab root
os.makedirs(path_to_colab_utils, exist_ok=True)
shutil.copy(existing, target)
os.chmod(target, 600)

# download the data into /content (which is temporary)
!kaggle datasets download -d xhlulu/leafsnap-dataset -p /content --unzip

Mounted at /content/drive
Dataset URL: https://www.kaggle.com/datasets/xhlulu/leafsnap-dataset
License(s): copyright-authors


In [None]:
import sys

# Edit this path to where you've uploaded the repo files, so the imports work.
sys.path.append('/content/drive/MyDrive/School/Homework/Spring2025/DL/Project/')

## Library Imports

In [None]:
from autoencoder import *
from dataloader import *
from cnn import *

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import v2
import os

In [None]:
import matplotlib.pyplot as plt

def showTensorInNotebook(tensor):
    """
    This takes a (3[RGB], H, W) tensor in R[0.0, 1.0] and displays it with matplotlib.
    """
    image = tensor.detach().cpu().numpy().transpose(1,2,0) # move the channel axis to the end, because PIL and matplotlib hate each other
    plt.imshow(image)
    plt.axis('off')
    plt.show()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


## Building the data loader

These are transforms that allow us to ingest the image tensors with some extra confusion at training time. `processor` makes the data loader spit out tensors, and `noiser` adds Gaussian noise.

In [11]:
# This just processes the images.
NOISE_RATIO = 0.1
H, W = 128, 128

processor = v2.Compose([
    v2.PILToTensor(), # the LeafsnapDataset class gives PIL Images, convert to torch Tensor
    v2.RandomRotation(degrees=(-90, 90)),
    v2.Resize((H, W)), # resize
    lambda x: x / 255.0, # convert N[0, 255] to R[0.0, 1.0]
    lambda x: torch.clip(x + NOISE_RATIO*torch.randn_like(x), 0.0, 1.0), # add noise
])



In [None]:
BATCH_SIZE = 64

root_directory = os.path.join(os.getcwd(), 'leafsnap-dataset') # you make need to edit this path to work, though, it works on Colab by default and works locally if you keep the dataset at the root of the repo
image_paths_file = os.path.join(path_to_project_files, "1rev.txt")
dataset = LeafsnapDataset(image_paths_file, root_directory, use_segmented=True, source="field", transform=processor)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

## Training the Convolutional Neural Network (CNN)

This model uses a modified version of ResNet from Homework 2. It specifically is a version of ResNet34, with kernel size raised to 5, and skip layers at sizes 32, 64, and 128. Images have been downscaled to 128x128, and the segmentation image is used as a 4th layer, resulting in an input size of 4x128x128.

In [8]:
model = resnet(4, 185, device=device)

train_resnet_model(model, dataloader, 25, .001, device=device)


Epoch 1/25: 100%|██████████| 121/121 [10:20<00:00,  5.13s/it, batch=121/121, loss=5.03]


Epoch 0 loss: 5.439360906269925


Epoch 2/25: 100%|██████████| 121/121 [10:11<00:00,  5.05s/it, batch=121/121, loss=4.53]


Epoch 1 loss: 4.6623787407047494


Epoch 3/25: 100%|██████████| 121/121 [10:07<00:00,  5.02s/it, batch=121/121, loss=3.9]


Epoch 2 loss: 4.2863246369953


Epoch 4/25: 100%|██████████| 121/121 [10:10<00:00,  5.05s/it, batch=121/121, loss=3.94]


Epoch 3 loss: 3.9604296112848707


Epoch 5/25: 100%|██████████| 121/121 [10:07<00:00,  5.02s/it, batch=121/121, loss=3.53]


Epoch 4 loss: 3.696912062069601


Epoch 6/25: 100%|██████████| 121/121 [10:09<00:00,  5.03s/it, batch=121/121, loss=3.44]


Epoch 5 loss: 3.476209532130848


Epoch 7/25: 100%|██████████| 121/121 [10:09<00:00,  5.04s/it, batch=121/121, loss=2.94]


Epoch 6 loss: 3.2517955047039946


Epoch 8/25: 100%|██████████| 121/121 [10:09<00:00,  5.04s/it, batch=121/121, loss=2.6]


Epoch 7 loss: 3.0985418449748647


Epoch 9/25: 100%|██████████| 121/121 [10:06<00:00,  5.01s/it, batch=121/121, loss=3.06]


Epoch 8 loss: 2.925561335461199


Epoch 10/25: 100%|██████████| 121/121 [10:03<00:00,  4.99s/it, batch=121/121, loss=2.76]


Epoch 9 loss: 2.771792951694205


Epoch 11/25: 100%|██████████| 121/121 [10:04<00:00,  4.99s/it, batch=121/121, loss=3.01]


Epoch 10 loss: 2.6711466213888375


Epoch 12/25: 100%|██████████| 121/121 [10:03<00:00,  4.98s/it, batch=121/121, loss=2.56]


Epoch 11 loss: 2.5510173375941507


Epoch 13/25: 100%|██████████| 121/121 [10:00<00:00,  4.96s/it, batch=121/121, loss=2.5]


Epoch 12 loss: 2.3933267790423938


Epoch 14/25: 100%|██████████| 121/121 [09:57<00:00,  4.93s/it, batch=121/121, loss=1.82]


Epoch 13 loss: 2.276606022819015


Epoch 15/25: 100%|██████████| 121/121 [09:58<00:00,  4.95s/it, batch=121/121, loss=1.85]


Epoch 14 loss: 2.2220378887554832


Epoch 16/25: 100%|██████████| 121/121 [09:59<00:00,  4.95s/it, batch=121/121, loss=2.46]


Epoch 15 loss: 2.1151350923806183


Epoch 17/25: 100%|██████████| 121/121 [09:57<00:00,  4.93s/it, batch=121/121, loss=2.32]


Epoch 16 loss: 2.0452153643300712


Epoch 18/25: 100%|██████████| 121/121 [09:57<00:00,  4.94s/it, batch=121/121, loss=1.78]


Epoch 17 loss: 1.9581950863530813


Epoch 19/25: 100%|██████████| 121/121 [09:55<00:00,  4.93s/it, batch=121/121, loss=2.1]


Epoch 18 loss: 1.8656652003280387


Epoch 20/25: 100%|██████████| 121/121 [09:58<00:00,  4.95s/it, batch=121/121, loss=1.71]


Epoch 19 loss: 1.8020712373670467


Epoch 21/25: 100%|██████████| 121/121 [09:55<00:00,  4.92s/it, batch=121/121, loss=1.56]


Epoch 20 loss: 1.7661464302993017


Epoch 22/25: 100%|██████████| 121/121 [09:49<00:00,  4.88s/it, batch=121/121, loss=1.75]


Epoch 21 loss: 1.6739214520809078


Epoch 23/25: 100%|██████████| 121/121 [09:51<00:00,  4.88s/it, batch=121/121, loss=1.8]


Epoch 22 loss: 1.6060193786936359


Epoch 24/25: 100%|██████████| 121/121 [09:51<00:00,  4.89s/it, batch=121/121, loss=1.23]


Epoch 23 loss: 1.5243614291356615


Epoch 25/25: 100%|██████████| 121/121 [09:48<00:00,  4.87s/it, batch=121/121, loss=1.61]

Epoch 24 loss: 1.4963856699052922





In [9]:
torch.save(model.state_dict(), path_to_project_files + "cnn_model.pth")

## Testing the CNN

In [10]:
correct_top1 = 0
correct_top5 = 0
total = 0

model.to(device)
with torch.no_grad(): # No gradients needed for evaluation
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        # Top-1 Accuracy
        _, predicted = torch.max(outputs, 1)
        correct_top1 += (predicted == labels).sum().item()

        # Top-5 Accuracy
        top5_preds = torch.topk(outputs, 5, dim=1).indices
        correct_top5 += torch.sum(top5_preds.eq(labels.view(-1, 1))).item()

        total += labels.size(0)

# Compute accuracies
top1_accuracy = 100 * correct_top1 / total
top5_accuracy = 100 * correct_top5 / total

print(f"Top-1 Accuracy: {top1_accuracy:.2f}%")
print(f"Top-5 Accuracy: {top5_accuracy:.2f}%")


Top-1 Accuracy: 57.87%
Top-5 Accuracy: 89.49%


After various tweaks, I'm very happy with the current training accuracy of the CNN model, especially for the first check-in. Running at a 90% Top-5 accuracy is excellent, although there is certainly some more hyperparameter tweaking to be done. I may also test changing the model's structure, adding techniques such as dropout that have been used in other models for similar purposes. I would like to reach 80% Top-1 accuracy by the end.