## Google Colab setup (don't run locally)

In [1]:
from google.colab import drive
import os
import shutil

# Mount drive to colab
drive.mount('/content/drive', force_remount=True)

# setting up paths
path_to_project_files = '/content/drive/MyDrive/School/Homework/Spring2025/DL/Project/'
existing = os.path.join(path_to_project_files, 'kaggle.json')
path_to_colab_utils = '/root/.kaggle'
target = os.path.join(path_to_colab_utils, 'kaggle.json')

# move the key to the colab root
os.makedirs(path_to_colab_utils, exist_ok=True)
shutil.copy(existing, target)
os.chmod(target, 600)

# download the data into /content (which is temporary)
!kaggle datasets download -d xhlulu/leafsnap-dataset -p /content --unzip

Mounted at /content/drive
Dataset URL: https://www.kaggle.com/datasets/xhlulu/leafsnap-dataset
License(s): copyright-authors


In [None]:
import sys

# Edit this path to where you've uploaded the repo files, so the imports work.
sys.path.append('/content/drive/MyDrive/School/Homework/Spring2025/DL/Project/')

## Library Imports

In [None]:
from autoencoder import *
from dataloader import *
from cnn import *

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import v2
import os

In [None]:
import matplotlib.pyplot as plt

def showTensorInNotebook(tensor):
    """
    This takes a (3[RGB], H, W) tensor in R[0.0, 1.0] and displays it with matplotlib.
    """
    image = tensor.detach().cpu().numpy().transpose(1,2,0) # move the channel axis to the end, because PIL and matplotlib hate each other
    plt.imshow(image)
    plt.axis('off')
    plt.show()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


## Building the data loader

These are transforms that allow us to ingest the image tensors with some extra confusion at training time. `processor` makes the data loader spit out tensors, and `noiser` adds Gaussian noise.

In [None]:
# This just processes the images.
NOISE_RATIO = 0.1
H, W = 256, 256

processor = v2.Compose([
    v2.PILToTensor(), # the LeafsnapDataset class gives PIL Images, convert to torch Tensor
    v2.Resize((H, W)), # resize
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomRotation(degrees=(-30, 30)),
    lambda x: x / 255.0, # convert N[0, 255] to R[0.0, 1.0]
    lambda x: torch.clip(x + NOISE_RATIO*torch.randn_like(x), 0.0, 1.0), # add noise
])

lab_processor = v2.Compose([
    v2.PILToTensor(), # the LeafsnapDataset class gives PIL Images, convert to torch Tensor
    lambda x: v2.functional.crop(x, 0, 0, 600, 600),
    v2.Resize((H, W)), # resize
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomRotation(degrees=(-30, 30)),
    lambda x: x / 255.0, # convert N[0, 255] to R[0.0, 1.0]
    lambda x: torch.clip(x + NOISE_RATIO*torch.randn_like(x), 0.0, 1.0), # add noise
])

test_processor = v2.Compose([
    v2.PILToTensor(), # the LeafsnapDataset class gives PIL Images, convert to torch Tensor
    v2.Resize((H, W)), # resize
    lambda x: x / 255.0, # convert N[0, 255] to R[0.0, 1.0]
])


In [None]:
BATCH_SIZE = 32

root_directory = os.path.join(os.getcwd(), 'leafsnap-dataset') # you make need to edit this path to work, though, it works on Colab by default and works locally if you keep the dataset at the root of the repo
train_image_paths_file = os.path.join(path_to_project_files, "train.txt")
train_dataset = LeafsnapDataset(train_image_paths_file, root_directory, use_segmented=False, source="both", transform=processor, lab_transform=lab_processor)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

validation_image_paths_file = os.path.join(path_to_project_files, "validation.txt")
validation_dataset = LeafsnapDataset(validation_image_paths_file, root_directory, use_segmented=False, source="both", transform=processor, lab_transform=lab_processor)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_image_paths_file = os.path.join(path_to_project_files, "test.txt")
test_dataset = LeafsnapDataset(test_image_paths_file, root_directory, use_segmented=False, source="both", transform=test_processor, lab_transform=lab_processor)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

## Training the Convolutional Neural Network (CNN)

This model uses a modified version of ResNet from Homework 2. It specifically is a version of ResNet34, with kernel size raised to 5, and skip layers at sizes 32, 64, and 128. Images have been downscaled to 128x128, and the segmentation image is used as a 4th layer, resulting in an input size of 4x128x128.

In [None]:
model = resnet(3, 185, device=device)
model.load_state_dict(torch.load(os.path.join(path_to_project_files, 'cnn_model.pth'), weights_only=True))

train_resnet_model(model, train_dataloader, validation_dataloader, 20, .001, device=device)

Epoch 1/20: 100%|██████████| 342/342 [08:33<00:00,  1.50s/it, batch=342/342, loss=3.99]
Test Epoch 1/20: 100%|██████████| 43/43 [00:33<00:00,  1.28it/s, batch=43/43, test_loss=3.29]


Epoch 1 loss: 3.6492191650714094, val loss: 3.7890299420024074


Epoch 2/20: 100%|██████████| 342/342 [08:32<00:00,  1.50s/it, batch=342/342, loss=4.04]
Test Epoch 2/20: 100%|██████████| 43/43 [00:34<00:00,  1.26it/s, batch=43/43, test_loss=2.93]


Epoch 2 loss: 3.3935030252612823, val loss: 3.307366836902707


Epoch 3/20: 100%|██████████| 342/342 [08:33<00:00,  1.50s/it, batch=342/342, loss=3.82]
Test Epoch 3/20: 100%|██████████| 43/43 [00:33<00:00,  1.28it/s, batch=43/43, test_loss=2.96]


Epoch 3 loss: 3.18974488869048, val loss: 3.599104886831239


Epoch 4/20: 100%|██████████| 342/342 [08:32<00:00,  1.50s/it, batch=342/342, loss=3.2]
Test Epoch 4/20: 100%|██████████| 43/43 [00:33<00:00,  1.27it/s, batch=43/43, test_loss=4.43]


Epoch 4 loss: 3.0089794355526305, val loss: 4.3354515752127005


Epoch 5/20: 100%|██████████| 342/342 [08:33<00:00,  1.50s/it, batch=342/342, loss=3.44]
Test Epoch 5/20: 100%|██████████| 43/43 [00:34<00:00,  1.26it/s, batch=43/43, test_loss=2.65]


Epoch 5 loss: 2.8740871837961746, val loss: 3.034803723180017


Epoch 6/20: 100%|██████████| 342/342 [08:33<00:00,  1.50s/it, batch=342/342, loss=3.64]
Test Epoch 6/20: 100%|██████████| 43/43 [00:33<00:00,  1.28it/s, batch=43/43, test_loss=4.67]


Epoch 6 loss: 2.7342682581895974, val loss: 3.629049905510836


Epoch 7/20: 100%|██████████| 342/342 [08:34<00:00,  1.51s/it, batch=342/342, loss=3.1]
Test Epoch 7/20: 100%|██████████| 43/43 [00:34<00:00,  1.25it/s, batch=43/43, test_loss=2.82]


Epoch 7 loss: 2.6223694357955667, val loss: 3.14322558114695


Epoch 8/20: 100%|██████████| 342/342 [08:32<00:00,  1.50s/it, batch=342/342, loss=3.24]
Test Epoch 8/20: 100%|██████████| 43/43 [00:33<00:00,  1.28it/s, batch=43/43, test_loss=3.5]


Epoch 8 loss: 2.518923728786714, val loss: 3.434870819712794


Epoch 9/20: 100%|██████████| 342/342 [08:33<00:00,  1.50s/it, batch=342/342, loss=2.25]
Test Epoch 9/20: 100%|██████████| 43/43 [00:34<00:00,  1.26it/s, batch=43/43, test_loss=3.7]


Epoch 9 loss: 2.4048568463464925, val loss: 2.7042835618174355


Epoch 10/20: 100%|██████████| 342/342 [08:32<00:00,  1.50s/it, batch=342/342, loss=4.77]
Test Epoch 10/20: 100%|██████████| 43/43 [00:32<00:00,  1.31it/s, batch=43/43, test_loss=4.24]


Epoch 10 loss: 2.3295940802111264, val loss: 3.9410534792168197


Epoch 11/20: 100%|██████████| 342/342 [08:30<00:00,  1.49s/it, batch=342/342, loss=3.07]
Test Epoch 11/20: 100%|██████████| 43/43 [00:33<00:00,  1.29it/s, batch=43/43, test_loss=3.05]


Epoch 11 loss: 2.234630935721927, val loss: 2.6116063511648844


Epoch 12/20: 100%|██████████| 342/342 [08:29<00:00,  1.49s/it, batch=342/342, loss=2.68]
Test Epoch 12/20: 100%|██████████| 43/43 [00:33<00:00,  1.28it/s, batch=43/43, test_loss=3.34]


Epoch 12 loss: 2.1410595778136225, val loss: 3.0183202555013255


Epoch 13/20: 100%|██████████| 342/342 [08:30<00:00,  1.49s/it, batch=342/342, loss=2.52]
Test Epoch 13/20: 100%|██████████| 43/43 [00:33<00:00,  1.30it/s, batch=43/43, test_loss=3.17]


Epoch 13 loss: 2.0678655613235564, val loss: 2.430154575858005


Epoch 14/20: 100%|██████████| 342/342 [08:30<00:00,  1.49s/it, batch=342/342, loss=2.09]
Test Epoch 14/20: 100%|██████████| 43/43 [00:33<00:00,  1.27it/s, batch=43/43, test_loss=5.65]


Epoch 14 loss: 2.0056070758585345, val loss: 4.480227176533189


Epoch 15/20: 100%|██████████| 342/342 [08:32<00:00,  1.50s/it, batch=342/342, loss=2.84]
Test Epoch 15/20: 100%|██████████| 43/43 [00:34<00:00,  1.26it/s, batch=43/43, test_loss=2.39]


Epoch 15 loss: 1.9230773759167097, val loss: 2.154624112816744


Epoch 16/20: 100%|██████████| 342/342 [08:30<00:00,  1.49s/it, batch=342/342, loss=2.18]
Test Epoch 16/20: 100%|██████████| 43/43 [00:32<00:00,  1.32it/s, batch=43/43, test_loss=1.59]


Epoch 16 loss: 1.8566918700759174, val loss: 1.9099135703818744


Epoch 17/20: 100%|██████████| 342/342 [08:32<00:00,  1.50s/it, batch=342/342, loss=2.14]
Test Epoch 17/20: 100%|██████████| 43/43 [00:34<00:00,  1.25it/s, batch=43/43, test_loss=2.7]


Epoch 17 loss: 1.8014644802662365, val loss: 3.5328893606052842


Epoch 18/20: 100%|██████████| 342/342 [08:32<00:00,  1.50s/it, batch=342/342, loss=2.39]
Test Epoch 18/20: 100%|██████████| 43/43 [00:33<00:00,  1.30it/s, batch=43/43, test_loss=5.21]


Epoch 18 loss: 1.739818641316821, val loss: 4.548547983169556


Epoch 19/20: 100%|██████████| 342/342 [08:30<00:00,  1.49s/it, batch=342/342, loss=2.35]
Test Epoch 19/20: 100%|██████████| 43/43 [00:33<00:00,  1.30it/s, batch=43/43, test_loss=2.16]


Epoch 19 loss: 1.6909921291278818, val loss: 1.9544205970542377


Epoch 20/20: 100%|██████████| 342/342 [08:29<00:00,  1.49s/it, batch=342/342, loss=3.74]
Test Epoch 20/20: 100%|██████████| 43/43 [00:33<00:00,  1.27it/s, batch=43/43, test_loss=2.28]

Epoch 20 loss: 1.6382464541677844, val loss: 1.8368195711180222





In [None]:
torch.save(model.state_dict(), path_to_project_files + "cnn_model.pth")

## Testing the CNN

In [None]:
correct_top1 = 0
correct_top5 = 0
total = 0

model.to(device)
with torch.no_grad(): # No gradients needed for evaluation
    model.eval()
    for inputs, labels in test_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        # Top-1 Accuracy
        _, predicted = torch.max(outputs, 1)
        correct_top1 += (predicted == labels).sum().item()

        # Top-5 Accuracy
        top5_preds = torch.topk(outputs, 5, dim=1).indices
        correct_top5 += torch.sum(top5_preds.eq(labels.view(-1, 1))).item()

        total += labels.size(0)

# Compute accuracies
top1_accuracy = 100 * correct_top1 / total
top5_accuracy = 100 * correct_top5 / total

print(f"Top-1 Accuracy: {top1_accuracy:.2f}%")
print(f"Top-5 Accuracy: {top5_accuracy:.2f}%")


Top-1 Accuracy: 50.91%
Top-5 Accuracy: 83.61%


After various tweaks, I'm very happy with the current training accuracy of the CNN model, especially for the first check-in. Running at a 90% Top-5 accuracy is excellent, although there is certainly some more hyperparameter tweaking to be done. I may also test changing the model's structure, adding techniques such as dropout that have been used in other models for similar purposes. I would like to reach 80% Top-1 accuracy by the end.