# ML Project 2


## Imports and Such


In [82]:
import subprocess

# Installation on Google Colab
# The `google.colab` import only succeeds on Colab; everywhere else it raises
# ImportError and the whole setup step is skipped.
# NOTE(review): this downloads the hymenoptera archive into `datasets/`, while
# the data-loading cell reads from `data/` -- confirm which dataset is intended.
try:
    import os
    import sys
    import google.colab
    # Install with the kernel's own interpreter so the packages land in the
    # environment this notebook is actually running in.
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'skorch', 'torchvision'])
    os.makedirs('datasets', exist_ok=True)
    subprocess.run(['wget', '-nc', '--no-check-certificate',
                   'https://download.pytorch.org/tutorial/hymenoptera_data.zip', '-P', 'datasets'])
    # '-d' and its target directory are separate arguments (the original was
    # missing the comma, silently concatenating them into '-ddatasets').
    subprocess.run(
        ['unzip', '-u', 'datasets/hymenoptera_data.zip', '-d', 'datasets'])
except ImportError:
    pass

In [83]:
import os
from urllib import request
from zipfile import ZipFile

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision import datasets, models, transforms

from skorch import NeuralNetClassifier
from skorch.helper import predefined_split

# Seed PyTorch's global random number generator so that runs are more repeatable.
torch.manual_seed(360)

<torch._C.Generator at 0x26b3194e590>

## Loading dataset


In [84]:
# Root folder; the 'train' and 'valid' sub-folders are read below.
data_dir = 'data/'

# Per-channel mean / std handed to Normalize in both pipelines
# (these match the commonly used ImageNet statistics).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop and horizontal flip, then tensor conversion
# and normalisation.
train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
train_transforms = transforms.Compose(train_steps)

# Validation pipeline: fixed resize and centre crop, then the same tensor
# conversion and normalisation.
val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
val_transforms = transforms.Compose(val_steps)

train_root = os.path.join(data_dir, 'train')
val_root = os.path.join(data_dir, 'valid')

train_ds = datasets.ImageFolder(root=train_root, transform=train_transforms)
val_ds = datasets.ImageFolder(root=val_root, transform=val_transforms)

## Loading Pretrained Model

We use a pretrained `ResNet18` model whose final fully connected layer is replaced with a new fully connected layer sized for our five sport classes:


In [85]:
class PretrainedModel(nn.Module):
    """Pretrained ResNet18 whose final ``fc`` layer is replaced by a new linear layer.

    Args:
        output_features: Number of outputs of the replacement ``fc`` layer.
    """

    def __init__(self, output_features):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # The new head keeps the input width of the layer it replaces.
        backbone.fc = nn.Linear(backbone.fc.in_features, output_features)
        # The attribute must stay named `model`: parameter names then start
        # with 'model.', which the Freezer callback's 'model.fc' check relies on.
        self.model = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output."""
        return self.model(x)

### Callbacks


In [86]:
from skorch.callbacks import LRScheduler
from skorch.callbacks import Checkpoint
from skorch.callbacks import Freezer

# StepLR schedule: multiply the learning rate by `gamma` every `step_size` steps.
lrscheduler = LRScheduler(policy='StepLR', gamma=0.1, step_size=7)

# Write the module parameters to 'best_model.pt' whenever the monitored
# 'valid_acc_best' flag is set in the training history.
checkpoint = Checkpoint(monitor='valid_acc_best', f_params='best_model.pt')


def _outside_final_layer(param_name):
    """Return True for parameter names that do not start with 'model.fc'."""
    return not param_name.startswith('model.fc')


# Freeze every parameter selected by the predicate above, i.e. everything
# except the replaced `model.fc` layer.
freezer = Freezer(_outside_final_layer)

### skorch.NeuralNetClassifier


In [94]:
# Classifier used for training: SGD with momentum on PretrainedModel, validated
# on the predefined `val_ds` split, with the LR scheduler, checkpoint and
# freezer callbacks attached.
net = NeuralNetClassifier(
    PretrainedModel,
    criterion=nn.CrossEntropyLoss,
    lr=0.001,
    batch_size=3,
    max_epochs=25,
    module__output_features=5,
    optimizer=optim.SGD,
    optimizer__momentum=0.9,
    iterator_train__shuffle=True,
    iterator_train__num_workers=2,
    iterator_valid__num_workers=2,
    train_split=predefined_split(val_ds),
    callbacks=[lrscheduler, checkpoint, freezer],
    # Passed explicitly because fit() is called with y=None, so the class list
    # cannot be inferred from a label array.
    classes=['Baseball', 'Basketball', 'Football', 'Hockey', 'Volleyball'],
    # Use the GPU automatically when PyTorch can see one; otherwise stay on CPU.
    device='cuda' if torch.cuda.is_available() else 'cpu',
)

# Note from an earlier run: 95% acc with lr=0.001 and a batch size of 4
# (the configuration above uses batch_size=3).

In [95]:
import os.path

# Training is only run when no 'best_model.pt' checkpoint is on disk yet;
# delete that file to force a retrain.
has_checkpoint = os.path.isfile('best_model.pt')
if not has_checkpoint:
    # y=None: no separate label array is passed alongside the dataset.
    net.fit(train_ds, y=None)



  epoch    train_loss    valid_acc    valid_loss    cp      lr      dur
-------  ------------  -----------  ------------  ----  ------  -------
      1        [36m1.5287[0m       [32m0.8603[0m        [35m0.4927[0m     +  0.0010  64.3417
      2        [36m1.1403[0m       0.8329        [35m0.4357[0m        0.0010  60.4716
      3        [36m0.9670[0m       0.8603        [35m0.4002[0m        0.0010  68.7568
      4        1.0358       [32m0.8959[0m        [35m0.2799[0m     +  0.0010  58.0561
      5        [36m0.9049[0m       [32m0.8986[0m        [35m0.2737[0m     +  0.0010  61.9801


In [89]:
from PIL import Image

# Deterministic preprocessing for inference: the same resize / centre-crop /
# tensor / normalise steps as the validation pipeline.
model_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Fresh classifier used only for inference; its weights come from the
# checkpoint file rather than from the training run above.
net = NeuralNetClassifier(
    PretrainedModel,
    criterion=nn.CrossEntropyLoss,
    module__output_features=5,
)

# LOAD MODEL FROM FILE
net.initialize()
net.load_params(f_params='best_model.pt')

test_dir = 'data/test/'
# Sorted so the report below is reproducible (os.listdir order is arbitrary);
# hidden entries and sub-directories are not images and are skipped.
test_images = sorted(
    name for name in os.listdir(test_dir)
    if not name.startswith('.') and os.path.isfile(os.path.join(test_dir, name))
)

classes = ['Baseball', 'Basketball', 'Football', 'Hockey', 'Volleyball']
class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}

correct = 0
total = 0

# Test the model
for img_name in test_images:
    # Extract class name from filename by removing digits and extension
    class_name = ''.join(filter(str.isalpha, img_name.split('.')[0])).capitalize()
    if class_name not in class_to_idx:
        # Report and skip instead of failing with a bare KeyError.
        print(f'Skipping {img_name}: cannot infer a known class from the filename')
        continue

    img_path = os.path.join(test_dir, img_name)
    # Force three channels (grayscale / RGBA files would otherwise break the
    # 3-channel Normalize) and close the file handle once the tensor is built.
    with Image.open(img_path) as img:
        img_transformed = model_transforms(img.convert('RGB')).unsqueeze(0)

    # Prediction
    output = net.predict(img_transformed)
    predicted = int(output[0])

    # Check prediction
    if predicted == class_to_idx[class_name]:
        correct += 1
    else:
        print(f'Predicted: {classes[predicted]}, Actual: {class_name}, filename: {img_name}')
    total += 1

# Calculate accuracy (guarding against an empty test directory)
if total:
    accuracy = correct / total
    print(f'Accuracy: {accuracy * 100:.2f}%')
else:
    accuracy = float('nan')
    print(f'No labelled test images found in {test_dir}')



Predicted: Football, Actual: Baseball, filename: baseball4.jpg
Predicted: Basketball, Actual: Baseball, filename: baseball5.jpg
Predicted: Volleyball, Actual: Basketball, filename: basketball3.jpg
Accuracy: 88.00%
