# Project I - Image Classification

**Team**: Filip Kołodziejczyk, Jerzy Kraszewski

## Introduction

The goal of this project is to create a model that classifies images into 10 classes. The dataset used for this project is CINIC-10, which is a combination of CIFAR-10 and ImageNet. The dataset contains 270,000 images divided into 10 classes of equal size. The classes are: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck. The images are 32x32 pixels in size and are in RGB format. The data is divided into training, validation, and test sets, with every class equally represented in each set.
More details about the dataset can be found [here](https://datashare.ed.ac.uk/handle/10283/3192) and [here](https://www.kaggle.com/datasets/mengcius/cinic10/data).

TODO: Add citation for this dataset

## Environment setup

We load all the necessary libraries and select the best available PyTorch backend (CUDA, MPS, or CPU) for optimal performance.

In [39]:
import torch
from torchvision.transforms import v2
from torchvision import models, datasets
from torch.utils.data import DataLoader, Subset
import torch.nn as nn
import torch.optim as optim
from zipfile import ZipFile
import os
import shutil
import pandas as pd
import timm

# Choose the compute backend by priority: CUDA first, then MPS, and CPU
# as the fallback when neither accelerator is available.
if torch.cuda.is_available():
    backend_name = "cuda"
elif torch.backends.mps.is_available():
    backend_name = "mps"
else:
    backend_name = "cpu"

device = torch.device(backend_name)

print(f"Using device: {device}")

Using device: mps


## Extracting and adjusting data split

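The original dataset comes with a predefined data split. We adjust it here so that, for every class, 70% of the images go to the training set, 15% to the test set, and 15% to the validation set.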

In [15]:
# Dataset must be downloaded from the link provided in Introduction and put into `data` directory.
# It should be renamed to `cinic10.zip`.

archive_path = "data/cinic10.zip"
data_dir = "data/cinic10"
data_subdirs = ["train", "test", "valid"]
# Target share of each class's images per split, in the same order as `data_subdirs`.
props = [0.7, 0.15, 0.15]

# Compare with a tolerance: a sum of binary floats is not guaranteed to be exactly 1.
if abs(sum(props) - 1) > 1e-9:
    raise ValueError("Props must sum to 1")

# Extracting the data (only once).
# Re-extracting over an already rebalanced tree would restore the files that were
# moved out of a split while their moved copies stay in place, duplicating images
# across splits. Delete `data_dir` manually to force a fresh extraction.
if not os.path.isdir(data_dir):
    with ZipFile(archive_path, "r") as zip_ref:
        zip_ref.extractall(data_dir)

# Class names are the sub-directories of the train split. Stray non-directory
# entries are ignored, and the list is sorted so the order is stable between runs.
train_dir = os.path.join(data_dir, "train")
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

# Changing the data split
for cls in classes:
    dirs = [os.path.join(data_dir, subdir, cls) for subdir in data_subdirs]
    sizes = [len(os.listdir(d)) for d in dirs]
    total = sum(sizes)
    # round() instead of int(): truncation can undershoot by one when p * total
    # lands just below an integer because of float error.
    target_sizes = [round(p * total) for p in props]
    # Positive diff: the split needs more files; negative diff: it has a surplus.
    diffs = [target - size for target, size in zip(target_sizes, sizes)]

    for src, src_dir in enumerate(dirs):
        for dst, dst_dir in enumerate(dirs):
            if diffs[src] >= 0:
                # The surplus of this split has been fully handed out.
                break
            if diffs[dst] <= 0:
                continue
            count = min(-diffs[src], diffs[dst])
            # Sorted listing makes the choice of moved files deterministic.
            # NOTE(review): assumes file names are unique across the splits of a
            # class; a name clash could overwrite the destination file - confirm.
            for f in sorted(os.listdir(src_dir))[:count]:
                shutil.move(os.path.join(src_dir, f), os.path.join(dst_dir, f))
            diffs[src] += count
            diffs[dst] -= count

# Checking the sizes
cls_sizes = {
    cls: [len(os.listdir(os.path.join(data_dir, subdir, cls))) for subdir in data_subdirs]
    for cls in classes
}
pd.DataFrame.from_dict(cls_sizes, orient='index', columns=[f"{subdir} size" for subdir in data_subdirs])

Unnamed: 0,train size,test size,valid size
cat,18900,4050,4050
dog,18900,4050,4050
truck,18900,4050,4050
bird,18900,4050,4050
airplane,18900,4050,4050
ship,18900,4050,4050
frog,18900,4050,4050
horse,18900,4050,4050
deer,18900,4050,4050
automobile,18900,4050,4050


## Loading and augmenting the data

In [43]:
batch_size = 128
# Fixed seed for drawing the prototyping subsets, so that the same images (and
# therefore comparable metrics) are used on every run of this cell.
subset_seed = 42

# No augmentation for now except default for ResNet
transforms = v2.Compose([
    v2.Resize((224, 224)),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create datasets
train = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=transforms)
test = datasets.ImageFolder(os.path.join(data_dir, 'test'), transform=transforms)
valid = datasets.ImageFolder(os.path.join(data_dir, 'valid'), transform=transforms)

# Taking subsets for rapid prototyping.
# A dedicated, seeded generator keeps the draw reproducible without touching the
# global RNG state that the shuffling train loader relies on.
subset_generator = torch.Generator().manual_seed(subset_seed)
train_subset_indices = torch.randperm(len(train), generator=subset_generator)[:10000]
test_subset_indices = torch.randperm(len(test), generator=subset_generator)[:1000]
valid_subset_indices = torch.randperm(len(valid), generator=subset_generator)[:1000]

# Subset is given plain Python ints rather than 0-d tensors as indices.
train = Subset(train, train_subset_indices.tolist())
test = Subset(test, test_subset_indices.tolist())
valid = Subset(valid, valid_subset_indices.tolist())

# Print dataset sizes
print(f"Train size: {len(train)}")
print(f"Test size: {len(test)}")
print(f"Valid size: {len(valid)}")

# Create dataloaders (only the training data is shuffled)
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False)
valid_loader = DataLoader(valid, batch_size=batch_size, shuffle=False)

Train size: 10000
Test size: 1000
Valid size: 1000


## Defining the ResNet model

In [41]:
# Pretrained ResNet-18 created with as many output classes as the dataset has.
model = timm.create_model("resnet18", pretrained=True, num_classes=len(classes))
model = model.to(device)
# Adam is reached through `torch.optim` rather than the `optim` alias: this
# assignment rebinds `optim` to the optimizer instance, so `optim.Adam` would
# raise AttributeError as soon as this cell is executed a second time.
# The variable name `optim` is kept because the training cell calls
# `optim.zero_grad()` and `optim.step()`.
optim = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_fn = nn.CrossEntropyLoss()

## Training the model

In [44]:
epochs = 1

for epoch in range(epochs):
    # Switch back to training mode at the start of EVERY epoch: the validation
    # pass at the end of the loop body puts the model in eval mode, so calling
    # train() only once before the loop would leave all later epochs in eval mode.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optim.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optim.step()

        # The loss is weighted by the batch size before accumulating, so the
        # division by the dataset size below yields a per-sample average even
        # when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct / total

    print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

    # Validation pass: eval mode, no gradient tracking, no optimizer steps.
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    valid_loss = running_loss / len(valid_loader.dataset)
    valid_acc = correct / total

    print(f'Validation Loss: {valid_loss:.4f}, Validation Accuracy: {valid_acc:.4f}')

## Model evaluation

In [37]:
# Measure top-1 accuracy on the test loader: eval mode, gradients disabled.
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # The predicted class is the index of the largest output per sample.
        logits = model(batch_inputs)
        predicted = logits.argmax(dim=1)

        hits = predicted.eq(batch_labels).sum().item()
        correct += hits
        total += batch_labels.shape[0]

print(f'Accuracy: {correct / total:.4f}')

Accuracy: 0.8158
