In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import numpy as np
import struct
import os

I didn't have experience with using ubyte format of data, i tried to import directly from the keras datasets but it was too large to import so i used the help of internet resources and AI to figure out how to read these files.

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def load_images(path):
    with open(path, 'rb') as f:
        magic, num, rows, cols = struct.unpack('>IIII', f.read(16))
        data = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, 28, 28)
    return data

def load_labels(path):
    with open(path, 'rb') as f:
        magic, num = struct.unpack('>II', f.read(8))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
    return labels


Using device: cuda


We will now create a class which inherits from the torch utils dataset, which is used to load the fashion mnist data in a way that PyTorch's DataLoader can work with. The init method initializes all the contents of the class, the len method gives the length of the labels, which is used by the Dataloader to find the length and the getitme method is used to retrieve a single sample when iterating through the DataLoader. 

In [21]:
class FashionMNISTDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        img = Image.fromarray(self.images[idx], mode='L')
        if self.transform:
            img = self.transform(img)
        return img, self.labels[idx]


Now to convert our 1 channel grayscale images which are in 24 x 24, into images which fit with our Resnet model which accepts 3 channel data, so we convert the images into 3 channel RGB and 224 x 224. We are also normalizing the images so that their mean is 0 and standard dev is 1. 

In [22]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


Loading all the data from ubyte files and using the dataloaders to load the data into batches and shuffling them. Alos applying the transforms that we defined in the last cell.

In [23]:
train_images = load_images("train-images-idx3-ubyte")
train_labels = load_labels("train-labels-idx1-ubyte")
test_images = load_images("t10k-images-idx3-ubyte")
test_labels = load_labels("t10k-labels-idx1-ubyte")

train_dataset = FashionMNISTDataset(train_images, train_labels, transform=transform_train)
val_dataset = FashionMNISTDataset(test_images, test_labels, transform=transform_val)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


Importing the pretrained ResNet model and freezing the layers of the resnet backbone by applying requires grad to false. Then creating our top layer on top of the resnet layers, We apply a linear model with 256 neurons, then applying the Relu function which simply applies all negative values to 0 and positive values to the same value. At the end one more neural layer which collects from 256 neuron layer to 10 output neurons which classify the data into finally 10 values.

In [24]:
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 10)
)
model = model.to(device)


Now we unfreeze the lower levels of the model.

In [25]:
for name, param in model.named_parameters():
    if "layer4" in name or "fc" in name:
        param.requires_grad = True

Now we will define the optimizer which will be Adam and i ma using a learning rate of 0.0001, i tried to reduce it but the accuracy just decreased and i found this to be the best lr i could get to reduce overfitting. After that we define the training loop. 

In [26]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

def train_model(model, epochs=3):
    for epoch in range(epochs):
        model.train()
        total_loss, correct, total = 0.0, 0, 0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if i % 50 == 0:
                print(f"  Batch {i+1}/{len(train_loader)} | Loss: {loss.item():.4f}")
        acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Train Acc: {acc:.2f}%")
        scheduler.step()


Now to evaluate the model we use the validationg set to get the validation accuracy, we have changed the name of the test dataset to validation loader and used it to test the model. 

In [27]:
def evaluate_model(model):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    acc = 100 * correct / total
    print(f"Validation Accuracy: {acc:.2f}%")

print("GPU available:", torch.cuda.is_available())
train_model(model, epochs=3)
evaluate_model(model)

GPU available: True
  Batch 1/938 | Loss: 2.3265
  Batch 51/938 | Loss: 1.8261
  Batch 101/938 | Loss: 1.4752
  Batch 151/938 | Loss: 1.0733
  Batch 201/938 | Loss: 1.0699
  Batch 251/938 | Loss: 0.9255
  Batch 301/938 | Loss: 0.8745
  Batch 351/938 | Loss: 0.8164
  Batch 401/938 | Loss: 0.7938
  Batch 451/938 | Loss: 0.9775
  Batch 501/938 | Loss: 0.5607
  Batch 551/938 | Loss: 0.8053
  Batch 601/938 | Loss: 0.8526
  Batch 651/938 | Loss: 0.6568
  Batch 701/938 | Loss: 0.8315
  Batch 751/938 | Loss: 0.7686
  Batch 801/938 | Loss: 0.6961
  Batch 851/938 | Loss: 0.6733
  Batch 901/938 | Loss: 0.5725
Epoch [1/3], Loss: 858.6535, Train Acc: 71.73%
  Batch 1/938 | Loss: 0.6248
  Batch 51/938 | Loss: 0.4180
  Batch 101/938 | Loss: 0.5218
  Batch 151/938 | Loss: 0.6627
  Batch 201/938 | Loss: 0.6461
  Batch 251/938 | Loss: 0.5982
  Batch 301/938 | Loss: 0.4888
  Batch 351/938 | Loss: 0.7905
  Batch 401/938 | Loss: 0.5458
  Batch 451/938 | Loss: 0.5082
  Batch 501/938 | Loss: 0.5360
  Batch 5

Results : 

Accuracy - 83.5%
Loss - 485.20