In [1]:
## Libraries used in the Assignment
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import numpy as np
import os

## Setting the Features and Target

In [2]:
class FashionMNISTDataset(Dataset):
    """Map-style dataset over in-memory images and their integer labels.

    Args:
        data: Images indexed along the first axis. May be a ``torch.Tensor``
            or any array-like that ``numpy.asarray`` accepts.
        targets: Labels aligned with ``data``; ``targets[idx]`` must be
            convertible with ``int()``.
        transform: Optional callable applied to each ``uint8`` numpy image.
            When omitted (or falsy), the image is converted to a float32
            tensor, divided by 255 and given a leading channel axis.
    """

    def __init__(self, data, targets, transform=None):
        # Pixels are kept as a numpy array so that transforms receive numpy
        # input. Tensors are detached and moved to CPU first; anything else
        # goes through numpy.asarray, so plain arrays work as well.
        if isinstance(data, torch.Tensor):
            self.data = data.detach().cpu().numpy()
        else:
            self.data = np.asarray(data)
        self.targets = targets      # labels, indexed in step with self.data
        self.transform = transform  # optional per-image preprocessing

    def __len__(self):
        """Return the number of images held by the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the example at position ``idx``."""
        image = self.data[idx].astype(np.uint8)
        label = int(self.targets[idx])

        if self.transform:
            image = self.transform(image)
        else:
            # Default preprocessing: float32 tensor divided by the maximum
            # 8-bit grey level, plus a leading single-channel axis.
            image = torch.as_tensor(image, dtype=torch.float32) / 255.0
            image = image.unsqueeze(0)

        return image, label



## Neural Network Architecture

In [3]:
class FashionMNISTNet(nn.Module):
    """CNN classifier: three convolutional stages followed by a dense head.

    Every stage is Conv2d(3x3, padding=1) -> ReLU -> BatchNorm2d -> MaxPool2d(2),
    taking the channel count 1 -> 32 -> 64 -> 128. The 3x3/padding-1 convolutions
    keep the spatial size and each pooling step halves it (rounding down), so a
    28x28 single-channel input shrinks 28 -> 14 -> 7 -> 3. The dense head
    therefore expects 128 * 3 * 3 = 1152 features and emits 10 class scores.
    """

    @staticmethod
    def _conv_stage(channels_in, channels_out):
        """Build one Conv -> ReLU -> BatchNorm -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=channels_in, out_channels=channels_out, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(channels_out),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def __init__(self):
        super().__init__()

        # Feature extractor. The input has a single channel (grey-scale images);
        # each stage consumes the channels produced by the previous one.
        self.conv1 = self._conv_stage(1, 32)
        self.conv2 = self._conv_stage(32, 64)
        self.conv3 = self._conv_stage(64, 128)

        # Collapse the per-sample feature maps into one vector.
        self.flatten = nn.Flatten()

        # Classifier head: hidden layer with dropout, then one output per class.
        self.fc = nn.Sequential(
            nn.Linear(128 * 3 * 3, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Run the three conv stages, flatten, and return the class scores."""
        features = self.conv3(self.conv2(self.conv1(x)))
        return self.fc(self.flatten(features))

## Data Loader

In [4]:
def create_data_loaders(batch_size=64):
    """Download FashionMNIST and return ``(train_loader, test_loader)``.

    Both splits are fetched into ``./data`` without any torchvision-side
    transform; their raw ``data``/``targets`` are then wrapped in
    ``FashionMNISTDataset`` with a ToTensor + Normalize((0.5,), (0.5,))
    pipeline. Only the training loader shuffles.

    Args:
        batch_size: Number of examples per batch for both loaders.
    """
    # Convert each image to a tensor, then normalize with mean 0.5 / std 0.5.
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    def _wrap_split(is_train):
        """Fetch one split in its original form and wrap it in the custom dataset."""
        raw_split = datasets.FashionMNIST(
            root='./data',
            train=is_train,
            download=True,
            transform=None,
        )
        return FashionMNISTDataset(
            raw_split.data,
            raw_split.targets,
            transform=preprocessing,
        )

    # Train split first, then test split (same download order as before).
    train_split = _wrap_split(True)
    test_split = _wrap_split(False)

    train_loader = DataLoader(train_split, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_split, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader



## Training and Evaluating the Neural Network

In [5]:
def train_model(model, train_loader, epochs=10, learning_rate=0.001):
    """Train ``model`` with Adam and cross-entropy loss, then return it.

    Args:
        model: Network to optimize; its parameters are updated in place.
        train_loader: Iterable yielding ``(data, target)`` batches.
        epochs: Number of full passes over ``train_loader``.
        learning_rate: Learning rate handed to ``optim.Adam``.

    Returns:
        The same ``model`` object that was passed in. Returning it makes
        ``model = train_model(model, ...)`` safe: without a return value that
        assignment would rebind ``model`` to ``None``.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        # Re-enable training mode every epoch in case something switched the
        # model to eval mode in between.
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            optimizer.zero_grad()  # clear gradients left over from the previous step
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Report the current batch loss every 100 batches.
            if batch_idx % 100 == 0:
                print(f'Epoch: {epoch}, Batch: {batch_idx}, Loss: {loss.item():.4f}')

    return model

# Evaluation function
def evaluate_model(model, test_loader):
    """Print and return the classification accuracy (in percent) of ``model``.

    The model is switched to eval mode and run without gradient tracking over
    every ``(data, target)`` batch of ``test_loader``; the predicted class of
    each example is the index of its largest output score.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            predictions = model(inputs).argmax(dim=1)
            n_seen += labels.size(0)
            n_correct += (predictions == labels).sum().item()

    accuracy = 100 * n_correct / n_seen
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy



In [8]:
# Save model weights
def save_model(model, path='model.pt'):
    """Write the model's ``state_dict`` (weights and buffers only) to ``path``."""
    weights = model.state_dict()
    torch.save(weights, path)

# Load model weights
def load_model(path='model.pt'):
    """Rebuild a ``FashionMNISTNet`` from a file written with ``torch.save``.

    The file may contain either a bare ``state_dict`` or a checkpoint dict
    that stores the weights under the ``'model_state_dict'`` key.

    Args:
        path: Location of the saved file.

    Returns:
        A ``FashionMNISTNet`` with the stored weights, already in eval mode.
    """
    model = FashionMNISTNet()

    # Load onto CPU, where the freshly built model lives, so files saved from
    # a GPU session still load on a machine without one.
    loaded = torch.load(path, map_location='cpu')

    # Unwrap checkpoint-style files; a bare state_dict has no such key.
    if isinstance(loaded, dict) and 'model_state_dict' in loaded:
        state_dict = loaded['model_state_dict']
    else:
        state_dict = loaded

    model.load_state_dict(state_dict)
    model.eval()
    return model

## Running the NN Module

In [None]:
train_loader, test_loader = create_data_loaders(batch_size=64)

model = FashionMNISTNet()

# train_model optimizes `model` in place, so keep using this object instead of
# rebinding `model` to the call's result (which would lose the network whenever
# the function returns nothing).
train_model(model, train_loader, epochs=10, learning_rate=0.001)

accuracy = evaluate_model(model, test_loader)
print(f'Test Accuracy: {accuracy:.2f}%')

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 11122795.74it/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 257795.37it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 4480288.11it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 8970617.78it/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Epoch: 0, Batch: 0, Loss: 2.3323
Epoch: 0, Batch: 100, Loss: 0.3912
Epoch: 0, Batch: 200, Loss: 0.5291
Epoch: 0, Batch: 300, Loss: 0.4307
Epoch: 0, Batch: 400, Loss: 0.5122
Epoch: 0, Batch: 500, Loss: 0.1563
Epoch: 0, Batch: 600, Loss: 0.3422
Epoch: 0, Batch: 700, Loss: 0.3656
Epoch: 0, Batch: 800, Loss: 0.4270
Epoch: 0, Batch: 900, Loss: 0.3002
Epoch: 1, Batch: 0, Loss: 0.2173
Epoch: 1, Batch: 100, Loss: 0.2629
Epoch: 1, Batch: 200, Loss: 0.2655
Epoch: 1, Batch: 300, Loss: 0.2803
Epoch: 1, Batch: 400, Loss: 0.2923
Epoch: 1, Batch: 500, Loss: 0.2105
Epoch: 1, Batch: 600, Loss: 0.2467
Epoch: 1, Batch: 700, Loss: 0.1749
Epoch: 1, Batch: 800, Loss: 0.3282
Epoch: 1, Batch: 900, Loss: 0.3768
Epoch: 2, Batch: 0, Loss: 0.1442
Epoch: 2, Batch: 100, Loss: 0.2409
Epoch: 2, Batch: 200, Loss: 0.1589
Epoch: 2, Batch: 300, Loss: 0.3776
Epoch: 2, Batch: 400, Loss: 0.2509
Epoch: 2, Batch: 500, Loss: 0.3021
Epoch: 

## Saving weights and model

In [11]:
PATH = "model.pt"
EPOCH = 10

# Serialize a small checkpoint: the epoch counter plus the model's weights.
torch.save(
    obj={'epoch': EPOCH, 'model_state_dict': model.state_dict()},
    f=PATH,
)

In [14]:
PATH = "model.pt"

# Start from a freshly initialized network; its weights are overwritten below.
model = FashionMNISTNet()

# Read the checkpoint onto CPU (where the new model lives) so a file saved from
# a GPU session still loads on a CPU-only machine.
checkpoint = torch.load(PATH, map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])
EPOCH = checkpoint['epoch']

# NOTE: no optimizer state is restored. The checkpoint written by this notebook
# holds only 'epoch' and 'model_state_dict'; to resume training, also save
# optimizer.state_dict() under its own key and load it back here.

In [15]:
def save_model_weights(model, path='model_weights.pt'):
    """Save the model's ``state_dict`` to ``path`` and print a confirmation."""
    weights = model.state_dict()
    torch.save(weights, path)
    print(f"Model weights saved to {path}")

# Write the current `model`'s state_dict to 'model_weights.pt'.
save_model_weights(model, 'model_weights.pt')


Model weights saved to model_weights.pt
