### Fashion MNIST Classification using Pytorch

#### Below we will take the training set of 60000 images of Fashion MNIST dataset and:
#### 1. Load Dataset
#### 2. Train a baseline model
#### 3. Hyperparameter tuning
#### 4. Train the refined model
#### 5. Evaluate the model on test set of 10000 images of Fashion MNIST dataset

### 1. Load Dataset
#### We import the required packages and load the training and testing dataset.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True) #Already on by default, we write to remember this point

from warnings import simplefilter 
simplefilter(action='ignore',category=FutureWarning)

from torch.utils.tensorboard import SummaryWriter
from collections import OrderedDict
from collections import namedtuple
from itertools import product
import time
import pandas as pd
from IPython.display import clear_output

In [2]:
# Training split of Fashion MNIST, stored under ./data/FashionMNIST
# (download=True asks torchvision to fetch it).
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    # Single-step pipeline: convert each image to a tensor.
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [3]:
# Held-out test split of Fashion MNIST (train=False), same storage root
# and same tensor conversion as the training split.
test_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

#### The model outputs one score (logit) for each of the 10 classes. The function below takes the class with the highest score as the model's prediction and compares it with the true label to count the number of correct classifications.

In [4]:
def get_num_correct(preds,labels):
    """Count how many rows of `preds` have their largest score at the true label.

    `preds` is indexed as (sample, class); `labels` holds one class index
    per sample. Returns a plain Python int.
    """
    predicted_classes = preds.argmax(dim=1)  # highest-scoring class per sample
    matches = predicted_classes == labels
    return matches.sum().item()

#### Below we define our model. We build 2 convolutional layers followed by 2 fully connected layers and then an output layer.

In [5]:
class Network(nn.Module):
    """Small CNN: two 5x5 convolutions, two hidden linear layers, 10-way output."""

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (1 -> 6 -> 12 channels).
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Classifier head. 12*4*4 is the flattened size of the second
        # conv/pool stage for a 28x28 input (28 -> 24 -> 12 -> 8 -> 4).
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self,t):
        """Return the raw class scores (no softmax applied) for a batch `t`."""
        # Stage 1 and 2: convolution -> ReLU -> 2x2 max pooling.
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)

        # Flatten each sample's feature maps into one vector.
        t = t.reshape(-1, 12 * 4 * 4)

        # Two hidden fully connected layers, then the output layer.
        t = F.relu(self.fc1(t))
        t = F.relu(self.fc2(t))
        return self.out(t)

### 2. Baseline Model

In [6]:
# Baseline run: batch size 1000, Adam with lr=0.001, 5 epochs.
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=1000)
optimizer = optim.Adam(network.parameters(), lr=0.001)

for epoch in range(5):
    # Running totals for this epoch only.
    total_loss, total_correct = 0, 0

    for batch in train_loader:
        images, labels = batch

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        preds = network(images)                # forward pass
        loss = F.cross_entropy(preds, labels)  # batch loss

        loss.backward()   # compute gradients
        optimizer.step()  # update weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print('Epoch:',epoch,"Total Correct:",total_correct,"Loss:",total_loss)

Epoch: 0 Total Correct: 28561 Loss: 93.50459921360016
Epoch: 1 Total Correct: 41330 Loss: 48.831675827503204
Epoch: 2 Total Correct: 44051 Loss: 41.601802110672
Epoch: 3 Total Correct: 45336 Loss: 38.08242577314377
Epoch: 4 Total Correct: 46369 Loss: 35.712429225444794


In [7]:
# Training accuracy of the last epoch: correct predictions accumulated batch by
# batch (while the weights were still being updated) divided by the dataset size.
total_correct/len(train_set) #Accuracy

0.7728166666666667

#### Below we define a function to make the predictions on the test set.

In [8]:
def get_all_preds(model,loader):
    """Run `model` over every batch of `loader` and return all outputs stacked.

    Args:
        model: callable applied to each batch of images.
        loader: iterable yielding `(images, labels)` pairs; labels are ignored.

    Returns:
        One tensor with the per-batch outputs concatenated along dim 0, in
        loader order. An empty tensor is returned if the loader yields nothing.
    """
    batch_preds = []

    # Inference only: disable autograd so no graph is built or kept alive
    # for every batch of the dataset.
    with torch.no_grad():
        for images, _labels in loader:
            batch_preds.append(model(images))

    if not batch_preds:
        return torch.tensor([])

    # Concatenate once instead of re-copying the accumulator on every batch.
    return torch.cat(batch_preds, dim=0)

In [9]:
# Predict on the whole test set in batches of 1000 (loader order is the
# dataset order, since no shuffling is requested).
prediction_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=1000)
test_preds = get_all_preds(network, prediction_loader)

In [10]:
# Compare the stacked test predictions against the dataset's label tensor.
preds_correct = get_num_correct(test_preds, test_set.targets)

print("Total Correct:", preds_correct)
print("Accuracy:", preds_correct / len(test_set))

Total Correct: 7736
Accuracy: 0.7736


### So, we got a training accuracy of 77.28% and a test accuracy of 77.36%. Let us tune the hyperparameters to find a model with better accuracy.

### 3. Hyperparameter Tuning

In [11]:
class RunBuilder():
    """Expands a mapping of hyperparameter value lists into concrete runs."""

    @staticmethod
    def get_runs(params):
        """Return one `Run` named tuple per combination of parameter values.

        The fields of `Run` are the keys of `params`; the combinations are
        the Cartesian product of its value lists, in `itertools.product` order.
        """
        Run = namedtuple('Run', params.keys())
        return [Run(*combo) for combo in product(*params.values())]

In [12]:
class RunManager():
    """Tracks training statistics across epochs and runs and logs them to TensorBoard.

    Intended call order: `begin_run` / `end_run` around each hyperparameter
    run, `begin_epoch` / `end_epoch` around each epoch, and `track_loss` /
    `track_num_correct` once per batch.
    """

    def __init__(self):
        # Defining attributes to keep track of data across epochs and runs.

        # Per-epoch state (reset in begin_epoch).
        self.epoch_count = 0          # number of epochs started in the current run
        self.epoch_loss = 0           # summed loss for the current epoch
        self.epoch_num_correct = 0    # correct predictions in the current epoch
        self.epoch_start_time = None

        # Per-run state.
        self.run_params = None        # the Run tuple produced by RunBuilder
        self.run_count = 0
        self.run_data = []            # one result row per finished epoch, across all runs
        self.run_start_time = None

        self.network = None           # network being trained in the current run
        self.loader = None            # data loader used in the current run
        self.tb = None                # TensorBoard SummaryWriter for the current run

    def begin_run(self,run,network,loader):
        """Start a new run: store its objects and open a TensorBoard writer."""
        self.run_start_time = time.time()
        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment = f'-{run}')

        # Log one batch of images and the network graph for this run.
        images,labels = next(iter(loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images',grid)
        self.tb.add_graph(network,images)

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter for the next run."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start a new epoch: bump the counter and reset the per-epoch totals."""
        self.epoch_start_time = time.time()
        self.epoch_count +=1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Summarize the finished epoch in TensorBoard and in the results table."""
        # Imported explicitly so the method does not depend on `display`
        # having been injected into the namespace by the notebook.
        from IPython.display import display

        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        self.tb.add_scalar('Loss',loss,self.epoch_count)
        self.tb.add_scalar('Accuracy',accuracy,self.epoch_count)

        self._log_parameter_histograms()

        # Summary table kept outside of TensorBoard.
        results = OrderedDict()
        results['run'] = self.run_count
        results['epoch'] = self.epoch_count
        results['loss'] = loss
        results['accuracy'] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration

        # Add the hyperparameter names and values of this run to the row.
        for k,v in self.run_params._asdict().items():
            results[k] = v

        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data,orient='columns')

        clear_output(wait=True)  # replace the previously displayed table
        display(df)

    def _log_parameter_histograms(self):
        """Write a histogram of every parameter (and its gradient, if any) of this run's network."""
        # Uses the network stored by begin_run rather than a global of the
        # same name, so the manager logs the model it was actually given.
        for name,param in self.network.named_parameters():
            self.tb.add_histogram(name,param,self.epoch_count)
            # A parameter has no gradient until a backward pass reached it.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad',param.grad,self.epoch_count)

    def track_loss(self,loss,batch_size=None):
        """Add one batch's loss to the epoch total.

        `loss` is treated as a per-sample mean and is scaled by the number
        of samples so that `end_epoch` can divide by the dataset size.
        `batch_size` defaults to the loader's configured batch size; pass
        the actual number of samples for a shorter final batch.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self,preds,labels):
        """Add one batch's number of correct predictions to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds,labels)

    @torch.no_grad()
    def _get_num_correct(self,preds,labels):
        """Return how many rows of `preds` have their argmax equal to the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

In [13]:
# Grid of hyperparameters to try: every (lr, batch_size) combination is one run.
params = OrderedDict(
    lr=[0.01, 0.001],
    batch_size=[100, 1000, 10000],
)
m = RunManager()

for run in RunBuilder.get_runs(params):
    # Fresh network, loader and optimizer for each combination.
    network = Network()
    loader = torch.utils.data.DataLoader(train_set, batch_size=run.batch_size)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)

    for epoch in range(5):
        m.begin_epoch()

        for batch in loader:
            images, labels = batch

            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            preds = network(images)                # forward pass
            loss = F.cross_entropy(preds, labels)  # batch loss

            loss.backward()   # compute gradients
            optimizer.step()  # update weights

            m.track_loss(loss)
            m.track_num_correct(preds, labels)

        m.end_epoch()

    m.end_run()

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size
0,1,1,0.551635,0.791583,11.545965,11.68849,0.01,100
1,1,2,0.387204,0.856317,11.735774,23.56605,0.01,100
2,1,3,0.357973,0.867367,11.920362,35.619498,0.01,100
3,1,4,0.340564,0.874717,11.734072,47.495862,0.01,100
4,1,5,0.33186,0.877817,11.608809,59.235632,0.01,100
5,2,1,0.936382,0.64605,9.430365,10.027687,0.01,1000
6,2,2,0.506092,0.8079,9.550581,19.729244,0.01,1000
7,2,3,0.418793,0.84545,9.495852,29.345749,0.01,1000
8,2,4,0.365571,0.8649,9.697335,39.188346,0.01,1000
9,2,5,0.338059,0.874367,10.102572,49.434483,0.01,1000


#### The best accuracy is obtained using a batch size of 100 and a learning rate of 0.01. We train the model with these settings for 5 epochs and print the total loss and the number of correctly classified samples after each epoch.

### 4. Train the refined model

In [14]:
# Refined run: batch size 100, Adam with lr=0.01, 5 epochs.
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(5):
    # Running totals for this epoch only.
    total_loss, total_correct = 0, 0

    for batch in train_loader:
        images, labels = batch

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        preds = network(images)                # forward pass
        loss = F.cross_entropy(preds, labels)  # batch loss

        loss.backward()   # compute gradients
        optimizer.step()  # update weights

        total_loss += loss.item()
        total_correct += get_num_correct(preds, labels)

    print('Epoch:',epoch,"Total Correct:",total_correct,"Loss:",total_loss)

Epoch: 0 Total Correct: 46852 Loss: 345.1322228163481
Epoch: 1 Total Correct: 51484 Loss: 229.37994062900543
Epoch: 2 Total Correct: 52156 Loss: 212.39699675142765
Epoch: 3 Total Correct: 52392 Loss: 203.60295145213604
Epoch: 4 Total Correct: 52737 Loss: 197.2160417586565


In [15]:
# Training accuracy of the last epoch: correct predictions accumulated batch by
# batch (while the weights were still being updated) divided by the dataset size.
total_correct/len(train_set) #Accuracy

0.87895

### 5. Evaluate the model on the test set

In [16]:
# Predict on the whole test set in batches of 100 (loader order is the
# dataset order, since no shuffling is requested).
prediction_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=100)
test_preds = get_all_preds(network, prediction_loader)

In [17]:
# Compare the stacked test predictions against the dataset's label tensor.
preds_correct = get_num_correct(test_preds, test_set.targets)

print("Total Correct:", preds_correct)
print("Accuracy:", preds_correct / len(test_set))

Total Correct: 8615
Accuracy: 0.8615
