# GRU for classification on Fashion MNIST

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets
import matplotlib.pyplot as plt

In [2]:
#Fetch Fashion-MNIST through torchvision (downloaded on first use);
#T.ToTensor turns every image into a tensor
trainset = datasets.FashionMNIST(
    './../0. Data/',
    train=True,
    download=True,
    transform=T.Compose([T.ToTensor()]),
)

testset = datasets.FashionMNIST(
    './../0. Data/',
    train=False,
    download=True,
    transform=T.Compose([T.ToTensor()]),
)

#Randomly hold out 10,000 of the training images for validation,
#keeping the other 50,000 for training
train_set, val_set = torch.utils.data.random_split(trainset, [50000, 10000])

#Wrap every split in a shuffling DataLoader with mini-batches of 32
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=True)
test_loader = DataLoader(testset, batch_size=32, shuffle=True)

#NOTE: despite its name, full_train_set is a DataLoader over the complete
#(unsplit) training set
full_train_set = DataLoader(trainset, batch_size=32, shuffle=True)


In [3]:
#Use the first CUDA GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [4]:
#Create GRU class
class GRU(nn.Module):
    """Stacked GRU followed by a linear readout, used as a sequence classifier.

    Each input sample is read as a sequence of feature vectors of length
    ``input_size`` (for a 28x28 image with ``input_size=28`` that is one
    image row per time step). Only the GRU output at the last time step is
    passed to the readout layer.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the GRU hidden state.
        num_layers: number of stacked GRU layers.
        output_size: number of values produced per sample (class scores).
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        #save variables to use in other functions
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        #define GRU layer (batch_first=True -> tensors are (batch, seq, feature))
        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          batch_first=True)

        #convert output to desired output dimension (readout layer)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return a ``(batch, output_size)`` tensor of scores for batch ``x``.

        ``x`` may have any shape whose per-sample element count is a
        multiple of ``input_size`` (e.g. ``(batch, 1, 28, 28)``).
        """
        #Fold each sample into (seq_len, input_size). The sequence length is
        #inferred instead of hard-coding 28x28, and reshape (unlike view)
        #also accepts non-contiguous inputs.
        x = x.reshape(x.shape[0], -1, self.input_size)
        out, _ = self.gru(x)

        #We will use only the last time step's output
        return self.fc(out[:, -1, :])
        


In [5]:
#define training function
def train(Model, validate, max_epoch):
    """Train ``Model`` for ``max_epoch`` epochs, printing metrics every epoch.

    Uses the module-level ``optimizer``, ``loss_function``, ``device`` and the
    loaders ``train_loader``, ``val_loader`` and ``full_train_set``.

    Args:
        Model: network to optimise; called on an image batch it must return
            one score per class (10 classes are assumed here).
        validate: when true, train on ``train_loader`` and evaluate on
            ``val_loader`` after every epoch; otherwise train on
            ``full_train_set`` with no validation pass.
        max_epoch: number of passes over the training loader.

    Returns:
        None. Accuracy and mean loss are printed once per epoch.
    """
    num_classes = 10
    #the loader does not change between epochs, so pick it once
    loader = train_loader if validate else full_train_set

    for epoch in range(max_epoch):
        Train_Loss = []
        Val_Loss = []

        #confusion matrices, indexed [predicted class, actual class]
        cnf_tr = torch.zeros(num_classes, num_classes)
        cnf_val = torch.zeros(num_classes, num_classes)

        #set model to train mode (validation below switches it to eval mode)
        Model.train()

        #Train on training data
        for images, labels in loader:
            #set gradients to zero
            optimizer.zero_grad()
            #obtain output
            output = Model(images.to(device))
            #compute loss
            loss = loss_function(output, labels.to(device))
            #compute gradients
            loss.backward()
            #optimize weights
            optimizer.step()
            #record train loss
            Train_Loss.append(loss.item())

            with torch.no_grad():
                #predicted class = argmax over the class scores
                preds = torch.argmax(output, 1).cpu()
                #update the confusion matrix for the whole batch at once;
                #accumulate=True sums repeated (pred, label) pairs
                cnf_tr.index_put_((preds, labels.cpu()),
                                  torch.ones(preds.shape[0]),
                                  accumulate=True)

        if validate:
            #Evaluate on validation data
            with torch.no_grad():
                #set model to evaluation mode
                Model.eval()
                for images, labels in val_loader:
                    output = Model(images.to(device))
                    loss = loss_function(output, labels.to(device))
                    Val_Loss.append(loss.item())
                    preds = torch.argmax(output, 1).cpu()
                    cnf_val.index_put_((preds, labels.cpu()),
                                       torch.ones(preds.shape[0]),
                                       accumulate=True)

        #accuracy = correctly classified samples (diagonal) / all samples
        A_tr = (torch.trace(cnf_tr) / torch.sum(cnf_tr)).item()

        #print accuracy and mean loss in every epoch
        if validate:
            A_val = (torch.trace(cnf_val) / torch.sum(cnf_val)).item()
            print('epoch : ',epoch,'; Train_acc : ', np.round(A_tr,4), '; Val_acc : ', np.round(A_val,4),
                  '; Train_loss  ',np.round(np.mean(Train_Loss),4),  '; Val_loss  ',np.round(np.mean(Val_Loss),4))
        else:
            print('epoch = ',epoch,'; Train_acc : ', np.round(A_tr,4), '; Train_loss  ',np.round(np.mean(Train_Loss),4))

In [6]:
#Function to evaluate a model using performance metrics
def evaluate(cnf):
    """Print per-class precision, recall and F1 score plus overall accuracy.

    Args:
        cnf: square confusion-matrix tensor where ``cnf[p, a]`` counts the
            samples predicted as class ``p`` whose actual class is ``a``.
            The number of classes is taken from its size.

    Returns:
        None. The metrics table and the accuracy are printed.
    """
    num_classes = cnf.shape[0]
    #column sums = samples per actual class, row sums = samples per predicted class
    actual_count = torch.sum(cnf, dim=0)
    predicted_count = torch.sum(cnf, dim=1)
    #correct predictions sit on the diagonal
    correct_pred = torch.diagonal(cnf)
    #Precision (NaN for a class that was never predicted)
    precision = correct_pred / predicted_count
    #Recall (NaN for a class with no actual samples)
    recall = correct_pred / actual_count
    #F1-Score, written as 2*TP / (predicted + actual). This equals
    #2*P*R/(P+R) but yields 0 instead of NaN when a class has no correct
    #predictions.
    f1_score = 2 * correct_pred / (predicted_count + actual_count)
    #Accuracy
    Accuracy = torch.sum(correct_pred) / torch.sum(actual_count)
    print('\n',pd.DataFrame({'Class': list(range(num_classes)),
                 'Precision' : precision.cpu().numpy(),
                 'Recall' : recall.cpu().numpy(),
                 'F1_Score': f1_score.cpu().numpy()}))


    print('\nAccuracy  : ', Accuracy.item())

In [7]:
#function to test model
def test(Model):
    """Evaluate ``Model`` on the test data and print a metric summary.

    Uses the module-level ``test_loader``, ``loss_function`` and ``device``,
    and hands the resulting confusion matrix to ``evaluate``.

    Args:
        Model: trained network; called on an image batch it must return one
            score per class (10 classes are assumed here).

    Returns:
        None. The mean test loss and the ``evaluate`` summary are printed.
    """
    Loss = []
    #confusion matrix, indexed [predicted class, actual class]
    cnf = torch.zeros(10, 10)

    #evaluate on test data
    with torch.no_grad():
        #set model to evaluation mode
        Model.eval()
        for images, labels in test_loader:
            output = Model(images.to(device))
            loss = loss_function(output, labels.to(device))
            Loss.append(loss.item())
            #predicted class = argmax over the class scores
            preds = torch.argmax(output, 1).cpu()
            #update the confusion matrix for the whole batch at once;
            #accumulate=True sums repeated (pred, label) pairs
            cnf.index_put_((preds, labels.cpu()),
                           torch.ones(preds.shape[0]),
                           accumulate=True)

        #print test loss
        print('Test loss : ', np.mean(Loss))

    #print evaluation summary
    evaluate(cnf)

In [8]:
#Define the loss function: cross-entropy, which train() and test() compute
#from the model output and the batch labels
loss_function = nn.CrossEntropyLoss()

In [9]:
#Build the GRU classifier (28 features per time step, 64 hidden units,
#3 stacked layers, 10 outputs) and move it to the selected device
Model = GRU(28, 64, 3, 10)
Model = Model.to(device)
#Adam optimizer with its default hyper-parameters
optimizer = optim.Adam(params=Model.parameters())
#Train for 30 epochs, validating after every epoch
train(Model, max_epoch=30, validate=True)

epoch :  0 ; Train_acc :  0.7532 ; Val_acc :  0.8052 ; Train_loss   0.6806 ; Val_loss   0.5225
epoch :  1 ; Train_acc :  0.8402 ; Val_acc :  0.8471 ; Train_loss   0.4352 ; Val_loss   0.4122
epoch :  2 ; Train_acc :  0.8616 ; Val_acc :  0.8666 ; Train_loss   0.3731 ; Val_loss   0.3638
epoch :  3 ; Train_acc :  0.8741 ; Val_acc :  0.873 ; Train_loss   0.339 ; Val_loss   0.3549
epoch :  4 ; Train_acc :  0.8827 ; Val_acc :  0.878 ; Train_loss   0.315 ; Val_loss   0.3352
epoch :  5 ; Train_acc :  0.8898 ; Val_acc :  0.8876 ; Train_loss   0.2975 ; Val_loss   0.3158
epoch :  6 ; Train_acc :  0.8965 ; Val_acc :  0.8894 ; Train_loss   0.2779 ; Val_loss   0.308
epoch :  7 ; Train_acc :  0.9004 ; Val_acc :  0.8941 ; Train_loss   0.2635 ; Val_loss   0.3006
epoch :  8 ; Train_acc :  0.9064 ; Val_acc :  0.8928 ; Train_loss   0.2511 ; Val_loss   0.2973
epoch :  9 ; Train_acc :  0.9103 ; Val_acc :  0.8956 ; Train_loss   0.2395 ; Val_loss   0.2908
epoch :  10 ; Train_acc :  0.913 ; Val_acc :  0.8928 ; 

In [10]:
#Retrain from scratch for 21 epochs on the full training set (no validation split)
#Fresh model with the same architecture as before
Model = GRU(28, 64, 3, 10)
Model = Model.to(device)

#Fresh Adam optimizer bound to the new model's parameters
optimizer = optim.Adam(params=Model.parameters())
#train
train(Model, max_epoch=21, validate=False)

epoch =  0 ; Train_acc :  0.7628 ; Train_loss   0.6461
epoch =  1 ; Train_acc :  0.8486 ; Train_loss   0.4107
epoch =  2 ; Train_acc :  0.8663 ; Train_loss   0.3596
epoch =  3 ; Train_acc :  0.8797 ; Train_loss   0.326
epoch =  4 ; Train_acc :  0.8883 ; Train_loss   0.3024
epoch =  5 ; Train_acc :  0.8948 ; Train_loss   0.2853
epoch =  6 ; Train_acc :  0.9006 ; Train_loss   0.2691
epoch =  7 ; Train_acc :  0.9046 ; Train_loss   0.2553
epoch =  8 ; Train_acc :  0.9107 ; Train_loss   0.2411
epoch =  9 ; Train_acc :  0.9146 ; Train_loss   0.2316
epoch =  10 ; Train_acc :  0.9172 ; Train_loss   0.2221
epoch =  11 ; Train_acc :  0.9214 ; Train_loss   0.2124
epoch =  12 ; Train_acc :  0.9236 ; Train_loss   0.2033
epoch =  13 ; Train_acc :  0.9283 ; Train_loss   0.1914
epoch =  14 ; Train_acc :  0.9299 ; Train_loss   0.1872
epoch =  15 ; Train_acc :  0.9344 ; Train_loss   0.1771
epoch =  16 ; Train_acc :  0.9359 ; Train_loss   0.1713
epoch =  17 ; Train_acc :  0.9398 ; Train_loss   0.1638
epo

In [11]:
#Let's test the model now: prints the mean test loss, then the per-class
#precision/recall/F1 table and the overall accuracy
test(Model)

Test loss :  0.2945635337085008

    Class  Precision  Recall  F1_Score
0      0   0.855959   0.826  0.840712
1      1   0.995889   0.969  0.982260
2      2   0.845128   0.824  0.834430
3      3   0.869202   0.937  0.901829
4      4   0.831673   0.835  0.833333
5      5   0.977023   0.978  0.977511
6      6   0.738189   0.750  0.744048
7      7   0.964575   0.953  0.958753
8      8   0.986829   0.974  0.980372
9      9   0.958539   0.971  0.964729

Accuracy  :  0.9017000198364258


In [12]:
#Finally, let's save our model (only its learned parameters, via state_dict)
import os

#torch.save does not create missing folders, so make sure the target
#directory exists before writing the checkpoint
os.makedirs('./saved_models', exist_ok=True)
torch.save(Model.state_dict(), './saved_models/GRU_FMNIST.pth')

In [None]:
#To Retrieve: rebuild the same architecture, then load the saved parameters into it
Modelx = GRU(input_size=28,
        hidden_size=64,
        num_layers=3,
        output_size=10).to(device)

#map_location moves the stored tensors onto the current device, so a
#checkpoint written on a GPU machine also loads on a CPU-only one
Modelx.load_state_dict(torch.load('./saved_models/GRU_FMNIST.pth', map_location=device))