# Advanced MLPs

### Dropout

### Batch Normalization

### Data Augmentation

### Combined BN, Dropout and DA

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets

### Dropout
Dropout is a very popular technique that acts as an ensemble method for neural networks. To use it, we just need to add one dropout layer before every fully connected layer. The dropout percentage is a hyperparameter that needs to be tuned.

### Batch Normalization
Batch normalization is a technique for training very deep neural networks that standardizes the inputs to a layer for each mini-batch. This has the effect of stabilizing the learning process and dramatically reducing the number of training epochs required to train deep networks. To implement this, we need to add a batch norm layer after every hidden fully connected layer (i.e. before the next fully connected layer).

### Data Augmentation
Data augmentation is a strategy that enables practitioners to significantly increase the diversity of data available for training models, without actually collecting new data. Data augmentation techniques such as cropping, padding, and horizontal flipping are commonly used to train large neural networks. Let's now augment our training dataset by randomly cropping images. We write a function that performs a series of operations on the data and call it before every iteration with probability 0.5.

In [2]:
#Let's download data using torchvision
#Both splits use the same preprocessing: convert each PIL image to a tensor
trainset = datasets.FashionMNIST('./../0. Data/',
                                 download = True,
                                 train = True,
                                 transform = T.Compose([
                                     T.ToTensor()
                                 ]))

testset = datasets.FashionMNIST('./../0. Data/',
                                 download = True,
                                 train = False,
                                 transform = T.Compose([
                                     T.ToTensor()
                                 ]))

#split training data to training and validation data
#a seeded generator keeps the train/validation membership identical across runs,
#so validation numbers from different experiments are comparable
train_set, val_set = torch.utils.data.random_split(
    trainset, [50000, 10000],
    generator = torch.Generator().manual_seed(0))

#Convert data to dataloader
#training loaders reshuffle every epoch
train_loader = DataLoader(train_set,
                          batch_size = 32,
                          shuffle = True)

#evaluation loaders do not need shuffling: metrics are aggregated over the whole split
val_loader = DataLoader(val_set,
                        batch_size = 32,
                        shuffle = False)

test_loader = DataLoader(testset,
                         batch_size = 32,
                         shuffle = False)

#NOTE: despite its name this is a DataLoader over the full (train + validation) training set
full_train_set  = DataLoader(trainset,
                             batch_size = 32,
                             shuffle = True)


In [3]:
#Pick the first CUDA device when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [4]:
#Create a MLP class defining our neural network
class MLP(nn.Module):
    """Three-layer fully connected classifier with optional batch norm and dropout.

    Args:
        input_len: number of features per sample after flattening
            (callers in this notebook pass 784).
        output_len: number of output logits, i.e. number of classes.
        d_out: if truthy, apply dropout after each hidden layer.
        b_norm: if truthy, apply batch normalization after each hidden ReLU.
        p_drop: dropout probability used when ``d_out`` is enabled.
    """
    def __init__(self, input_len, output_len, d_out, b_norm, p_drop=0.4):
        super(MLP, self).__init__()
        #three fully connected layers
        #the batch norm layers are always created (even when b_norm is off),
        #so every configuration exposes the same set of parameters
        self.fc1 = nn.Linear(in_features=input_len, out_features=512)
        self.bn1 = torch.nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.bn2 = torch.nn.BatchNorm1d(256)
        #output size follows output_len instead of a hard-coded 10
        self.fc3 = nn.Linear(in_features=256, out_features=output_len)
        self.b_norm = b_norm
        self.d_out = d_out
        self.p_drop = p_drop


    def forward(self, x):
        """Return raw class logits of shape (batch, output_len) for input batch ``x``."""
        #convert image to a one dimensional tensor before feeding to neural network
        x = x.flatten(start_dim=1)
        #activation function is relu
        x = F.relu(self.fc1(x))
        if(self.b_norm):
            x = self.bn1(x)
        #dropout (only active while the module is in training mode)
        if(self.d_out):
            x = F.dropout(x, p=self.p_drop, training=self.training)
        x = F.relu(self.fc2(x))
        if(self.b_norm):
            x = self.bn2(x)
        #dropout
        if(self.d_out):
            x = F.dropout(x, p=self.p_drop, training=self.training)
        #no softmax here: the logits are returned as-is
        x = self.fc3(x)

        return x
        

In [5]:
#augmentation pipeline applied to one image tensor at a time:
#tensor -> PIL image -> resize to 32 -> random 28x28 crop -> tensor
dataAugment = T.Compose([
    T.ToPILImage(),
    T.Resize(32),
    T.RandomCrop(28),
    T.ToTensor(),
])

In [6]:
#define training function
def train(Model, aug, validate, max_epoch):
    """Train ``Model`` for ``max_epoch`` epochs and print metrics once per epoch.

    Relies on notebook-level globals: ``optimizer``, ``loss_function``, ``device``,
    ``dataAugment`` and the loaders ``train_loader``, ``val_loader`` and
    ``full_train_set``.

    Args:
        Model: network to optimise. Inputs are moved to ``device`` before the
            forward pass, so the model is expected to be on that device already.
        aug: if truthy, each batch is passed through ``dataAugment`` with
            probability 0.5.
        validate: if truthy, train on ``train_loader`` and evaluate on
            ``val_loader`` after every epoch; otherwise train on ``full_train_set``.
        max_epoch: number of passes over the training loader.

    Returns:
        None. Accuracy and mean loss are printed for every epoch.
    """
    def tally(cnf, logits, targets):
        #add one count per sample at cnf[predicted class, actual class];
        #a single vectorised call replaces the per-sample Python loop
        predicted = torch.argmax(logits.detach(), 1).cpu()
        actual = targets.cpu().long()
        cnf.index_put_((predicted, actual),
                       torch.ones(predicted.shape[0], dtype=cnf.dtype),
                       accumulate=True)

    def accuracy(cnf):
        #correct predictions sit on the diagonal of the confusion matrix
        return (torch.diagonal(cnf).sum() / cnf.sum()).item()

    #train split when validating, full training set otherwise
    loader = train_loader if validate else full_train_set

    for epoch in range(max_epoch):
        Train_Loss = []
        Val_Loss = []

        #confusion matrices, indexed as [predicted, actual]
        cnf_tr = torch.zeros(10,10)
        cnf_val = torch.zeros(10,10)

        #set model to train mode (the validation pass below switches it to eval mode)
        Model.train()

        #Train on training data
        for sample in loader:
            inputs, targets = sample[0], sample[1]
            #data augmentation: the whole batch is augmented with probability 0.5
            if(torch.rand(1)<0.5 and aug):
                inputs = torch.stack([dataAugment(img) for img in inputs])

            #set gradients to zero
            optimizer.zero_grad()
            #obtain output
            output = Model(inputs.to(device))
            #compute loss
            loss = loss_function(output, targets.to(device))
            #compute gradients
            loss.backward()
            #optimize weights
            optimizer.step()
            #record train loss
            Train_Loss.append(loss.item())
            #update entries in confusion matrix
            tally(cnf_tr, output, targets)

        A_tr = accuracy(cnf_tr)

        if(validate):
            #Evaluate on validation data
            with torch.no_grad():
                #set model to evaluation mode
                Model.eval()
                for sample in val_loader:
                    inputs, targets = sample[0], sample[1]
                    output = Model(inputs.to(device))
                    loss = loss_function(output, targets.to(device))
                    Val_Loss.append(loss.item())
                    #update entries in confusion matrix
                    tally(cnf_val, output, targets)
            A_val = accuracy(cnf_val)

            #print metrics in every epoch
            print('epoch : ',epoch,'; Train_acc : ', np.round(A_tr,4), '; Val_acc : ', np.round(A_val,4),  
                  '; Train_loss  ',np.round(np.mean(Train_Loss),4),  '; Val_loss  ',np.round(np.mean(Val_Loss),4))
        else:
            print('epoch = ',epoch,'; Train_acc : ', np.round(A_tr,4), '; Train_loss  ',np.round(np.mean(Train_Loss),4))

In [7]:
#Function to evaluate model using performance metrics
def evaluate(cnf):
    """Print per-class precision, recall and F1 score plus overall accuracy.

    Args:
        cnf: square confusion matrix tensor indexed as ``cnf[predicted, actual]``;
            the number of classes is taken from its first dimension.

    Returns:
        None. The metrics table and the accuracy are printed.
    """
    def safe_div(num, den):
        #0/0 (e.g. a class that was never predicted) is reported as 0 instead of NaN
        ratio = num / den
        return torch.where(den > 0, ratio, torch.zeros_like(ratio))

    cnf = cnf.detach().cpu()
    n_classes = cnf.shape[0]
    #column sums = samples per actual class, row sums = samples per predicted class
    actual_count = torch.sum(cnf, dim=0)
    predicted_count = torch.sum(cnf, dim=1)
    correct_pred = torch.diagonal(cnf)
    #Precision
    precision = safe_div(correct_pred, predicted_count)
    #Recall
    recall = safe_div(correct_pred, actual_count)
    #F1-Score
    f1_score = safe_div(2*precision*recall, precision+recall)
    #Accuracy
    Accuracy = torch.sum(correct_pred)/torch.sum(actual_count)
    print('\n',pd.DataFrame({'Class':list(range(n_classes)),
                 'Precision' : precision.numpy(),
                 'Recall' : recall.numpy(),
                 'F1_Score': f1_score.numpy()}))


    print('\nAccuracy  : ', Accuracy.item())
    

In [8]:
#function to test model
def test(Model):
    Loss = []
    #confusion matrix
    cnf = torch.zeros(10,10)

    #evaluate on test data
    with torch.no_grad():
        #set model to evaluation mode
        Model.eval()
        #evaluate on test data
        for i, sample in enumerate(test_loader):
            output = Model(sample[0].to(device))
            loss = loss_function(output, sample[1].to(device))
            Loss.append(loss.item())
            #calculate output by argmax
            output = torch.argmax(output, 1)
            #update entries in confusion matrix
            for i in range(output.shape[0]):
                cnf[output[i],sample[1][i]] +=1

        #print test loss
        print('Test loss : ', np.mean(Loss))

    #print evaluation summary
    evaluate(cnf)

In [9]:
#cross-entropy criterion, read as a global by the train() and test() helpers
loss_function = torch.nn.CrossEntropyLoss()

### Only dropout

In [10]:
#Dropout-only network (no batch norm), moved to the selected device
Model = MLP(input_len=784, output_len=10, d_out=True, b_norm=False).to(device)
#Adam with its default settings; train() reads this global
optimizer = optim.Adam(params=Model.parameters())
#30 epochs on the train split, with validation after every epoch and no augmentation
train(Model=Model, aug=False, validate=True, max_epoch=30)

epoch :  0 ; Train_acc :  0.7862 ; Val_acc :  0.8367 ; Train_loss   0.5842 ; Val_loss   0.4416
epoch :  1 ; Train_acc :  0.8373 ; Val_acc :  0.8541 ; Train_loss   0.4477 ; Val_loss   0.4008
epoch :  2 ; Train_acc :  0.8462 ; Val_acc :  0.8645 ; Train_loss   0.4182 ; Val_loss   0.3713
epoch :  3 ; Train_acc :  0.855 ; Val_acc :  0.8752 ; Train_loss   0.3954 ; Val_loss   0.3477
epoch :  4 ; Train_acc :  0.8613 ; Val_acc :  0.8792 ; Train_loss   0.3812 ; Val_loss   0.3369
epoch :  5 ; Train_acc :  0.8645 ; Val_acc :  0.8739 ; Train_loss   0.3704 ; Val_loss   0.3419
epoch :  6 ; Train_acc :  0.8663 ; Val_acc :  0.8731 ; Train_loss   0.3629 ; Val_loss   0.3428
epoch :  7 ; Train_acc :  0.8684 ; Val_acc :  0.8683 ; Train_loss   0.3555 ; Val_loss   0.3483
epoch :  8 ; Train_acc :  0.8732 ; Val_acc :  0.8822 ; Train_loss   0.3444 ; Val_loss   0.3271
epoch :  9 ; Train_acc :  0.8759 ; Val_acc :  0.8872 ; Train_loss   0.3387 ; Val_loss   0.3154
epoch :  10 ; Train_acc :  0.8761 ; Val_acc :  0.87

In [11]:
#Retrain from scratch for 20 epochs on the full training set (no validation pass)
#Fresh dropout-only network
Model = MLP(input_len=784, output_len=10, d_out=True, b_norm=False).to(device)
#Fresh Adam optimizer bound to the new parameters
optimizer = optim.Adam(params=Model.parameters())
#run training
train(Model=Model, aug=False, validate=False, max_epoch=20)

epoch =  0 ; Train_acc :  0.7929 ; Train_loss   0.5684
epoch =  1 ; Train_acc :  0.84 ; Train_loss   0.4404
epoch =  2 ; Train_acc :  0.8516 ; Train_loss   0.4077
epoch =  3 ; Train_acc :  0.8554 ; Train_loss   0.3944
epoch =  4 ; Train_acc :  0.863 ; Train_loss   0.3761
epoch =  5 ; Train_acc :  0.8647 ; Train_loss   0.3694
epoch =  6 ; Train_acc :  0.8694 ; Train_loss   0.3595
epoch =  7 ; Train_acc :  0.8716 ; Train_loss   0.3516
epoch =  8 ; Train_acc :  0.8725 ; Train_loss   0.3446
epoch =  9 ; Train_acc :  0.8754 ; Train_loss   0.3404
epoch =  10 ; Train_acc :  0.8787 ; Train_loss   0.3345
epoch =  11 ; Train_acc :  0.881 ; Train_loss   0.3266
epoch =  12 ; Train_acc :  0.8817 ; Train_loss   0.3215
epoch =  13 ; Train_acc :  0.8813 ; Train_loss   0.3231
epoch =  14 ; Train_acc :  0.8837 ; Train_loss   0.3174
epoch =  15 ; Train_acc :  0.8846 ; Train_loss   0.312
epoch =  16 ; Train_acc :  0.8861 ; Train_loss   0.3081
epoch =  17 ; Train_acc :  0.887 ; Train_loss   0.3096
epoch = 

In [12]:
#Report test loss and per-class metrics for the dropout-only model trained above
test(Model=Model)

Test loss :  0.3248896585485806

    Class  Precision  Recall  F1_Score
0      0   0.840864   0.856  0.848365
1      1   0.992783   0.963  0.977665
2      2   0.790402   0.807  0.798615
3      3   0.841155   0.932  0.884250
4      4   0.806218   0.778  0.791858
5      5   0.985656   0.962  0.973684
6      6   0.731148   0.669  0.698695
7      7   0.934223   0.980  0.956564
8      8   0.977000   0.977  0.977000
9      9   0.971370   0.950  0.960566

Accuracy  :  0.8873999714851379


In [13]:
#Persist the dropout-only model's weights (state_dict only, not the whole module)
torch.save(obj=Model.state_dict(), f='./saved_models/dropout_MLP.pth')

### Only Batch Normalization

In [14]:
#Batch-norm-only network (dropout disabled), moved to the selected device
Model = MLP(input_len=784, output_len=10, d_out=False, b_norm=True).to(device)
#Adam with its default settings; train() reads this global
optimizer = optim.Adam(params=Model.parameters())
#30 epochs on the train split, with validation after every epoch and no augmentation
train(Model=Model, aug=False, validate=True, max_epoch=30)

epoch :  0 ; Train_acc :  0.8245 ; Val_acc :  0.8518 ; Train_loss   0.4882 ; Val_loss   0.4087
epoch :  1 ; Train_acc :  0.8539 ; Val_acc :  0.857 ; Train_loss   0.3994 ; Val_loss   0.4011
epoch :  2 ; Train_acc :  0.8662 ; Val_acc :  0.8714 ; Train_loss   0.3645 ; Val_loss   0.3682
epoch :  3 ; Train_acc :  0.8762 ; Val_acc :  0.879 ; Train_loss   0.3375 ; Val_loss   0.3562
epoch :  4 ; Train_acc :  0.8847 ; Val_acc :  0.8582 ; Train_loss   0.3174 ; Val_loss   0.4786
epoch :  5 ; Train_acc :  0.8883 ; Val_acc :  0.8783 ; Train_loss   0.303 ; Val_loss   0.3546
epoch :  6 ; Train_acc :  0.8913 ; Val_acc :  0.8803 ; Train_loss   0.2912 ; Val_loss   0.3612
epoch :  7 ; Train_acc :  0.8947 ; Val_acc :  0.8883 ; Train_loss   0.282 ; Val_loss   0.345
epoch :  8 ; Train_acc :  0.8977 ; Val_acc :  0.8813 ; Train_loss   0.275 ; Val_loss   0.3997
epoch :  9 ; Train_acc :  0.9017 ; Val_acc :  0.8833 ; Train_loss   0.2644 ; Val_loss   0.405
epoch :  10 ; Train_acc :  0.904 ; Val_acc :  0.8861 ; Tr

In [15]:
#Retrain from scratch for 20 epochs on the full training set (no validation pass)
#Fresh batch-norm-only network
Model = MLP(input_len=784, output_len=10, d_out=False, b_norm=True).to(device)
#Fresh Adam optimizer bound to the new parameters
optimizer = optim.Adam(params=Model.parameters())
#run training
train(Model=Model, aug=False, validate=False, max_epoch=20)

epoch =  0 ; Train_acc :  0.8264 ; Train_loss   0.4778
epoch =  1 ; Train_acc :  0.8572 ; Train_loss   0.3894
epoch =  2 ; Train_acc :  0.8702 ; Train_loss   0.356
epoch =  3 ; Train_acc :  0.8765 ; Train_loss   0.333
epoch =  4 ; Train_acc :  0.8806 ; Train_loss   0.3187
epoch =  5 ; Train_acc :  0.8866 ; Train_loss   0.3039
epoch =  6 ; Train_acc :  0.8911 ; Train_loss   0.2911
epoch =  7 ; Train_acc :  0.8944 ; Train_loss   0.2816
epoch =  8 ; Train_acc :  0.8964 ; Train_loss   0.2762
epoch =  9 ; Train_acc :  0.9013 ; Train_loss   0.2622
epoch =  10 ; Train_acc :  0.9044 ; Train_loss   0.2546
epoch =  11 ; Train_acc :  0.9076 ; Train_loss   0.2452
epoch =  12 ; Train_acc :  0.9102 ; Train_loss   0.242
epoch =  13 ; Train_acc :  0.9118 ; Train_loss   0.2333
epoch =  14 ; Train_acc :  0.9137 ; Train_loss   0.2296
epoch =  15 ; Train_acc :  0.9157 ; Train_loss   0.2229
epoch =  16 ; Train_acc :  0.9182 ; Train_loss   0.2175
epoch =  17 ; Train_acc :  0.9191 ; Train_loss   0.2162
epoch

In [16]:
#Report test loss and per-class metrics for the batch-norm-only model trained above
test(Model=Model)

Test loss :  0.5158356818290183

    Class  Precision  Recall  F1_Score
0      0   0.883333   0.689  0.774157
1      1   0.979021   0.980  0.979510
2      2   0.817021   0.768  0.791753
3      3   0.890547   0.895  0.892768
4      4   0.823775   0.790  0.806534
5      5   0.968464   0.952  0.960161
6      6   0.585926   0.791  0.673191
7      7   0.958244   0.895  0.925543
8      8   0.980352   0.948  0.963905
9      9   0.901018   0.974  0.936088

Accuracy  :  0.8682000041007996


In [17]:
#Finally, let's save our model
#use a dedicated file name: saving to dropout_MLP.pth here would overwrite the
#dropout-only checkpoint written earlier in the notebook
torch.save(Model.state_dict(), './saved_models/batchnorm_MLP.pth')

### Only Data Augmentation

In [18]:
#Plain network: neither dropout nor batch norm, so augmentation is the only regulariser
Model = MLP(input_len=784, output_len=10, d_out=False, b_norm=False).to(device)
#Adam with its default settings; train() reads this global
optimizer = optim.Adam(params=Model.parameters())
#30 epochs on the train split with batch-level augmentation and per-epoch validation
train(Model=Model, aug=True, validate=True, max_epoch=30)

epoch :  0 ; Train_acc :  0.7573 ; Val_acc :  0.8282 ; Train_loss   0.6428 ; Val_loss   0.4574
epoch :  1 ; Train_acc :  0.8132 ; Val_acc :  0.8429 ; Train_loss   0.4953 ; Val_loss   0.4292
epoch :  2 ; Train_acc :  0.8276 ; Val_acc :  0.8565 ; Train_loss   0.4549 ; Val_loss   0.3765
epoch :  3 ; Train_acc :  0.8387 ; Val_acc :  0.8624 ; Train_loss   0.4245 ; Val_loss   0.3729
epoch :  4 ; Train_acc :  0.8441 ; Val_acc :  0.8668 ; Train_loss   0.4105 ; Val_loss   0.3548
epoch :  5 ; Train_acc :  0.8512 ; Val_acc :  0.8677 ; Train_loss   0.3941 ; Val_loss   0.3526
epoch :  6 ; Train_acc :  0.8536 ; Val_acc :  0.8744 ; Train_loss   0.3838 ; Val_loss   0.3423
epoch :  7 ; Train_acc :  0.8582 ; Val_acc :  0.8795 ; Train_loss   0.3721 ; Val_loss   0.3222
epoch :  8 ; Train_acc :  0.8621 ; Val_acc :  0.8746 ; Train_loss   0.3647 ; Val_loss   0.3312
epoch :  9 ; Train_acc :  0.863 ; Val_acc :  0.8778 ; Train_loss   0.3598 ; Val_loss   0.327
epoch :  10 ; Train_acc :  0.8657 ; Val_acc :  0.88 

In [19]:
#Retrain from scratch for 20 epochs on the full training set (no validation pass)
#Fresh plain network (no dropout, no batch norm)
Model = MLP(input_len=784, output_len=10, d_out=False, b_norm=False).to(device)
#Fresh Adam optimizer bound to the new parameters
optimizer = optim.Adam(params=Model.parameters())
#run training with augmentation enabled
train(Model=Model, aug=True, validate=False, max_epoch=20)

epoch =  0 ; Train_acc :  0.7641 ; Train_loss   0.6254
epoch =  1 ; Train_acc :  0.8168 ; Train_loss   0.484
epoch =  2 ; Train_acc :  0.835 ; Train_loss   0.4375
epoch =  3 ; Train_acc :  0.8428 ; Train_loss   0.4133
epoch =  4 ; Train_acc :  0.8483 ; Train_loss   0.4001
epoch =  5 ; Train_acc :  0.8515 ; Train_loss   0.3897
epoch =  6 ; Train_acc :  0.8547 ; Train_loss   0.3794
epoch =  7 ; Train_acc :  0.8605 ; Train_loss   0.3655
epoch =  8 ; Train_acc :  0.8611 ; Train_loss   0.3637
epoch =  9 ; Train_acc :  0.8662 ; Train_loss   0.353
epoch =  10 ; Train_acc :  0.8686 ; Train_loss   0.3444
epoch =  11 ; Train_acc :  0.8711 ; Train_loss   0.3381
epoch =  12 ; Train_acc :  0.8722 ; Train_loss   0.3364
epoch =  13 ; Train_acc :  0.8762 ; Train_loss   0.3287
epoch =  14 ; Train_acc :  0.8754 ; Train_loss   0.3301
epoch =  15 ; Train_acc :  0.876 ; Train_loss   0.3231
epoch =  16 ; Train_acc :  0.877 ; Train_loss   0.3185
epoch =  17 ; Train_acc :  0.8811 ; Train_loss   0.3152
epoch =

In [20]:
#Report test loss and per-class metrics for the augmentation-only model trained above
test(Model=Model)

Test loss :  0.32735916778159596

    Class  Precision  Recall  F1_Score
0      0   0.810606   0.856  0.832685
1      1   0.998962   0.962  0.980132
2      2   0.855294   0.727  0.785946
3      3   0.890909   0.882  0.886432
4      4   0.761991   0.842  0.800000
5      5   0.980553   0.958  0.969145
6      6   0.692161   0.724  0.707722
7      7   0.939806   0.968  0.953695
8      8   0.987817   0.973  0.980353
9      9   0.963928   0.962  0.962963

Accuracy  :  0.8853999972343445


In [21]:
#Persist the augmentation-only model's weights (state_dict only, not the whole module)
torch.save(obj=Model.state_dict(), f='./saved_models/data_aug_MLP.pth')

### Combining Dropout, BN and Data Augmentation

All three methods usually do not perform well together, as batch normalization usually eliminates the need for dropout. However, we can tune hyperparameters to obtain good results. We can obtain better results by decreasing the dropout rate.

In [23]:
#Network with both dropout and batch norm enabled, moved to the selected device
Model = MLP(input_len=784, output_len=10, d_out=True, b_norm=True).to(device)
#Adam with its default settings; train() reads this global
optimizer = optim.Adam(params=Model.parameters())
#50 epochs on the train split with augmentation and per-epoch validation
train(Model=Model, aug=True, validate=True, max_epoch=50)

epoch :  0 ; Train_acc :  0.7386 ; Val_acc :  0.8102 ; Train_loss   0.7094 ; Val_loss   0.5185
epoch :  1 ; Train_acc :  0.7706 ; Val_acc :  0.8279 ; Train_loss   0.6231 ; Val_loss   0.4622
epoch :  2 ; Train_acc :  0.7808 ; Val_acc :  0.8253 ; Train_loss   0.5923 ; Val_loss   0.4695
epoch :  3 ; Train_acc :  0.7851 ; Val_acc :  0.8374 ; Train_loss   0.5845 ; Val_loss   0.4527
epoch :  4 ; Train_acc :  0.7928 ; Val_acc :  0.8424 ; Train_loss   0.5645 ; Val_loss   0.435
epoch :  5 ; Train_acc :  0.7944 ; Val_acc :  0.8495 ; Train_loss   0.5546 ; Val_loss   0.4251
epoch :  6 ; Train_acc :  0.7996 ; Val_acc :  0.8545 ; Train_loss   0.5431 ; Val_loss   0.3913
epoch :  7 ; Train_acc :  0.8036 ; Val_acc :  0.851 ; Train_loss   0.5333 ; Val_loss   0.4454
epoch :  8 ; Train_acc :  0.8023 ; Val_acc :  0.8396 ; Train_loss   0.5328 ; Val_loss   0.425
epoch :  9 ; Train_acc :  0.8077 ; Val_acc :  0.8413 ; Train_loss   0.5262 ; Val_loss   0.4221
epoch :  10 ; Train_acc :  0.8075 ; Val_acc :  0.8553

In [25]:
#let's create new model with lower dropout rate, to improve performance
#Create a MLP class defining our neural network
class MLP(nn.Module):
    """Three-layer fully connected classifier with optional batch norm and dropout.

    This redefinition lowers the default dropout probability to 0.2.

    Args:
        input_len: number of features per sample after flattening
            (callers in this notebook pass 784).
        output_len: number of output logits, i.e. number of classes.
        d_out: if truthy, apply dropout after each hidden layer.
        b_norm: if truthy, apply batch normalization after each hidden ReLU.
        p_drop: dropout probability used when ``d_out`` is enabled.
    """
    def __init__(self, input_len, output_len, d_out, b_norm, p_drop=0.2):
        super(MLP, self).__init__()
        #three fully connected layers
        #the batch norm layers are always created (even when b_norm is off),
        #so every configuration exposes the same set of parameters
        self.fc1 = nn.Linear(in_features=input_len, out_features=512)
        self.bn1 = torch.nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.bn2 = torch.nn.BatchNorm1d(256)
        #output size follows output_len instead of a hard-coded 10
        self.fc3 = nn.Linear(in_features=256, out_features=output_len)
        self.b_norm = b_norm
        self.d_out = d_out
        self.p_drop = p_drop


    def forward(self, x):
        """Return raw class logits of shape (batch, output_len) for input batch ``x``."""
        #convert image to a one dimensional tensor before feeding to neural network
        x = x.flatten(start_dim=1)
        #activation function is relu
        x = F.relu(self.fc1(x))
        if(self.b_norm):
            x = self.bn1(x)
        #dropout (only active while the module is in training mode)
        if(self.d_out):
            x = F.dropout(x, p=self.p_drop, training=self.training)
        x = F.relu(self.fc2(x))
        if(self.b_norm):
            x = self.bn2(x)
        #dropout
        if(self.d_out):
            x = F.dropout(x, p=self.p_drop, training=self.training)
        #no softmax here: the logits are returned as-is
        x = self.fc3(x)

        return x
        

In [26]:
#Train for 30 epochs on the full training set using the MLP redefined above,
#whose dropout rate is 0.2
#Fresh network with both dropout and batch norm enabled
Model = MLP(input_len=784, output_len=10, d_out=True, b_norm=True).to(device)
#Fresh Adam optimizer bound to the new parameters
optimizer = optim.Adam(params=Model.parameters())
#run training with augmentation enabled and no validation pass
train(Model=Model, aug=True, validate=False, max_epoch=30)

epoch =  0 ; Train_acc :  0.7584 ; Train_loss   0.6517
epoch =  1 ; Train_acc :  0.7901 ; Train_loss   0.5651
epoch =  2 ; Train_acc :  0.802 ; Train_loss   0.5346
epoch =  3 ; Train_acc :  0.8106 ; Train_loss   0.5101
epoch =  4 ; Train_acc :  0.8147 ; Train_loss   0.4965
epoch =  5 ; Train_acc :  0.8184 ; Train_loss   0.4826
epoch =  6 ; Train_acc :  0.8248 ; Train_loss   0.4748
epoch =  7 ; Train_acc :  0.8259 ; Train_loss   0.4699
epoch =  8 ; Train_acc :  0.8288 ; Train_loss   0.4569
epoch =  9 ; Train_acc :  0.8323 ; Train_loss   0.4508
epoch =  10 ; Train_acc :  0.8324 ; Train_loss   0.4511
epoch =  11 ; Train_acc :  0.8322 ; Train_loss   0.4479
epoch =  12 ; Train_acc :  0.8348 ; Train_loss   0.4444
epoch =  13 ; Train_acc :  0.8376 ; Train_loss   0.438
epoch =  14 ; Train_acc :  0.8371 ; Train_loss   0.4342
epoch =  15 ; Train_acc :  0.8409 ; Train_loss   0.4273
epoch =  16 ; Train_acc :  0.8382 ; Train_loss   0.4314
epoch =  17 ; Train_acc :  0.8401 ; Train_loss   0.4239
epoc

In [27]:
#Report test loss and per-class metrics for the combined model trained above
test(Model=Model)

Test loss :  0.3432034440219593

    Class  Precision  Recall  F1_Score
0      0   0.869823   0.735  0.796748
1      1   0.979960   0.978  0.978979
2      2   0.853051   0.685  0.759845
3      3   0.847505   0.917  0.880884
4      4   0.726974   0.884  0.797834
5      5   0.975535   0.957  0.966179
6      6   0.660360   0.698  0.678658
7      7   0.915414   0.974  0.943798
8      8   0.976861   0.971  0.973922
9      9   0.975000   0.936  0.955102

Accuracy  :  0.8734999895095825


In [28]:
#Persist the combined model's weights (state_dict only, not the whole module)
torch.save(obj=Model.state_dict(), f='./saved_models/Combined_MLP.pth')

We can try out different combinations of the three techniques to get better models.