In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

In [2]:
torch.manual_seed(1)

<torch._C.Generator at 0x22856c468b0>

In [5]:
# MNIST dataset 
train_dataset = torchvision.datasets.MNIST(root='./data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),  
                                           download=False)

test_dataset = torchvision.datasets.MNIST(root='./data', 
                                          train=False, 
                                          transform=transforms.ToTensor())

print("train_dataset size:", len(train_dataset),"\ntest_dataset size:", len(test_dataset))

train_dataset size: 60000 
test_dataset size: 10000


In [7]:
# Data loader 1
m1train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=64, 
                                           shuffle=True)

m1test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=100, 
                                          shuffle=False)

In [27]:
# Data loader 2
m2train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=1000, 
                                           shuffle=True)

m2test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=100, 
                                          shuffle=False)

In [32]:
class M1(nn.Module):
    def __init__(self,):
        super(M1, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size) #1st Convolution
        self.pool = nn.MaxPool2d(2, 2)   #pool_size=2, strides=2 
        self.conv2 = nn.Conv2d(10, 20, kernel_size) #2nd Convolution
        self.fc1 = nn.Linear(320, 50) #((I/P - Filter + 2*Pad)/Stride)+1 
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(50, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        # -> n, 3, 32, 32
        x = self.pool(F.relu(self.conv1(x)))  
        x = self.pool(F.relu(self.conv2(x)))  
        x = x.view(-1, 320)            #Flattening 
        x = F.relu(self.fc1(x))        #Fully Connected NN   
        x = self.dropout(x)   
        x = F.relu(self.fc2(x))        #Fully Connected NN           
        x = self.fc3(x)                #O/P Layer       
        return x

In [33]:
#train function
def trainFunc(model,num_epochs,train_loader):
    print('strated')
    n_total_steps = len(train_loader)
    train_losses = []
    train_epoch = []
    train_acc = []
    not_converged =True
    epoch = 0
    while not_converged:
        epoch += 1
        n_correct = 0
        n_samples = 0
        for i, (images, labels) in enumerate(train_loader):  
            # Forward pass
            prediction = model(images)
            loss = loss_func(prediction, labels)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(prediction.data, 1)
            n_samples += labels.size(0)
            n_correct += (predicted == labels).sum().item()
            acc = 100.0 * n_correct / n_samples

            if (i+1) % 500 == 0:
                print (f'Epoch [{epoch}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
                train_epoch.append(epoch)
                train_losses.append(loss.item())
                print(f'Epoch [{epoch}/{num_epochs}], Accuracy : {acc} %')
                train_acc.append(acc)

                if epoch == num_epochs:
                        print("Max Epoch Reached")
                        not_converged = False
                elif (epoch > 5) and  (train_losses[-1] < 0.001):
                    if abs(train_losses[-3] - train_losses[-2]) < 1.0e-05 and abs(train_losses[-2] - train_losses[-1]) < 1.0e-05:
                        print("Convergeance reached for loss:",loss_arr[-1])
                        not_converged = False
                        
    return train_epoch,train_losses,train_acc

In [34]:
# Hyper-parameters 
input_size = 784 # 28x28
max_epochs = 15
learning_rate = 0.001
kernel_size = 4
weight_decay_val = 1e-4
dropout = 0.25

In [35]:
mBatch1 = M1()
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mBatch1.parameters(), lr=learning_rate, weight_decay = weight_decay_val)

a=[]
for i in mBatch1.parameters():
    a.append(torch.numel(i))
print(f'Total no of parameters in Model 1 with batch_size={64} is:{np.sum(a)}')

Total no of parameters in Model 1 with batch_size=64 is:25550


In [36]:
B1_train_epoch,B1_train_losses,B1_train_acc = trainFunc(mBatch1,max_epochs,m1train_loader)

strated
Epoch [1/15], Step [500/938], Loss: 0.2480
Epoch [1/15], Accuracy : 82.478125 %
Epoch [2/15], Step [500/938], Loss: 0.1733
Epoch [2/15], Accuracy : 95.85625 %
Epoch [3/15], Step [500/938], Loss: 0.1368
Epoch [3/15], Accuracy : 97.04375 %
Epoch [4/15], Step [500/938], Loss: 0.0355
Epoch [4/15], Accuracy : 97.70625 %
Epoch [5/15], Step [500/938], Loss: 0.0324
Epoch [5/15], Accuracy : 97.978125 %
Epoch [6/15], Step [500/938], Loss: 0.0452
Epoch [6/15], Accuracy : 98.203125 %
Epoch [7/15], Step [500/938], Loss: 0.0082
Epoch [7/15], Accuracy : 98.165625 %
Epoch [8/15], Step [500/938], Loss: 0.0021
Epoch [8/15], Accuracy : 98.428125 %
Epoch [9/15], Step [500/938], Loss: 0.0754
Epoch [9/15], Accuracy : 98.50625 %
Epoch [10/15], Step [500/938], Loss: 0.0593
Epoch [10/15], Accuracy : 98.63125 %
Epoch [11/15], Step [500/938], Loss: 0.0146
Epoch [11/15], Accuracy : 98.75 %
Epoch [12/15], Step [500/938], Loss: 0.0341
Epoch [12/15], Accuracy : 98.69375 %
Epoch [13/15], Step [500/938], Loss:

In [37]:
batch1_param = torch.nn.utils.parameters_to_vector(mBatch1.parameters())
print(batch1_param,'\nlen:',len(batch1_param))

tensor([ 0.4764,  0.4666,  0.3013,  ...,  0.0815, -0.0638,  0.0091],
       grad_fn=<CatBackward0>) 
len: 25550


In [40]:
class M2(nn.Module):
    def __init__(self,):
        super(M2, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size) #1st Convolution
        self.pool = nn.MaxPool2d(2, 2)   #pool_size=2, strides=2 
        self.conv2 = nn.Conv2d(10, 20, kernel_size) #2nd Convolution
        self.fc1 = nn.Linear(320, 50) #((I/P - Filter + 2*Pad)/Stride)+1 
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(50, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        # -> n, 3, 32, 32
        x = self.pool(F.relu(self.conv1(x)))  
        x = self.pool(F.relu(self.conv2(x)))  
        x = x.view(-1, 320)            #Flattening 
        x = F.relu(self.fc1(x))        #Fully Connected NN   
        x = self.dropout(x)   
        x = F.relu(self.fc2(x))        #Fully Connected NN           
        x = self.fc3(x)                #O/P Layer       
        return x

In [42]:
mBatch2 = M2()
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mBatch2.parameters(), lr=learning_rate, weight_decay = weight_decay_val)

a=[]
for i in mBatch2.parameters():
    a.append(torch.numel(i))
print(f'Total no of parameters in Model 2 with batch_size={1024} is:{np.sum(a)}')

Total no of parameters in Model 2 with batch_size=1024 is:25550


In [43]:
B2_train_epoch,B2_train_losses,B2_train_acc = trainFunc(mBatch2,max_epochs,m2train_loader)

strated


KeyboardInterrupt: 

In [None]:
batch2_param = torch.nn.utils.parameters_to_vector(mBatch2.parameters())
print(batch2_param,'\nlen:',len(batch2_param))