In [1]:
import pandas as dp
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets,transforms
from sklearn.metrics import confusion_matrix

In [2]:
# Root directory handed to datasets.MNIST in the cells below. It is a relative
# path, so it is resolved against the notebook's working directory.
path= "PYTORCH_NOTEBOOKS/Data"

In [3]:
# Per-sample transform passed to both MNIST datasets: each image is converted to
# a torch tensor when it is fetched.
transform = transforms.ToTensor()

In [4]:
# Training and test splits of MNIST; each item is an (image_tensor, label) pair.
# download=True fetches the files into `path` only when they are missing, so the
# notebook also runs top to bottom on a fresh checkout; an existing copy is reused.
x_train = datasets.MNIST(root=path, train=True, download=True, transform=transform)
x_test = datasets.MNIST(root=path, train=False, download=True, transform=transform)

In [5]:
# Mini-batches of 10 training samples, reshuffled on every pass over x_train.
train_loader = DataLoader(x_train,batch_size = 10,shuffle = True)

In [6]:
# Evaluation batches of 10 samples. Shuffling is disabled: order does not affect
# accuracy, and a fixed order keeps per-batch evaluation results repeatable.
test_loader = DataLoader(x_test, batch_size=10, shuffle=False)

In [7]:
# Stand-alone copies of the two convolution layers, used below to trace how the
# tensor shape changes layer by layer before the model class is written.
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

In [8]:
# Grab only the first (image, label) pair of the training set; `i` ends up as 0.
i, (x_t, y_t) = next(enumerate(x_train))

In [9]:
# Shape of the single training image pulled out above.
x_t.shape

torch.Size([1, 28, 28])

In [10]:
# Add a leading batch dimension: (1, 28, 28) -> (1, 1, 28, 28), the 4-D layout
# the convolution layers are fed in the following cells.
x = x_t.unsqueeze(0)

In [11]:
# Confirm the batched shape before running it through the layers.
x.shape

torch.Size([1, 1, 28, 28])

In [12]:
# First convolution + ReLU: a 3x3 kernel with stride 1 trims one pixel per side,
# so (1, 1, 28, 28) becomes (1, 6, 26, 26).
# NOTE: this cell overwrites `x`, so it is not re-runnable on its own; running it
# twice feeds the 6-channel result back into conv1, which expects 1 channel.
x = F.relu(conv1(x))

In [13]:
# 2x2 max pooling with stride 2 halves the spatial size: 26x26 -> 13x13.
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [14]:
# Second convolution + ReLU: 6 -> 16 channels, 13x13 -> 11x11.
# NOTE: like the conv1 cell, this overwrites `x` and must not be run twice in a row.
x = F.relu(conv2(x))

In [15]:
# Second 2x2 / stride-2 max pooling: 11x11 -> 5x5 (the odd last row/column is dropped).
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [16]:
# Shape after both conv/pool stages; this fixes the input size of the first linear layer.
x.shape

torch.Size([1, 16, 5, 5])

In [17]:
# Hand calculation of the spatial size: each 3x3 conv removes 2 pixels, each pool halves.
# True division yields 5.5 here, but pooling rounds down, which is why the tensor
# above is 5x5 rather than 5.5.
(((28-2)/2)-2)/2

5.5

In [18]:
# Flatten the 16 feature maps of 5x5 into one row of 400 values per sample,
# the layout a fully connected layer expects.
x = x.view(-1,16*5*5)

In [19]:
# One sample, 400 flattened features.
x.shape

torch.Size([1, 400])

In [20]:

class ConvolutionModel(nn.Module):
    """Small CNN that classifies 1-channel 28x28 images into 10 classes.

    Architecture: two 3x3 convolutions (1 -> 6 -> 16 channels), each followed by
    ReLU and 2x2 max pooling, then three fully connected layers
    (400 -> 720 -> 80 -> 10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,6,3,1)
        self.conv2 = nn.Conv2d(6,16,3,1)
        # 5*5*16 is the flattened size of the feature maps after the second pooling
        # step when the input image is 28x28.
        self.fc1 = nn.Linear(5*5*16,720)
        self.fc2 = nn.Linear(720,80)
        self.fc3 = nn.Linear(80,10)

    def forward(self,x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: image tensor whose trailing dimensions are (1, 28, 28),
                normally shaped (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding log-softmax scores along dim 1.

        Raises:
            ValueError: if the feature maps reaching the linear layers are not
                16 x 5 x 5, i.e. the input images were not 28x28.
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x,2,2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x,2,2)
        # Guard the flatten below: view(-1, 400) would otherwise accept any tensor
        # whose element count is a multiple of 400 and silently regroup samples
        # (e.g. a batch of 2 images of 48x48 would come out as 8 rows).
        if tuple(x.shape[-3:]) != (16, 5, 5):
            raise ValueError(
                f"expected 16x5x5 feature maps before the linear layers, "
                f"got {tuple(x.shape[-3:])}; input images must be 28x28"
            )
        x = x.view(-1,5*5*16)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x,dim = 1)
    
        

In [21]:
# Seed torch's RNG immediately before construction so the randomly initialised
# weights are the same on every run, then display the layer summary.
torch.manual_seed(42)
model = ConvolutionModel()
model

ConvolutionModel(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=720, bias=True)
  (fc2): Linear(in_features=720, out_features=80, bias=True)
  (fc3): Linear(in_features=80, out_features=10, bias=True)
)

In [22]:
# Print the number of elements in each parameter tensor, in the order the model
# yields them (weight then bias for conv1, conv2, fc1, fc2, fc3).
for param in model.parameters():
    print(param.numel())

54
6
864
16
288000
720
57600
80
800
10


In [23]:
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# The model's forward() already ends in log_softmax, so its outputs are
# log-probabilities. NLLLoss is the loss that consumes log-probabilities;
# CrossEntropyLoss expects raw logits and would apply log_softmax a second time.
criterion = nn.NLLLoss()


In [24]:
import time

# Train for a fixed number of epochs, evaluating on the test set after each one.
# Per-epoch bookkeeping lists:
#   train_losses / test_losses   - loss of the LAST mini-batch of the epoch (scalar tensor)
#   train_correct / test_correct - number of correct predictions over the whole epoch
start_time = time.time()

epochs = 5
train_losses = []
test_losses = []
train_correct = []
test_correct = []

# Take the progress-report figures from the loader instead of hard-coding 10 and
# 60000, so the printed counts stay right if the batch size or dataset changes.
batch_size = train_loader.batch_size
n_train = len(train_loader.dataset)

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0

    # Run the training batches
    model.train()  # no effect on layers without train/eval-specific behaviour, but keeps the loop correct if such layers are added
    for b, (X_train, y_train) in enumerate(train_loader, start=1):

        # Apply the model (images stay 4-D; the model flattens internally)
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)

        # Tally the number of correct predictions; detach() replaces the legacy .data
        predicted = torch.max(y_pred.detach(), 1)[1]
        batch_corr = (predicted == y_train).sum()
        trn_corr += batch_corr

        # Update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print interim results
        if b%600 == 0:
            print(f'epoch: {i:2}  batch: {b:4} [{batch_size*b:6}/{n_train}]  loss: {loss.item():10.8f}  \
accuracy: {trn_corr.item()*100/(batch_size*b):7.3f}%')

    # Store a detached copy: appending `loss` itself would keep each epoch's autograd
    # graph alive and leave tensors that require grad in the list.
    train_losses.append(loss.detach())
    train_correct.append(trn_corr)

    # Run the testing batches
    model.eval()
    with torch.no_grad():
        for b, (X_test, y_test) in enumerate(test_loader):

            # Apply the model
            y_val = model(X_test)

            # Tally the number of correct predictions
            predicted = torch.max(y_val, 1)[1]
            tst_corr += (predicted == y_test).sum()

        # Loss of the final test batch, computed inside no_grad like the rest of evaluation
        loss = criterion(y_val, y_test)
    test_losses.append(loss)
    test_correct.append(tst_corr)

print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed

epoch:  0  batch:  600 [  6000/60000]  loss: 0.19702342  accuracy:  79.867%
epoch:  0  batch: 1200 [ 12000/60000]  loss: 0.01941127  accuracy:  86.783%
epoch:  0  batch: 1800 [ 18000/60000]  loss: 0.07161964  accuracy:  89.722%
epoch:  0  batch: 2400 [ 24000/60000]  loss: 0.05725450  accuracy:  91.325%
epoch:  0  batch: 3000 [ 30000/60000]  loss: 0.03065458  accuracy:  92.327%
epoch:  0  batch: 3600 [ 36000/60000]  loss: 0.00127158  accuracy:  93.111%
epoch:  0  batch: 4200 [ 42000/60000]  loss: 0.00394788  accuracy:  93.655%
epoch:  0  batch: 4800 [ 48000/60000]  loss: 0.34566808  accuracy:  94.108%
epoch:  0  batch: 5400 [ 54000/60000]  loss: 0.00153218  accuracy:  94.481%
epoch:  0  batch: 6000 [ 60000/60000]  loss: 0.00268220  accuracy:  94.818%
epoch:  1  batch:  600 [  6000/60000]  loss: 0.37970442  accuracy:  97.917%
epoch:  1  batch: 1200 [ 12000/60000]  loss: 0.00094762  accuracy:  97.825%
epoch:  1  batch: 1800 [ 18000/60000]  loss: 0.00230618  accuracy:  97.906%
epoch:  1  b

In [34]:
# Show the raw model outputs without autograd history. detach() is the supported
# replacement for the legacy .data attribute and displays the same values.
y_pred.detach()

tensor([[ 0.0000e+00, -4.3611e+01, -2.9384e+01, -4.0632e+01, -3.9588e+01,
         -3.1817e+01, -2.9050e+01, -3.0303e+01, -2.8325e+01, -2.3304e+01],
        [-3.1532e+01,  0.0000e+00, -2.6375e+01, -4.2028e+01, -2.6840e+01,
         -3.3386e+01, -3.2263e+01, -2.0107e+01, -2.3225e+01, -2.3301e+01],
        [-1.6709e+01, -1.8759e+01, -1.5648e+01, -1.3461e+01, -9.2048e+00,
         -1.3398e+01, -1.7817e+01, -9.9848e+00, -8.9437e+00, -2.8046e-04],
        [-3.2101e+01, -2.6881e+01, -1.9084e+01, -1.8302e+01, -2.4184e+01,
         -2.4941e+01, -4.1236e+01,  0.0000e+00, -2.5854e+01, -2.3779e+01],
        [-1.2974e+01, -1.4388e+01, -9.7388e+00, -1.0088e+01, -1.1920e+01,
         -1.1319e+01, -1.2898e+01, -1.2368e+01, -1.6390e-04, -1.0266e+01],
        [-1.8523e+01, -2.0401e+01, -1.6456e+01, -1.4865e+01, -1.7390e+01,
         -1.4277e+01, -1.7097e+01, -1.9208e+01, -1.1921e-06, -1.6231e+01],
        [-1.7666e+01, -2.1497e+01, -2.3693e+01, -2.7442e+01, -1.7967e+01,
         -1.8525e+01,  0.0000e+0