In [1]:
import torch
import torch.nn as nn

from torch.utils.data import dataloader
from torchvision import datasets, transforms
from torchvision.utils import make_grid


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [2]:
transform = transforms.ToTensor()

In [3]:
train_data = datasets.MNIST(root='../DATA', train=True, download=True, transform=transform)

In [4]:
test_data = datasets.MNIST(root='../DATA', train=False, download=True, transform=transform)

In [5]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../DATA
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ../DATA
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
from torch.utils.data import DataLoader

In [8]:
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

**2 CONVOLUTIONAL LAYERS**

In [9]:
conv1 = nn.Conv2d(1,6,3,1)

conv2 = nn.Conv2d(6,16,3,1)

In [10]:
# To get the first image from the dataset

for i, (X_train, y_train) in enumerate(train_data):
    break

In [11]:
X_train

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [12]:
# Currently this is in 3D as it has no batch

X_train.shape

torch.Size([1, 28, 28])

In [13]:
x = X_train.view(1,1,28,28)

In [14]:
import torch.nn.functional as F

In [15]:
x = F.relu(conv1(x))

In [16]:
x.shape

torch.Size([1, 6, 26, 26])

In [17]:
x = F.max_pool2d(x,2,2)

In [18]:
x.shape

torch.Size([1, 6, 13, 13])

In [19]:
x = F.relu(conv2(x))

In [20]:
x.shape

torch.Size([1, 16, 11, 11])

In [21]:
x = F.max_pool2d(x,2,2)

In [22]:
x.shape

torch.Size([1, 16, 5, 5])

In [23]:
x.view(-1,16*5*5).shape

torch.Size([1, 400])

In [24]:
class ConvolutionalNetwork(nn.Module):
    
    def __init__(self):
        
        super().__init__()
        
        self.conv1 = nn.Conv2d(1,6,3,1)
        self.conv2 = nn.Conv2d(6,16,3,1)
        self.fc1 = nn.Linear(5*5*16,120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
        
        
    def forward(self,X):
        
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X,2,2)
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X,2,2)
        
        X = X.view(-1,16*5*5)
        
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        
        return F.log_softmax(X,dim=1)

In [25]:
torch.manual_seed(42)

model = ConvolutionalNetwork()

model

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [26]:
for param in model.parameters():
    print(param.numel())

54
6
864
16
48000
120
10080
84
840
10


In [27]:
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [28]:
import time

start_time = time.time()


# TRACKERS

epochs = 5

train_losses = []
test_losses = []

train_correct = []
test_correct = []


# EPOCHS

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0
    

# TRAIN

    for b,(X_train, y_train) in enumerate(train_loader):
        b+=1

        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)



        predicted = torch.max(y_pred.data, 1)[1]
        batch_corr = (predicted == y_train).sum()
        trn_corr += batch_corr


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if b%600 == 0:
            print(f'epoch: {i:2}  batch: {b:4} [{10*b:6}/60000]  loss: {loss.item():10.8f}  \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')


    train_losses.append(loss)      
    train_correct.append(trn_corr)




# TEST


with torch.no_grad():
    
    for b, (X_test,y_test) in enumerate(test_loader):
        
        y_val = model(X_test)
        
        prediction = torch.max(y_val.data,1)[1] 
        tst_corr += (predicted == y_test).sum()
        

loss = criterion(y_val,y_test)        
test_correct.append(tst_corr)       
test_losses.append(loss)


current_time = time.time()

total = current_time - start_time

print(f"training took {total/60} minutes")

epoch:  0  batch:  600 [  6000/60000]  loss: 0.04055630  accuracy:  78.417%
epoch:  0  batch: 1200 [ 12000/60000]  loss: 0.08253471  accuracy:  85.800%
epoch:  0  batch: 1800 [ 18000/60000]  loss: 0.36470532  accuracy:  88.689%
epoch:  0  batch: 2400 [ 24000/60000]  loss: 0.01825019  accuracy:  90.525%
epoch:  0  batch: 3000 [ 30000/60000]  loss: 0.00806712  accuracy:  91.650%
epoch:  0  batch: 3600 [ 36000/60000]  loss: 0.00097706  accuracy:  92.492%
epoch:  0  batch: 4200 [ 42000/60000]  loss: 0.44326892  accuracy:  93.133%
epoch:  0  batch: 4800 [ 48000/60000]  loss: 0.03169333  accuracy:  93.615%
epoch:  0  batch: 5400 [ 54000/60000]  loss: 0.01946524  accuracy:  94.031%
epoch:  0  batch: 6000 [ 60000/60000]  loss: 0.02709320  accuracy:  94.333%
epoch:  1  batch:  600 [  6000/60000]  loss: 0.01658557  accuracy:  97.767%
epoch:  1  batch: 1200 [ 12000/60000]  loss: 0.02987212  accuracy:  97.758%
epoch:  1  batch: 1800 [ 18000/60000]  loss: 0.00245765  accuracy:  97.650%
epoch:  1  b