In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets , transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Transform handed to datasets.MNIST below; each image is converted to a
# torch tensor when a sample is read.
transform = transforms.ToTensor()

In [6]:
# Training split of MNIST; files are fetched into ../Data if not already there.
train_data = datasets.MNIST(
    root="../Data",
    train=True,
    download=True,
    transform=transform,
)


In [7]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../Data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [9]:
# Test split of MNIST, stored under the same root as the training split.
test_data = datasets.MNIST(
    root='../Data',
    train=False,
    download=True,
    transform=transform,
)


In [10]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ../Data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [11]:
# Training batches of 10 samples; shuffle=True reorders the data every epoch.
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)

In [12]:
# Test batches of 10 samples, served in a fixed order (no shuffling).
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False)

In [16]:
# 1 input channel -> 6 output channels (feature maps), 3x3 kernel, stride 1
conv1 = nn.Conv2d(1,6,3,1)

In [17]:
conv1

Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))

In [18]:
# 6 input channels (conv1's outputs) -> 16 output channels, 3x3 kernel, stride 1
conv2 = nn.Conv2d(6 , 16 , 3, 1 )

In [19]:
conv2

Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))

In [20]:
# Take the first (image, label) pair directly; starting a loop only to break
# out of it on the first iteration obscures the intent and leaves an unused
# index variable behind.
X_train, y_train = train_data[0]

In [24]:
X_train.shape  # 3D single image (channels, height, width) -- not a batch yet

torch.Size([1, 28, 28])

In [27]:
# Add a leading batch dimension: (batch, channels, height, width) = (1, 1, 28, 28)
x = X_train.view(1,1,28,28)  # 4D image batch

In [28]:
# First convolution + ReLU: (1, 1, 28, 28) -> (1, 6, 26, 26); the unpadded 3x3
# kernel trims one pixel from every border.
x = F.relu(conv1(x))

In [29]:
x.shape

torch.Size([1, 6, 26, 26])

In [31]:
# 2x2 max-pooling with stride 2 halves each spatial side: 26 -> 13
x = F.max_pool2d(x , 2, 2)

In [32]:
x.shape

torch.Size([1, 6, 13, 13])

In [33]:
# Second convolution + ReLU: (1, 6, 13, 13) -> (1, 16, 11, 11)
x = F.relu(conv2(x))

In [34]:
x.shape

torch.Size([1, 16, 11, 11])

In [35]:
# Pooling an odd side rounds down: 11 -> 5 (the last row/column is dropped)
x = F.max_pool2d(x , 2, 2)

In [36]:
x.shape

torch.Size([1, 16, 5, 5])

In [37]:
11/2  # true division gives 5.5; the pooled tensor is 5x5, i.e. the size is floored

5.5

In [38]:
# Side length after conv(3x3) -> pool(2) -> conv(3x3) -> pool(2) on a 28-pixel side.
# True division yields 5.5 here; the actual tensor side is 5 because pooling floors.
(((28-2)/2)-2)/2

5.5

In [39]:
x.shape

torch.Size([1, 16, 5, 5])

In [40]:
# Flatten the 16 x 5 x 5 feature maps into one 400-long vector per image,
# the input size the first fully connected layer will need.
x.view(-1 , 16*5*5).shape

torch.Size([1, 400])

In [41]:
class ConvolutonalNetwork(nn.Module):
    """Small two-convolution CNN for 1 x 28 x 28 images and 10 classes.

    Shapes for a 28 x 28 input:
        conv1 (3x3, stride 1) -> 6 x 26 x 26 -> 2x2 max-pool -> 6 x 13 x 13
        conv2 (3x3, stride 1) -> 16 x 11 x 11 -> 2x2 max-pool -> 16 x 5 x 5
        flatten (400) -> fc1 (120) -> fc2 (84) -> fc3 (10)
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that a fixed
        # torch.manual_seed produces the same initial weights.
        self.conv1 = nn.Conv2d(1, 6, 3, 1)
        self.conv2 = nn.Conv2d(6, 16, 3, 1)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, X):
        """Compute per-class log-probabilities.

        Args:
            X: image batch of shape (batch, 1, 28, 28). A single unbatched
               image of shape (1, 28, 28) is also accepted and treated as a
               batch of one.

        Returns:
            Tensor of shape (batch, 10) holding log-probabilities
            (log_softmax over dim 1).

        Raises:
            RuntimeError: if the spatial size does not reduce to 16 x 5 x 5,
                i.e. the flattened features do not match fc1's 400 inputs.
        """
        if X.dim() == 3:
            # Promote a lone (C, H, W) image to a batch of one.
            X = X.unsqueeze(0)

        X = F.max_pool2d(F.relu(self.conv1(X)), 2, 2)
        X = F.max_pool2d(F.relu(self.conv2(X)), 2, 2)

        # Flatten per sample. Using view(-1, 400) here would silently fold a
        # wrongly sized input into a different number of rows; keeping the
        # batch dimension fixed makes fc1 raise on a size mismatch instead.
        X = X.view(X.size(0), -1)

        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return F.log_softmax(X, dim=1)

In [42]:
# Seed the RNG right before building the model so the initial weights are
# the same on every run; the bare `model` shows the layer summary.
torch.manual_seed(42)
model = ConvolutonalNetwork()
model


ConvolutonalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [43]:
# Print the element count of every parameter tensor (weights, then biases,
# layer by layer), one number per line.
for n_elements in map(torch.Tensor.numel, model.parameters()):
    print(n_elements)
    

54
6
864
16
48000
120
10080
84
840
10


In [45]:
# The model's forward() already ends in log_softmax, so the matching loss is
# NLLLoss. CrossEntropyLoss applies log_softmax internally as well; on inputs
# that are already log-probabilities this yields the same value, but it
# repeats the normalisation and hides which component owns the softmax.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [None]:
import time

start_time = time.time()

# Trackers: exactly one entry is appended to each list per epoch.
epochs = 5
train_loss = []      # mean training loss per sample, per epoch (float)
test_loss = []       # mean test loss per sample, per epoch (float)
train_correct = []   # number of correct training predictions per epoch
test_correct = []    # number of correct test predictions per epoch

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0

    # ---- TRAIN: one full pass over train_loader inside every epoch ----
    model.train()
    loss_sum = 0.0
    n_seen = 0
    for b, (X_train, y_train) in enumerate(train_loader, start=1):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)

        # Predicted class = index of the largest score along the class axis.
        predicted = torch.max(y_pred.data, 1)[1]
        trn_corr += (predicted == y_train).sum()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion averages over the batch, so weight by batch size to get
        # a per-sample mean over the whole epoch.
        loss_sum += loss.item() * y_train.size(0)
        n_seen += y_train.size(0)

        if b % 600 == 0:
            print(f'Epoch [{i}], Batch [{b}], Loss: {loss.item():.4f}')

    train_loss.append(loss_sum / max(n_seen, 1))
    train_correct.append(trn_corr)

    # ---- TEST: evaluate once per epoch, without tracking gradients ----
    model.eval()
    loss_sum = 0.0
    n_seen = 0
    with torch.no_grad():
        for X_test, y_test in test_loader:
            y_val = model(X_test)
            predicted = torch.max(y_val.data, 1)[1]
            tst_corr += (predicted == y_test).sum()
            loss_sum += criterion(y_val, y_test).item() * y_test.size(0)
            n_seen += y_test.size(0)

    # Store plain floats so the values can be plotted directly.
    test_loss.append(loss_sum / max(n_seen, 1))
    test_correct.append(tst_corr)

current_time = time.time()
total = current_time - start_time
print(f'Training took {total/60} minutes')


    





In [None]:
# Loss curves, one point per entry recorded in train_loss / test_loss.
# float() accepts plain numbers as well as 0-d tensors, so the lists plot
# cleanly whichever form the training cell stored.
fig, ax = plt.subplots()
ax.plot([float(v) for v in train_loss], label="Training Loss")
ax.plot([float(v) for v in test_loss], label="Testing/Validation Loss")
ax.set_title("Training and Validation Loss at EPOCHS")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.legend();
