In [2]:
# STEP 1: SETUP & IMPORTS

!pip3 install torchsummaryX

import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F




In [3]:
# STEP 2: LOAD DATASET
# MNIST dataset

# Mini-batch size shared by the training and the test loader.
BATCH_SIZE = 64
# Directory in which torchvision stores (and looks for) the MNIST files.
DATA_ROOT = './data'

# Converts every image to a tensor before it is handed to the model.
to_tensor = transforms.ToTensor()

train_dataset = datasets.MNIST(root=DATA_ROOT,
                               train=True,
                               transform=to_tensor,
                               download=True)

# download=True is passed here as well so that building the test split does
# not silently depend on the training split having fetched the files first;
# files that are already present are not downloaded again.
test_dataset = datasets.MNIST(root=DATA_ROOT,
                              train=False,
                              transform=to_tensor,
                              download=True)

# Data loaders: shuffle only the training data; keep the test order fixed.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 99734472.74it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 108837101.37it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 34394757.14it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3764182.72it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
# STEP 3: ACTIVATE DEVICE

# Prefer the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Display the GPU model as the cell output. The query is guarded because
# torch.cuda.get_device_name() raises when no CUDA device is available, which
# would break the CPU fallback chosen above.
torch.cuda.get_device_name(device) if device.type == "cuda" else None


cuda:0


'Tesla T4'

In [5]:
# STEP 4: DEFINE NEURAL NETWORK MODEL (option A - Fully connected Neural Network)
# Fully connected neural network with one hidden layer

class NeuralNet(nn.Module):
    """Fully connected classifier with one hidden ReLU layer.

    Args:
        input_size: Number of input features per sample. Defaults to 784,
            i.e. a flattened 28x28 image.
        hidden_size: Width of the hidden layer. Defaults to 500.
        num_classes: Number of output scores per sample. Defaults to 10.
    """

    def __init__(self, input_size=784, hidden_size=500, num_classes=10):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of flattened inputs to raw class scores (logits).

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor whose last dimension equals ``num_classes``.
        """
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        # No activation and no softmax at the end: the raw scores are returned.
        return out

# Build option A and place its parameters on the selected device.
# Module.to() moves the parameters in place and returns the module itself,
# so the chained call binds the same object as a separate .to() call would.
model = NeuralNet().to(device)

print(model)

NeuralNet(
  (l1): Linear(in_features=784, out_features=500, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=500, out_features=10, bias=True)
)


In [6]:
# STEP 4: DEFINE NEURAL NETWORK MODEL (option B - Convolutional Neural Network)
# LeNET

class LeNet(nn.Module):
    """LeNet-style CNN: two conv + max-pool stages followed by three linear layers."""

    def __init__(self):
        super().__init__()
        # Feature extractor: two unpadded 5x5 convolutions; the same 2x2,
        # stride-2 max-pool module is applied after each of them.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head. 16 * 4 * 4 is the size of the flattened feature map
        # that the second pooling stage produces for a 1x28x28 input.
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw class scores (no softmax) for a batch of images."""
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # Flatten each sample's feature maps into one vector for the linear layers.
        flat = stage2.view(-1, 16 * 4 * 4)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Build option B and place its parameters on the selected device. Running
# this cell rebinds `model`, so it replaces any previously built network.
# Module.to() returns the module itself, so the chained call is equivalent
# to constructing first and moving afterwards.
model = LeNet().to(device)

print(model)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [7]:
# Optional: Show the number of parameters in the network

def get_n_params(model):
    """Return the total number of scalar parameters of ``model``.

    Every tensor yielded by ``model.parameters()`` is counted, whether or not
    it requires gradients.

    Args:
        model: Object exposing ``parameters()`` that yields tensors.

    Returns:
        int: Sum of the element counts of all parameter tensors (0 if none).
    """
    # Tensor.numel() is the product of the tensor's dimensions; using it avoids
    # a hand-written product whose local name used to shadow the `nn` alias.
    return sum(p.numel() for p in model.parameters())

# Report the total parameter count (trainable and frozen alike).
print("# net parameters: ", get_n_params(model))

def count_parameters(model):
    """Return how many scalar parameters of ``model`` are trainable.

    Only tensors whose ``requires_grad`` flag is set contribute to the total.
    """
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

# Report the number of trainable parameters.
print("# net parameters: ", count_parameters(model))


from torchsummaryX import summary

# Print a per-layer summary by tracing one all-zero sample through the model.
# The probe shape depends on the architecture: the fully connected net takes
# flattened images, the CNN takes single-channel 28x28 images.
for net_type, probe_shape in ((NeuralNet, (1, 28*28)), (LeNet, (1, 1, 28, 28))):
    if isinstance(model, net_type):
        summary(model, torch.zeros(probe_shape).to(device))


# net parameters:  44426
# net parameters:  44426
          Kernel Shape    Output Shape   Params Mult-Adds
Layer                                                    
0_conv1   [1, 6, 5, 5]  [1, 6, 24, 24]    156.0     86.4k
1_pool               -  [1, 6, 12, 12]        -         -
2_conv2  [6, 16, 5, 5]   [1, 16, 8, 8]   2.416k    153.6k
3_pool               -   [1, 16, 4, 4]        -         -
4_fc1       [256, 120]        [1, 120]   30.84k    30.72k
5_fc2        [120, 84]         [1, 84]  10.164k    10.08k
6_fc3         [84, 10]         [1, 10]    850.0     840.0
----------------------------------------------------------
                       Totals
Total params          44.426k
Trainable params      44.426k
Non-trainable params      0.0
Mult-Adds             281.64k


  df_sum = df.sum()


In [8]:
# STEP 5: DEFINE LOSS FUNCTION AND OPTIMIZATION METHOD

# Step size used by the optimizer.
learning_rate = 1e-3

# Cross-entropy is applied to the raw scores returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the currently selected model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)


In [9]:
# STEP 6: TRAIN THE MODEL

n_total_steps = len(train_loader)
num_epochs = 3

# Only the fully connected network expects flattened images; the model type
# does not change during training, so decide once instead of on every batch.
flatten_images = isinstance(model, NeuralNet)

# Put the model in training mode explicitly so this cell does not depend on
# the mode left behind by whatever cell ran before it.
model.train()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # images from dataset: [batch, 1, 28, 28]
        # labels from dataset: [batch]

        if flatten_images:
            # resized: [batch, 784]
            images = images.reshape(-1, 28*28)

        images, labels = images.to(device), labels.to(device)

        # Forward pass
        # outputs: [batch, 10]
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Debugging hint: to inspect the first-layer kernels of a LeNet,
        # print model.conv1.weight (and its .size()) at this point.

        # Backward and optimize: clear stale gradients, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log the loss of the current batch every 100 steps.
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')


Epoch [1/3], Step [100/938], Loss: 0.7190
Epoch [1/3], Step [200/938], Loss: 0.2745
Epoch [1/3], Step [300/938], Loss: 0.3774
Epoch [1/3], Step [400/938], Loss: 0.1203
Epoch [1/3], Step [500/938], Loss: 0.1014
Epoch [1/3], Step [600/938], Loss: 0.2595
Epoch [1/3], Step [700/938], Loss: 0.1863
Epoch [1/3], Step [800/938], Loss: 0.0642
Epoch [1/3], Step [900/938], Loss: 0.0909
Epoch [2/3], Step [100/938], Loss: 0.1175
Epoch [2/3], Step [200/938], Loss: 0.0492
Epoch [2/3], Step [300/938], Loss: 0.0529
Epoch [2/3], Step [400/938], Loss: 0.1376
Epoch [2/3], Step [500/938], Loss: 0.1790
Epoch [2/3], Step [600/938], Loss: 0.0549
Epoch [2/3], Step [700/938], Loss: 0.0995
Epoch [2/3], Step [800/938], Loss: 0.0936
Epoch [2/3], Step [900/938], Loss: 0.0847
Epoch [3/3], Step [100/938], Loss: 0.0506
Epoch [3/3], Step [200/938], Loss: 0.0797
Epoch [3/3], Step [300/938], Loss: 0.0230
Epoch [3/3], Step [400/938], Loss: 0.0528
Epoch [3/3], Step [500/938], Loss: 0.0907
Epoch [3/3], Step [600/938], Loss:

In [10]:
# STEP 7: TEST THE MODEL

# Switch to evaluation mode so that layers which behave differently during
# training (e.g. dropout or batch norm, should any be added) are evaluated
# correctly.
model.eval()

# Gradients are not needed for evaluation; disabling them saves memory and time.
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:

        # The fully connected network expects flattened images.
        if isinstance(model, NeuralNet):
            images = images.reshape(-1, 28*28)

        images, labels = images.to(device), labels.to(device)

        outputs = model(images)

        # torch.max over dim 1 returns (values, indices); the index of the
        # largest score is the predicted class.
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    # Report the number of images actually evaluated rather than a fixed count.
    print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 10000 test images: 98.57 %
