# MNIST with LeNet

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Tunable configuration for the whole notebook.
DATA_ROOT = "./data"            # root directory for dataset files
BATCH_SIZE = 6                  # samples per mini-batch in both data loaders
LEARNING_RATE = 0.01            # step size handed to the SGD optimizer
EPOCH_NUMBER = 10               # number of full passes over the training set
MODEL_PATH = "./mnist_net.pth"  # file the trained weights are written to
SEED = 0                        # seed for PyTorch's global random number generator

# Seeds the global RNG so weight initialization and shuffling start from a fixed state on
# every "Restart & Run All". The trailing ';' hides the returned Generator from the cell output.
torch.manual_seed(SEED);

## 1. Get Data

In [2]:
# Builds the preprocessing pipeline applied to every image; its single step converts the image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

In [3]:
# Downloads MNIST into DATA_ROOT and builds the training and test splits,
# both preprocessed with the same `transform`.
trainset = torchvision.datasets.MNIST(root=DATA_ROOT, train=True, transform=transform, download=True)
testset = torchvision.datasets.MNIST(root=DATA_ROOT, train=False, transform=transform, download=True)

In [4]:
# Wraps both datasets in mini-batch loaders; only the training data is shuffled.
train_dataloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
test_dataloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

## 2. Build a Network

In [5]:
# Defines a LeNet.
class Net(nn.Module):
    """LeNet-style CNN: two conv/ReLU/max-pool stages followed by three linear layers.

    For a 1x28x28 input the feature maps are 6x28x28 -> 6x14x14 after the first
    stage and 16x10x10 -> 16x5x5 after the second, i.e. 400 features per sample,
    which is what ``fc1`` expects.
    """

    def __init__(self):
        """Creates the two convolutional and the three fully connected layers."""
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)  # (i)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        self.fc1 = nn.Linear(in_features=400, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, inputs):
        """Runs the forward pass.

        Args:
            inputs: image tensor with 1 channel and 28x28 pixels per sample.

        Returns:
            The output of the last linear layer: 10 values per sample
            (no softmax is applied here).

        Raises:
            RuntimeError: if the per-sample feature count after the second
                pooling stage does not match ``fc1.in_features``.
        """
        z1 = self.conv1(inputs)
        a1 = F.relu(z1)
        a1 = F.max_pool2d(a1, 2)

        z2 = self.conv2(a1)
        a2 = F.relu(z2)
        a2 = F.max_pool2d(a2, 2)

        # Without this check, view(-1, 400) would silently regroup values from different
        # samples whenever the total element count happens to be divisible by 400
        # (e.g. a batch of 25 images of 32x32 pixels would come out as 36 "samples").
        expected_features = self.fc1.in_features
        features_per_sample = a2.shape[-3] * a2.shape[-2] * a2.shape[-1]
        if features_per_sample != expected_features:
            raise RuntimeError(
                f"Net expects {expected_features} features per sample after pooling, "
                f"got {features_per_sample} (input shape {tuple(inputs.shape)})"
            )
        a2 = a2.view(-1, expected_features)

        z3 = self.fc1(a2)
        a3 = F.relu(z3)

        z4 = self.fc2(a3)
        a4 = F.relu(z4)

        z5 = self.fc3(a4)

        return z5

<font color=red>
    (i) MNIST images are 28x28, but LeNet expects 32x32 inputs, so padding=2 is needed in the first convolution.
</font>

In [6]:
# Instantiates the network (the Net class defined in the previous cell); this is the model
# that is trained, evaluated and saved in the sections below.
net = Net()

## 3. Specify the Criterion and Create an Optimizer

In [7]:
# Uses cross entropy as the criterion; the training loop calls it as criterion(outputs, labels)
# with the network's [BATCH_SIZE, 10] outputs and the [BATCH_SIZE] labels.
criterion = nn.CrossEntropyLoss()  # (i)

<font color=red>
    (i) The outputs are of size [BATCH_SIZE, 10], whereas the labels are of size [BATCH_SIZE]. Modification is needed.
</font>

In [8]:
# Optimizes every parameter of the network with SGD at the configured learning rate.
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE)

## 4. Train the Model

In [9]:
# Trains the model with mini-batch gradient descent and reports the mean loss
# every `times_for_display` batches.
times_for_display = 2000
for epoch in range(EPOCH_NUMBER):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_dataloader):
        # Forward pass and loss for this mini-batch.
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Clears the gradients of the previous step, then back-propagates.
        optimizer.zero_grad()
        loss.backward()

        # Applies the parameter update.
        optimizer.step()

        running_loss += loss.item()  # (i)
        if (i + 1) % times_for_display == 0:
            print(f"epoch: {epoch + 1:>2}, times: {i + 1:>5}, loss: {running_loss / times_for_display}")  # (ii), (iii)
            running_loss = 0.0

epoch:  1, times:  2000, loss: 1.1870577761756722
epoch:  1, times:  4000, loss: 0.23506574567733332
epoch:  1, times:  6000, loss: 0.15155215469530958
epoch:  1, times:  8000, loss: 0.12621233039686194
epoch:  1, times: 10000, loss: 0.11574725920433411
epoch:  2, times:  2000, loss: 0.09064269549781238
epoch:  2, times:  4000, loss: 0.07923866318311593
epoch:  2, times:  6000, loss: 0.07732646262271374
epoch:  2, times:  8000, loss: 0.07615156177741847
epoch:  2, times: 10000, loss: 0.07274835567868468
epoch:  3, times:  2000, loss: 0.05938030254223941
epoch:  3, times:  4000, loss: 0.06541370966519344
epoch:  3, times:  6000, loss: 0.0498236924357268
epoch:  3, times:  8000, loss: 0.051806494077342904
epoch:  3, times: 10000, loss: 0.05156350825688082
epoch:  4, times:  2000, loss: 0.03884538826545668
epoch:  4, times:  4000, loss: 0.04757294632136154
epoch:  4, times:  6000, loss: 0.04005073353668649
epoch:  4, times:  8000, loss: 0.040735335312816916
epoch:  4, times: 10000, loss: 

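<font color=red>
    (i) Use item() to convert a one-element tensor into a Python scalar.
    <br/>
    (ii) Mind the indentation: the print and the reset of running_loss belong inside the if block.
    <br/>
    (iii) Report the mean loss over the displayed batches: running_loss / times_for_display.
</font>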

## 5. Test the Model

In [10]:
# Calculates the accuracy over the whole test set and for each digit.
correct = 0.0  # (i)
total = 0.0

class_correct = [0.0 for _ in range(10)]
class_total = [0.0 for _ in range(10)]

# Switches the network to evaluation mode. Net contains only Conv2d and Linear layers,
# so this does not change its outputs, but it keeps the cell correct if such layers are added.
net.eval()

with torch.no_grad():  # (ii)
    for inputs, labels in test_dataloader:
        # Gets predictions using the trained model: the index of the largest output is the digit.
        outputs = net(inputs)
        _, predictions = torch.max(input=outputs, dim=1)  # (iii)

        # Statistics. The comparison is computed once per batch and reused below.
        matches = predictions == labels
        correct += matches.sum().item()
        # Counts the samples actually present: the last batch is smaller than BATCH_SIZE
        # whenever the dataset size is not a multiple of it.
        total += labels.size(0)

        for i in range(labels.size(0)):  # (iv)
            label = labels[i].item()
            class_correct[label] += matches[i].item()  # (v)
            class_total[label] += 1

print(f"Accuracy: {correct / total * 100:.2f} %")
print()
for i in range(10):
    print(f"Accuracy of {i} is {class_correct[i] / class_total[i] * 100:.2f} %")

Accuracy: 98.97 %

Accuracy of 0 is 99.80 %
Accuracy of 1 is 98.77 %
Accuracy of 2 is 99.32 %
Accuracy of 3 is 98.51 %
Accuracy of 4 is 99.49 %
Accuracy of 5 is 99.55 %
Accuracy of 6 is 98.54 %
Accuracy of 7 is 98.93 %
Accuracy of 8 is 98.56 %
Accuracy of 9 is 98.51 %


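<font color=red>
    (i) Use floats or doubles instead of ints for the counters.
    <br/>
    (ii) Gradients are not needed for testing, so wrap the loop in no_grad().
    <br/>
    (iii) Write _, predictions rather than predictions, _: torch.max returns (values, indices), and we need the indices here.
    <br/>
    (iv) Don't use: for i in range(BATCH_SIZE): because the last batch may contain fewer than BATCH_SIZE samples.
    <br/>
    (v) Index with class_correct[labels[i]], not class_correct[i].
</font>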

## 6. Save the Model

In [11]:
# Saves the model's state_dict (its parameters rather than the whole object) to MODEL_PATH,
# i.e. "./mnist_net.pth", which is relative to the current working directory.
torch.save(net.state_dict(), MODEL_PATH)