In [2]:
import torch
import torchvision
import torchvision.transforms as transforms

In [3]:
# TODO: try different values of hyperparameters and check how it will affect the classification performance.

batch_size=128
learning_rate=0.0001

In [5]:
# We normalize the data by its mean and variance.
transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)


# training validation split 
train_set, val_set = torch.utils.data.random_split(trainset, [50000, 10000])

trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)

valloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=0)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0)

In [12]:
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# TODO: Implement the LeNet according to the description.
class LeNet(nn.Module):

    def __init__(self):
        super(LeNet, self).__init__()
        # Here is an example of the convolutional layer where 
        # input channel=1, output channel=6, kernel size=5, padding=2
        # for this layer (only) we set padding=2 because LeNet is
        # expecting an image of size 32x32 instead of 28x28
        # implement other layers by yourself.
        self.conv1 = nn.Conv2d(1, 6, 5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.linear1 = nn.Linear(16*5*5, 120)
        self.linear2 = nn.Linear(120, 84)
        self.linear3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        out = F.max_pool2d(F.relu(self.conv2(out)), (2, 2))
        out = out.view(-1, np.prod(out.size()[1:]))
        out = F.relu(self.linear1(out))
        out = F.relu(self.linear2(out))
        out = self.linear3(out)
        return out

# We've implemented a multi-layer perceptron model so that you can try to run the training algorithm
# and compare it with LeNet in terms of the classification performance.
class MLP(nn.Module):

    def __init__(self):
        super(MLP, self).__init__()
        self.input = nn.Linear(28 * 28, 512)
        self.hidden = nn.Linear(512, 256)
        self.output = nn.Linear(256, 10)
    
    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = torch.sigmoid(self.input(x))
        x = torch.sigmoid(self.hidden(x))
        x = self.output(x)
        return x

net = MLP()

# Uncomment this line after you implement it
net = LeNet()

In [13]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

In [14]:
for epoch in range(10):  # loop over the dataset multiple times
    
    train_loss = 0.0
    train_acc = 0.0
    val_loss = 0.0
    val_acc = 0.0
    test_loss = 0.0
    test_acc = 0.0
    
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        
        
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        
        
        
        # statistics
        train_loss += loss.item()
        pred = torch.max(outputs, 1)[1]
        train_correct = (pred == labels).sum()
        train_acc += train_correct.item()

        
    # To get the best learned model, we need to do some statisticcs.
    # After training, we pick the model with best validation accuracy.
    with torch.no_grad():
        net.eval()

        for inputs, labels in valloader:

            predicts = net(inputs)

            loss = criterion(predicts, labels)
            val_loss += loss.item()
            pred = torch.max(predicts, 1)[1]
            val_correct = (pred == labels).sum()
            val_acc += val_correct.item()

        for inputs, labels in testloader:

            predicts = net(inputs)
            pred = torch.max(predicts, 1)[1]
            test_correct = (pred == labels).sum()
            test_acc += test_correct.item()

        net.train()
    print("Epoch %d" % epoch )

    print('Training Loss: {:.6f}, Training Acc: {:.6f}, Validation Acc: {:.6f}, Test Acc: {:.6f}'.format(train_loss / (len(train_set))*32,train_acc / (len(train_set)), val_acc / (len(val_set)),test_acc / (len(testset))))        

print('Finished Training')

Epoch 0
Training Loss: 0.574516, Training Acc: 0.105660, Validation Acc: 0.124700, Test Acc: 0.119100
Epoch 1
Training Loss: 0.569263, Training Acc: 0.154220, Validation Acc: 0.189600, Test Acc: 0.183700
Epoch 2
Training Loss: 0.562352, Training Acc: 0.216000, Validation Acc: 0.267000, Test Acc: 0.259700
Epoch 3
Training Loss: 0.550238, Training Acc: 0.346480, Validation Acc: 0.439400, Test Acc: 0.444300
Epoch 4
Training Loss: 0.522220, Training Acc: 0.516700, Validation Acc: 0.580200, Test Acc: 0.593000
Epoch 5
Training Loss: 0.444737, Training Acc: 0.634360, Validation Acc: 0.671100, Test Acc: 0.687900
Epoch 6
Training Loss: 0.295239, Training Acc: 0.745440, Validation Acc: 0.794300, Test Acc: 0.808300
Epoch 7
Training Loss: 0.188691, Training Acc: 0.814880, Validation Acc: 0.834900, Test Acc: 0.845200
Epoch 8
Training Loss: 0.144737, Training Acc: 0.846600, Validation Acc: 0.851500, Test Acc: 0.864900
Epoch 9
Training Loss: 0.123416, Training Acc: 0.863000, Validation Acc: 0.868900,