In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
from torchvision import datasets, transforms

In [None]:
# MNIST is stored under ./data, i.e. relative to the notebook's working
# directory. Both splits share one root and one transform so they cannot
# drift apart.
data_root = './data'
to_tensor = transforms.ToTensor()

# download=True asks torchvision to fetch the files into data_root.
train_dataset = datasets.MNIST(data_root, train=True, download=True,
                               transform=to_tensor)
# No need to download again: the test split is read from the same root.
test_dataset = datasets.MNIST(data_root, train=False, download=False,
                              transform=to_tensor)

In [None]:
# Fully connected classifier for problem 2: the 2D structure of the image is
# not used at all. Each input is flattened into a vector of pixel values
# (784 features) and passed through two hidden layers with dropout before the
# 10-way output, which is converted to log-probabilities.
layers = [
    nn.Flatten(),
    # hidden block 1
    nn.Linear(784, 170), nn.ReLU(), nn.Dropout(0.2),
    # hidden block 2
    nn.Linear(170, 30), nn.ReLU(), nn.Dropout(0.1),
    # output block: class scores -> log-probabilities over dim 1
    nn.Linear(30, 10), nn.LogSoftmax(dim=1),
]
model = nn.Sequential(*layers)
print(model)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=170, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.2, inplace=False)
  (4): Linear(in_features=170, out_features=30, bias=True)
  (5): ReLU()
  (6): Dropout(p=0.1, inplace=False)
  (7): Linear(in_features=30, out_features=10, bias=True)
  (8): LogSoftmax(dim=1)
)


In [None]:
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# The model's last layer is LogSoftmax, so its outputs are already
# log-probabilities. NLLLoss is the criterion that consumes log-probabilities;
# CrossEntropyLoss expects raw logits and would apply log-softmax a second time.
loss_fn = nn.NLLLoss()

In [None]:
# Mini-batches of 64 samples. Only the training stream is shuffled (a fresh
# order every epoch); the test stream keeps a fixed order so evaluation
# batches are reproducible from run to run.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
# Some layers, such as Dropout, behave differently during training, so put the
# model into training mode before optimizing.
model.train()

for epoch in range(20):
    # Accumulators for the sample-weighted mean loss of this epoch. Reporting
    # only the last mini-batch's loss is very noisy and says little about the
    # epoch as a whole.
    loss_sum = 0.0
    samples_seen = 0

    for data, target in train_loader:
        # Erase accumulated gradients
        optimizer.zero_grad()

        # Forward pass
        output = model(data)

        # Calculate loss
        loss = loss_fn(output, target)

        # Backward pass
        loss.backward()

        # Weight update
        optimizer.step()

        # loss_fn uses the default 'mean' reduction, so scale by the batch
        # size to recover the per-sample sum (the last batch may be smaller).
        batch_size = data.size(0)
        loss_sum += loss.item() * batch_size
        samples_seen += batch_size

    # Track the mean training loss each epoch; max() guards an empty loader.
    print('Train Epoch: %d  Loss: %.4f' % (epoch + 1, loss_sum / max(samples_seen, 1)))

Train Epoch: 1  Loss: 0.1426
Train Epoch: 2  Loss: 0.1735
Train Epoch: 3  Loss: 0.3792
Train Epoch: 4  Loss: 0.0834
Train Epoch: 5  Loss: 0.2280
Train Epoch: 6  Loss: 0.0043
Train Epoch: 7  Loss: 0.0572
Train Epoch: 8  Loss: 0.1258
Train Epoch: 9  Loss: 0.0042
Train Epoch: 10  Loss: 0.0208
Train Epoch: 11  Loss: 0.0685
Train Epoch: 12  Loss: 0.0273
Train Epoch: 13  Loss: 0.0101
Train Epoch: 14  Loss: 0.0058
Train Epoch: 15  Loss: 0.1274
Train Epoch: 16  Loss: 0.0009
Train Epoch: 17  Loss: 0.0100
Train Epoch: 18  Loss: 0.0172
Train Epoch: 19  Loss: 0.0184
Train Epoch: 20  Loss: 0.0116


In [None]:
# Putting layers like Dropout into evaluation mode
model.eval()

test_loss = 0.0
correct = 0
num_test = len(test_loader.dataset)

# Turning off automatic differentiation
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # loss_fn returns the mean over the batch (default reduction), so
        # multiply by the batch size to accumulate a per-sample sum. Summing
        # the batch means directly and dividing by the dataset size would
        # understate the average loss by roughly the batch size.
        test_loss += loss_fn(output, target).item() * data.size(0)
        pred = output.argmax(dim=1)  # Index of the max class score per sample
        correct += (pred == target).sum().item()

# Per-sample average over the whole test set
test_loss /= num_test

print('Test set: Average loss: %.4f, Accuracy: %d/%d (%.4f)' %
      (test_loss, correct, num_test,
       100. * correct / num_test))

Test set: Average loss: 0.0013, Accuracy: 9804/10000 (98.0400)
