In [1]:
import os

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets

from tqdm.notebook import tqdm

from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import LinearAxis, Range1d
import numpy as np

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Hyperparameters (tune here; later cells read these names)
num_epochs = 6            # full passes over the training set
num_classes = 10          # number of target classes (MNIST digits 0-9)
batch_size = 100          # samples per batch for the data loaders
learning_rate = 0.0005    # step size handed to the optimizer

In [4]:
# Where the MNIST files are stored and where model checkpoints are written.
DATA_PATH = './data/MNISTData'
MODEL_STORE_PATH = './data/pytorch_models/'

# exist_ok=True creates the directory (and any missing parents) only when it
# is absent, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(MODEL_STORE_PATH, exist_ok=True)

In [5]:
# Preprocessing applied to every image: convert it to a tensor, then
# normalize its single channel with mean 0.1307 and std 0.3081.
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

In [6]:
# MNIST train and test splits. Both read from DATA_PATH and apply `trans`;
# only the training split requests a download.
train_dataset = torchvision.datasets.MNIST(
    root=DATA_PATH,
    train=True,
    transform=trans,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root=DATA_PATH,
    train=False,
    transform=trans,
)

In [7]:
# Batch iterators over the two splits. Training batches are reshuffled each
# epoch; test batches keep the dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    """Two-block convolutional classifier for single-channel 28x28 images.

    Each convolutional block is Conv2d (5x5, stride 1, padding 2, which keeps
    the spatial size) -> ReLU -> 2x2 max-pool (which halves it). Two blocks
    turn a 1x28x28 input into 64 feature maps of 7x7, matching the
    ``7 * 7 * 64`` input width of ``fc1``.

    Args:
        num_classes: Width of the final linear layer, i.e. the number of
            scores produced per image. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, 1, 28, 28).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        # NOTE(review): fc1 feeds fc2 with no activation in between, so the
        # two layers compose into a single affine map. Left unchanged here to
        # preserve the existing model's behavior.
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [9]:
# Build the network, move its parameters to the selected device, and print
# its layer structure.
model = ConvNet().to(device)
print(model)

ConvNet(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (drop_out): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=3136, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=10, bias=True)
)


In [10]:
# Training objective and update rule: cross-entropy on the model's raw
# scores, minimized with Adam over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [11]:
# Train the model
total_step = len(train_loader)  # number of batches per epoch
loss_list = []  # training loss of every batch, in iteration order
acc_list = []   # training accuracy of every batch, as a fraction in [0, 1]

model.train()  # training mode, so the dropout layer is active

for epoch in tqdm(range(num_epochs)):
    for i, (images, labels) in enumerate(train_loader):

        images, labels = images.to(device), labels.to(device)

        # Run the forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Read the scalar loss once and reuse it for logging below.
        loss_value = loss.item()
        loss_list.append(loss_value)

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the accuracy of this batch. argmax over the class dimension
        # gives the predicted label without going through the legacy `.data`
        # attribute.
        total = labels.size(0)
        predicted = outputs.argmax(dim=1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)

        # Report progress every 100 batches.
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss_value,
                          (correct / total) * 100))

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

Epoch [1/6], Step [100/600], Loss: 0.1371, Accuracy: 97.00%
Epoch [1/6], Step [200/600], Loss: 0.0753, Accuracy: 99.00%
Epoch [1/6], Step [300/600], Loss: 0.1100, Accuracy: 96.00%
Epoch [1/6], Step [400/600], Loss: 0.1172, Accuracy: 96.00%
Epoch [1/6], Step [500/600], Loss: 0.0629, Accuracy: 97.00%
Epoch [1/6], Step [600/600], Loss: 0.0247, Accuracy: 99.00%
Epoch [2/6], Step [100/600], Loss: 0.0549, Accuracy: 98.00%
Epoch [2/6], Step [200/600], Loss: 0.1096, Accuracy: 96.00%
Epoch [2/6], Step [300/600], Loss: 0.1526, Accuracy: 94.00%
Epoch [2/6], Step [400/600], Loss: 0.1225, Accuracy: 99.00%
Epoch [2/6], Step [500/600], Loss: 0.0489, Accuracy: 99.00%
Epoch [2/6], Step [600/600], Loss: 0.0490, Accuracy: 97.00%
Epoch [3/6], Step [100/600], Loss: 0.0519, Accuracy: 98.00%
Epoch [3/6], Step [200/600], Loss: 0.0369, Accuracy: 97.00%
Epoch [3/6], Step [300/600], Loss: 0.0256, Accuracy: 98.00%
Epoch [3/6], Step [400/600], Loss: 0.0687, Accuracy: 98.00%
Epoch [3/6], Step [500/600], Loss: 0.076

In [12]:
# Test the model
model.eval()  # evaluation mode, so the dropout layer is disabled
correct = 0   # correctly classified test images so far
total = 0     # test images seen so far
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:

        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        # Predicted label = index of the highest score along the class
        # dimension (replaces the legacy `.data` access).
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Report the number of images actually evaluated instead of a hard-coded
    # count, so the message stays accurate for any test set size.
    print('Test Accuracy of the model on the {} test images: {} %'.format(total, (correct / total) * 100))

Test Accuracy of the model on the 10000 test images: 99.1 %


In [13]:
# Save the model and plot
# The checkpoint file name records the epoch count and learning rate.
checkpoint_name = 'ConvNet_model_MNIST_{}_epochs_lr_{}.ckpt'.format(num_epochs, learning_rate)
torch.save(model.state_dict(), os.path.join(MODEL_STORE_PATH, checkpoint_name))

# Per-batch training curves: loss on the left axis (blue, default color),
# accuracy in percent on a secondary right axis (red).
steps = np.arange(len(loss_list))
p = figure(x_axis_label='Training step (batch)', y_axis_label='Loss', width=850,
           y_range=(0, 1), title='PyTorch ConvNet results')
p.extra_y_ranges = {'Accuracy': Range1d(start=0, end=100)}
p.add_layout(LinearAxis(y_range_name='Accuracy', axis_label='Accuracy (%)'), 'right')
p.line(steps, loss_list)
# acc_list holds fractions; scale to percent to match the right-hand axis.
p.line(steps, np.array(acc_list) * 100, y_range_name='Accuracy', color='red')
show(p)