Import libraries

In [1]:
import torch
from torch import nn, optim
from torchsummary import summary
import torchvision
import matplotlib.pyplot as plt

Check device to use

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device: {}".format(device))

Device: cuda:0


Load data into train loader

In [3]:
batch_size_train = 64
batch_size_test = 64

train_dataset = torchvision.datasets.MNIST('/files/', train=True, download=True, transform=torchvision.transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True)
test_dataset = torchvision.datasets.MNIST('/files/', train=False, download=True, transform=torchvision.transforms.ToTensor())
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size_test, shuffle=True)

print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x0000027D0BCAF710>


Create the model

In [4]:
model = nn.Sequential()

# Add 3 convolutional and pooling layers
model.add_module('Conv_1', nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3,3), padding=1))
model.add_module('Relu_1', nn.ReLU())
model.add_module('Conv_2', nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), padding=1))
model.add_module('Relu_2', nn.ReLU())
model.add_module('MaxPool_1', nn.MaxPool2d(kernel_size=2, stride = 2))
model.add_module('Batchnorm_1', nn.BatchNorm2d(64))

model.add_module('Conv_3', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3), padding=1))
model.add_module('Relu_3', nn.ReLU())
model.add_module('Conv_4', nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3,3), padding=1))
model.add_module('Relu_4', nn.ReLU())
model.add_module('MaxPool_2', nn.MaxPool2d(kernel_size=2, stride = 2))
model.add_module('Batchnorm_2', nn.BatchNorm2d(128))

model.add_module('Conv_5', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3,3), padding=1))
model.add_module('Relu_5', nn.ReLU())
model.add_module('Conv_6', nn.Conv2d(in_channels=256, out_channels=256, kernel_size = (3,3), padding=1))
model.add_module('Relu_6', nn.ReLU())
model.add_module('Conv_7', nn.Conv2d(in_channels=256, out_channels=256, kernel_size = (3,3), padding=1))
model.add_module('Relu_7', nn.ReLU())
model.add_module('Conv_8', nn.Conv2d(in_channels=256, out_channels=256, kernel_size = (3,3), padding=1))
model.add_module('Relu_8', nn.ReLU())
model.add_module('MaxPool_3', nn.MaxPool2d(kernel_size=2,stride=2))
model.add_module('Batchnorm_3', nn.BatchNorm2d(256))

model.add_module('Conv_9', nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3,3), padding=1))
model.add_module('Relu_9', nn.ReLU())
model.add_module('Conv_10', nn.Conv2d(in_channels=512, out_channels=512, kernel_size = (3,3), padding=1))
model.add_module('Relu_10', nn.ReLU())
model.add_module('Conv_11', nn.Conv2d(in_channels=512, out_channels=512, kernel_size = (3,3), padding=1))
model.add_module('Relu_11', nn.ReLU())
model.add_module('Conv_12', nn.Conv2d(in_channels=512, out_channels=512, kernel_size = (3,3), padding=1))
model.add_module('Relu_12', nn.ReLU())
model.add_module('MaxPool_4', nn.MaxPool2d(kernel_size=2,stride=2))
model.add_module('Batchnorm_4', nn.BatchNorm2d(512))

# Add a Flatten layer to the model
model.add_module('Flatten', nn.Flatten())

# Add a Linear layer with 64 units and relu activation
model.add_module('Linear_1', nn.Linear(in_features=512, out_features=64, bias=True))
model.add_module('Relu_L_1', nn.ReLU(inplace=True))
model.add_module('Dropout_1', nn.Dropout(p = 0.5))
model.add_module('Linear_3', nn.Linear(in_features = 64, out_features = 10, bias = True))
model.add_module('Out_activation', nn.Softmax(-1))

in_shape = (1,28,28)
model = model.to(device)
summary(model, input_size=(in_shape))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 28, 28]             640
              ReLU-2           [-1, 64, 28, 28]               0
            Conv2d-3           [-1, 64, 28, 28]          36,928
              ReLU-4           [-1, 64, 28, 28]               0
         MaxPool2d-5           [-1, 64, 14, 14]               0
       BatchNorm2d-6           [-1, 64, 14, 14]             128
            Conv2d-7          [-1, 128, 14, 14]          73,856
              ReLU-8          [-1, 128, 14, 14]               0
            Conv2d-9          [-1, 128, 14, 14]         147,584
             ReLU-10          [-1, 128, 14, 14]               0
        MaxPool2d-11            [-1, 128, 7, 7]               0
      BatchNorm2d-12            [-1, 128, 7, 7]             256
           Conv2d-13            [-1, 256, 7, 7]         295,168
             ReLU-14            [-1, 25

Set the hyperparameters

In [5]:
optimizer = optim.SGD(model.parameters(),momentum=0.9, lr=0.005, weight_decay= 5e-4)
loss_func = nn.CrossEntropyLoss(reduction='mean')
num_epochs = 100

Train the model

In [6]:
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    model.eval()
    test_loss = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            test_loss += loss.item()
    print(
        f"Epoch {epoch + 1}/{num_epochs} - Training Loss: {train_loss / len(train_loader):.4f} - Validation Loss: {test_loss / len(test_loader):.4f}")


Epoch 1/100 - Training Loss: 1.5460 - Validation Loss: 1.4800
Epoch 2/100 - Training Loss: 1.4808 - Validation Loss: 1.4722
Epoch 3/100 - Training Loss: 1.4773 - Validation Loss: 1.4694
Epoch 4/100 - Training Loss: 1.4749 - Validation Loss: 1.4707
Epoch 5/100 - Training Loss: 1.4731 - Validation Loss: 1.4680
Epoch 6/100 - Training Loss: 1.4727 - Validation Loss: 1.4681
Epoch 7/100 - Training Loss: 1.4724 - Validation Loss: 1.4704
Epoch 8/100 - Training Loss: 1.4708 - Validation Loss: 1.4690
Epoch 9/100 - Training Loss: 1.4707 - Validation Loss: 1.4693
Epoch 10/100 - Training Loss: 1.4699 - Validation Loss: 1.4692
Epoch 11/100 - Training Loss: 1.4695 - Validation Loss: 1.4693
Epoch 12/100 - Training Loss: 1.4694 - Validation Loss: 1.4683
Epoch 13/100 - Training Loss: 1.4690 - Validation Loss: 1.4687
Epoch 14/100 - Training Loss: 1.4683 - Validation Loss: 1.4700
Epoch 15/100 - Training Loss: 1.4674 - Validation Loss: 1.4681
Epoch 16/100 - Training Loss: 1.4680 - Validation Loss: 1.4707
E

KeyboardInterrupt: 

Evaluate the model

In [7]:
model.eval()
test_acc = 0
for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    predictions = model(images)
    accuracy = (torch.max(predictions, dim=-1, keepdim=True)[1].flatten() == labels).sum() / len(labels)
    test_acc += accuracy.item()
test_acc /= len(test_loader)
print(f"Test accuracy: {test_acc:.3f}")

Test accuracy: 0.996
