In [None]:
# Importing the necessary libraries
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from statistics import mean
import datetime as datetime

In [None]:
# Data pre-processing
# Every sample is resized to 70x70, converted to a tensor, and normalised with a
# mean/std of 0.5. Only the training pipeline applies random augmentation.
transform_train = transforms.Compose([
    transforms.Resize((70, 70)),  # Resizing the dataset samples to 70x70
    # NOTE(review): a mirrored digit is generally not a valid digit, so this
    # augmentation may hurt rather than help on MNIST -- confirm it is intended.
    transforms.RandomHorizontalFlip(),  # Randomly flips the data samples horizontally
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # Normalizing the dataset (one value per channel)
])

transform_test = transforms.Compose([
    transforms.Resize((70, 70)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Downloading and loading the dataset into the workspace
train = torchvision.datasets.MNIST(root='./data', train=True, transform=transform_train, download=True)
test = torchvision.datasets.MNIST(root='./data', train=False, transform=transform_test, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=32, shuffle=False)

# Device used for the batches: the GPU when one is available, otherwise the CPU.
# The variable keeps its historical name `cuda` because later cells reference it.
cuda = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Structure of the VGG-16 architecture
class VGG16(nn.Module):
    """VGG-16 style convolutional network with batch normalisation.

    The feature extractor is five stages of 3x3 convolutions (2, 2, 3, 3, 3
    convolutions with 64, 128, 256, 512, 512 channels), each stage ending in a
    2x2 max-pool. An adaptive average pool then forces a 7x7 feature map, so the
    classifier works for any input that survives five halvings (>= 32 px per side).

    Args:
        num_classes: Number of output logits. Defaults to 10.
        in_channels: Number of channels of the input images. Defaults to 1.

    Attribute names (`layer_1` .. `layer_6`, `avgpool`) and the ordering of the
    modules inside each `nn.Sequential` are part of the checkpoint format
    (state_dict keys) and must not change.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        self.layer_1 = self._make_stage(in_channels, 64, num_convs=2)
        self.layer_2 = self._make_stage(64, 128, num_convs=2)
        self.layer_3 = self._make_stage(128, 256, num_convs=3)
        self.layer_4 = self._make_stage(256, 512, num_convs=3)
        self.layer_5 = self._make_stage(512, 512, num_convs=3)

        height, width = 7, 7
        # Classifier head: two 4096-wide hidden layers with dropout, then the logits.
        self.layer_6 = nn.Sequential(nn.Linear(512 * width * height, 4096),
                                     nn.ReLU(inplace=True),
                                     nn.Dropout(p=0.5),
                                     nn.Linear(4096, 4096),
                                     nn.ReLU(inplace=True),
                                     nn.Dropout(p=0.5),
                                     nn.Linear(4096, num_classes))
        # Parameter-free; brings whatever spatial size layer_5 produces to height x width.
        self.avgpool = nn.AdaptiveAvgPool2d((height, width))

        # Kaiming-uniform weights and zero biases for every conv and linear layer.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    @staticmethod
    def _make_stage(in_channels, out_channels, num_convs):
        """Build `num_convs` x (3x3 conv -> batch norm -> ReLU) followed by a 2x2 max-pool."""
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers += [
                nn.Conv2d(in_channels=channels, out_channels=out_channels,
                          kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits, shape (batch, num_classes), for a batch of images `x`."""
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.layer_4(x)
        x = self.layer_5(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten channels x height x width
        x = self.layer_6(x)
        return x

# Instantiate the network and place it on the same device object (`cuda`) that the
# training loop moves its batches to, instead of hard-coding a `.cuda()` call.
model = VGG16().to(cuda)
print(model)

In [None]:
loss = nn.CrossEntropyLoss()  # Using Cross Entropy as Loss function (mean over the batch)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # Setting the optimizer for training the model
epochs = 5  # Setting the number of epochs for which the model is to be trained

# Per-epoch history, consumed by the plotting cell.
iterations = []
train_losses = []
test_losses = []
train_acc = []
test_acc = []

n_train = len(train_loader.dataset)
n_test = len(test_loader.dataset)

# Training the model
for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    # Accumulate on-device and convert once per epoch to avoid a host sync per batch.
    train_loss_sum = torch.zeros((), device=cuda)
    train_hits = torch.zeros((), dtype=torch.long, device=cuda)
    for X, Y in train_loader:
        X = X.to(cuda)
        Y = Y.to(cuda)
        optimizer.zero_grad()
        hypo = model(X)
        batch_loss = loss(hypo, Y)
        batch_loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample mean even when the last batch is smaller.
        train_loss_sum += batch_loss.detach() * Y.size(0)
        train_hits += (hypo.detach().argmax(dim=1) == Y).sum()

    # ---- evaluation pass ----
    model.eval()
    test_loss_sum = torch.zeros((), device=cuda)
    test_hits = torch.zeros((), dtype=torch.long, device=cuda)
    with torch.no_grad():  # no autograd graph is needed for evaluation
        for data, target in test_loader:
            data = data.to(cuda)
            target = target.to(cuda)
            output = model(data)
            test_loss_sum += loss(output, target) * target.size(0)
            test_hits += (output.argmax(dim=1) == target).sum()

    # Plain Python numbers from here on: mean loss per sample and accuracy in percent.
    cost = train_loss_sum.item() / n_train
    cost2 = test_loss_sum.item() / n_test
    correct = train_hits.item()
    correct2 = test_hits.item()
    train_accuracy = 100.0 * correct / n_train
    test_accuracy = 100.0 * correct2 / n_test

    print('Train set Accuracy: {:.2f}%'.format(train_accuracy))
    print('Test set Accuracy: {:.2f}%'.format(test_accuracy))
    print("Epoch : {:>4} / cost : {:>.9}".format(epoch + 1, cost))
    iterations.append(epoch)
    train_losses.append(cost)
    test_losses.append(cost2)
    train_acc.append(train_accuracy)
    test_acc.append(test_accuracy)
    timestamp = datetime.datetime.now()
    print("Date/Time stamp", timestamp)

In [None]:
def plot_curves(train_values, val_values, train_style, val_style, ylabel, title, filename):
    """Plot one training and one validation curve against the epoch index and save the figure.

    Args:
        train_values, val_values: sequences with one value per epoch.
        train_style, val_style: `(color, label)` pairs for the two curves.
        ylabel, title: axis label and figure title.
        filename: path the figure is written to.
    """
    fig, ax = plt.subplots(figsize=(10, 7))
    ax.plot(train_values, color=train_style[0], label=train_style[1])
    ax.plot(val_values, color=val_style[0], label=val_style[1])
    ax.set(xlabel='epoch index', ylabel=ylabel, title=title)
    ax.legend()
    fig.savefig(filename)
    plt.show()


# Train Accuracy vs Validation accuracy plot
plot_curves(train_acc, test_acc,
            ('green', 'train accuracy'), ('blue', 'validation accuracy'),
            ylabel='accuracy (%)', title='Train vs validation accuracy',
            filename='accuracy.png')

# Train Loss vs Validation Loss plot
plot_curves(train_losses, test_losses,
            ('orange', 'train loss'), ('red', 'validation loss'),
            ylabel='loss', title='Train vs validation loss',
            filename='loss.png')

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Utilizing CUDA enabled GPU when available
print(device)
classes = ['0','1','2','3','4','5','6','7','8','9'] # Classes of the MNIST dataset
n_classes = len(classes)

# Building the heatmap of the correctly classified and misclassified data samples.
# counts[t, p] = number of test samples with true label t that were predicted as p.
counts = torch.zeros(n_classes, n_classes, dtype=torch.long)
model.eval()  # make sure dropout / batch-norm are in inference mode
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        # Count EVERY sample of the batch (batches may have different sizes):
        # encode each (true, predicted) pair as one flat index and histogram them.
        pair_index = (labels * n_classes + predicted).cpu()
        counts += torch.bincount(pair_index, minlength=n_classes * n_classes).view(n_classes, n_classes)

heatmap = pd.DataFrame(data=counts.numpy(), index=classes, columns=classes)
_, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(heatmap, annot=True, fmt='d', cmap='YlGnBu', ax=ax)
ax.set(xlabel='predicted label', ylabel='true label')
figure = ax.get_figure()
figure.savefig('heatmap.png', dpi=400)
plt.show()

In [None]:
# Persist the trained network to disk.
# NOTE: the whole module object is serialised here (not just its state_dict), so the
# VGG16 class definition must be available when the file is loaded back.
torch.save(obj=model, f='VGG16_MNIST.pth')