In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import gzip
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

### Downloading and loading the dataset

In [None]:
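# Uncomment and run once to download the raw MNIST archives; torchvision places
# them under ./mnist_data/MNIST/raw/, which is where the cells below read from.
# torchvision.datasets.MNIST(root="./mnist_data/", download=True)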

In [None]:
# MNIST training-set geometry, as described in http://yann.lecun.com/exdb/mnist/:
# 60000 single-channel images of 28 x 28 pixels, one unsigned byte per pixel
image_size = 28
total_images = 60000

file_path = "./mnist_data/MNIST/raw/"

pixel_bytes = image_size * image_size * total_images
with gzip.open(f"{file_path}train-images-idx3-ubyte.gz", "r") as f:
    # discard the 16-byte header that precedes the pixel data
    f.read(16)
    buf = f.read(pixel_bytes)

# Decode the bytes as uint8 pixels and promote them to float32.
# The layout torch wants is (number of images, channels, image height, image width);
# the channel axis is kept last here only so that a single image can be plotted
# straight from numpy. The array is reshaped for torch in a later cell.
training_images = (
    np.frombuffer(buf, dtype=np.uint8)
    .astype(np.float32)
    .reshape(total_images, image_size, image_size, 1)
)

In [None]:
with gzip.open(f"{file_path}train-labels-idx1-ubyte.gz", "r") as f:
    # the first 8 bytes are the header; skip them and take everything after
    f.read(8)
    buf = f.read()

# One uint8 label per byte. Cast to a fixed-width int64 rather than np.int_:
# np.int_ is platform dependent (32-bit on Windows before NumPy 2.0), while
# nn.CrossEntropyLoss requires int64 (torch.long) class-index targets.
training_labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)

print(training_labels)

In [None]:
# Sanity check on the raw pixels: render one training image directly from
# the numpy array (each entry has shape (28, 28, 1) at this point)
sample_index = 3
image = training_images[sample_index]
plt.imshow(image)
plt.show()

In [None]:
# converting the numpy arrays to tensors
training_images = torch.from_numpy(training_images)
training_labels = torch.from_numpy(training_labels)

# Move the channel axis forward: (N, H, W, 1) -> (N, 1, H, W), the
# (number of images, channels, image height, image width) layout Conv2d expects.
# The channel axis has length 1, so a plain reshape does not reorder any pixels.
# The sizes come from the constants used while loading instead of repeating 60000 / 28.
training_images = training_images.reshape(total_images, 1, image_size, image_size)

# normalizing pixel values from [0, 255] to [0, 1] (in place)
training_images /= 255.0

In [None]:
# checking if the training labels look okay: as the cell's last expression,
# the first ten labels are displayed by the notebook
training_labels[:10]

In [None]:
# checking if the shape is fine: after the reshape in the conversion cell this
# should be torch.Size([60000, 1, 28, 28]), i.e. (images, channels, height, width)
training_images.shape

In [None]:
# Plot the first image again, this time taken from the tensor.
# Each entry is (1, 28, 28); dropping the leading channel axis leaves a 28 x 28 grid.
image = training_images[0].squeeze(0)
plt.imshow(image)
plt.show()

In [None]:
# this block of code is mildly modified but uses the same code as the pytorch tutorial 
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
def imshow(img):
    """Display a (channels, height, width) image tensor with matplotlib.

    The tutorial version first applied ``img / 2 + 0.5`` to undo a
    Normalize(mean=0.5, std=0.5) transform. The images in this notebook are
    only scaled to [0, 1], so that step compressed them into [0.5, 1] and
    washed out the plot; it is intentionally omitted here.

    Args:
        img: image tensor laid out as (channels, height, width), for example
            the grid returned by torchvision.utils.make_grid.
    """
    npimg = img.numpy()

    # matplotlib wants the channel axis last: C x H x W -> H x W x C
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Pull a single batch of images to preview; batch_size controls how many are shown
trainloader_for_plot = DataLoader(training_images, batch_size=6)
images = next(iter(trainloader_for_plot))

# tile the batch into one image grid and display it
imshow(torchvision.utils.make_grid(images))

## Defining the CNN

In [None]:
# a slightly modified version of the convolutional net defined in the pytorch tutorial 
# https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html

class Net(nn.Module):
    """Small LeNet-style CNN mapping 1 x 28 x 28 images to 10 class scores.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. The last layer's output is returned as-is
    (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # convolutional feature extractor: 1 -> 6 -> 16 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # fully connected head; a 28x28 input has become 16 maps of 4x4 by here
        # (28 -conv5-> 24 -pool2-> 12 -conv5-> 8 -pool2-> 4)
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return a (batch, 10) tensor of scores for a (batch, 1, 28, 28) input."""
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2)

        # keep the batch axis and collapse everything else into one feature vector
        x = x.flatten(start_dim=1)

        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

# instantiate the network and print its layer-by-layer summary
net = Net()
print(net)

In [None]:
# net.parameters() yields one tensor per weight or bias, so this prints the
# number of parameter tensors (not the number of individual scalar weights)
params = list(net.parameters())
print(len(params))

# shape of the first tensor, which is conv1's .weight
print(params[0].shape)

In [None]:
# shape of a single image: (channels, height, width), with no batch axis yet
print(training_images[0].shape)

In [None]:
# running the NN on one example
# unsqueeze(0) adds the leading batch axis the network expects: (1, 28, 28) -> (1, 1, 28, 28)
# (named `sample` so the builtin input() is not shadowed for the rest of the notebook)
sample = training_images[0].unsqueeze(0)
out = net(sample)
print(out)

In [None]:
# defining the loss function and optimization routine
# CrossEntropyLoss is fed the raw scores returned by Net.forward plus integer class labels
criterion = nn.CrossEntropyLoss()
# SGD with momentum, updating every parameter of the network
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Pair every image with its label, then serve the pairs in shuffled mini-batches of 4
dataset = TensorDataset(training_images, training_labels)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# each epoch is one full pass over the training set
for epoch in range(15):

    running_loss = 0
    for n, (inputs, labels) in enumerate(dataloader):
        # gradients accumulate across backward() calls, so clear whatever the
        # previous mini-batch left behind before computing new ones
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # report the mean loss over each run of 2000 mini-batches, then reset
        if (n + 1) % 2000 == 0:
            print(f'[{epoch + 1}, {n + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0

print('Finished Training')

### Testing the performance of the model

#### Reading the test data similar to how the training data was loaded

In [None]:
# the test split uses the same 28 x 28 images, but contains 10000 of them
image_size = 28
total_images_testing = 10000
file_path = "./mnist_data/MNIST/raw/"

with gzip.open(f"{file_path}t10k-images-idx3-ubyte.gz", "r") as f:
    # skip the 16-byte header, then read exactly the pixel bytes
    f.read(16)
    buf = f.read(image_size * image_size * total_images_testing)
    testing_images = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
    # nothing is plotted from numpy here, so go straight to the torch layout
    # (number of images, channels, image height, image width)
    testing_images = testing_images.reshape(total_images_testing, 1, image_size, image_size)

with gzip.open(f"{file_path}t10k-labels-idx1-ubyte.gz", "r") as f:
    # skip the 8-byte header
    f.read(8)
    buf = f.read()
    # fixed-width int64 instead of the platform-dependent np.int_, so the
    # labels become torch.long tensors on every platform
    testing_labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)

testing_images = torch.from_numpy(testing_images)
testing_labels = torch.from_numpy(testing_labels)

# Scale the *test* pixels to [0, 1] so they match the training preprocessing.
# (This line used to divide training_images a second time, which left the test
# images in [0, 255] when they were fed to the network.)
testing_images /= 255.0

In [None]:
# pair each test image with its label and serve them in mini-batches of 4;
# no shuffle argument is passed, unlike the training loader
testset = TensorDataset(testing_images, testing_labels)
testloader = DataLoader(testset, batch_size=4)

In [None]:
# checking correctness (adapted from the tutorial)
correct = 0
total = 0

# evaluation only, so gradient tracking is switched off
with torch.no_grad():
    for images, labels in testloader:
        # one row of 10 class scores per image in the batch
        outputs = net(images)

        # the prediction is the index of the largest score in each row
        predicted = torch.max(outputs, 1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')