In [None]:
#Importing necessary libraries, then downloading and loading the test set of the MNIST dataset
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as datasets


#The transform must exist before the dataset is built; defining it in this cell
#keeps the notebook runnable top-to-bottom on a fresh kernel
transform = transforms.ToTensor()

#Held-out MNIST test split (train=False), downloaded to ./data if not already present
MNIST_test = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

    

In [None]:
#CNN architecture parameters
#Side length in pixels of the square MNIST images (28x28).
#NOTE(review): the Net class below does not read this value.
img_size = 28

#Convolutional classifier: layer definitions plus the forward pass
class Net(nn.Module):
    """Three convolutional stages followed by a two-layer classifier head.

    Every stage is convolution -> ReLU -> 2x2 max pooling. The resulting
    feature map is adaptively average-pooled to 2x2, flattened, and passed
    through two linear layers. The output holds log-probabilities over
    10 classes.
    """

    def __init__(self):
        """Create all layers; the network takes no constructor arguments."""
        super().__init__()

        # Feature extractor: channels grow 1 -> 32 -> 64 -> 128, all 5x5 kernels.
        # Only the third convolution is padded (padding=2), so it alone
        # preserves the spatial size of its input.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)

        # 2x2 max pooling with stride 2, applied after every convolution.
        self.pool = nn.MaxPool2d(2, 2)
        # Pins the spatial output to 2x2 so fc1 always sees 128*2*2 features.
        self.adaptive = nn.AdaptiveAvgPool2d((2, 2))

        # Classifier head: 512 hidden units, 10 output classes.
        self.fc1 = nn.Linear(128 * 2 * 2, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: tensor with the batch dimension first and one channel,
               i.e. (batch, 1, H, W) -- e.g. 28x28 MNIST images.

        Returns:
            Tensor of shape (batch, 10) holding log-probabilities
            (log_softmax over dim 1).
        """
        # Convolution -> ReLU -> max pool, once per convolutional layer.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))

        pooled = self.adaptive(x)
        # Flatten everything except the batch dimension for the linear layers.
        flat = pooled.view(pooled.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [None]:
#Train the CNN on the MNIST training split, batching through a DataLoader

#Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

#Training split, downloaded to ./data if missing; ToTensor is applied to every image
transform = transforms.ToTensor()
MNIST_train = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

#Build the model and place its parameters on the chosen device
net = Net().to(device)

#Hyperparameters
img_size = 28
#Number of images per optimisation step (keep small when RAM/memory is limited)
batch_size = 10
#Number of full passes over the training set
epochs = 2

#shuffle=True re-orders the samples at the start of every epoch
train_loader = DataLoader(MNIST_train, shuffle=True, batch_size=batch_size)

#Negative log likelihood loss; it expects log-probabilities as model output
loss_function = nn.NLLLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(epochs):
    net.train()
    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        #Clear gradients accumulated by the previous step before backpropagating
        optimizer.zero_grad()
        output = net(images)
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()

        #Only refresh the single-line progress readout on every 50th batch
        if batch_idx % 50 != 0:
            continue
        print(f"Epoch {epoch+1}  Batch {batch_idx+1}/{len(train_loader)}  loss={loss.item():.4f}", end='\r', flush=True)
            

In [None]:
#Testing a different data loading strategy: the dataset is sliced into
#consecutive batches by hand instead of going through a DataLoader
#(samples are therefore visited in dataset order, without shuffling)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.ToTensor()

MNIST_train = datasets.MNIST(root='./data', train=True, download=True, transform=transform)


img_size = 28
batch_size = 10

#Fresh model for this experiment. Its parameters must live on the same device
#as the batches below, otherwise the forward pass fails on CUDA machines.
net = Net()
net.to(device)

#Defined here so this cell does not depend on state left over from another cell
loss_function = nn.NLLLoss()

#Number of full passes over the training set
epochs = 2
#Created after the model has been moved so it tracks the on-device parameters
optimizer = optim.Adam(net.parameters(), lr=0.001)


for epoch in range(epochs):
    #Make sure the model is in training mode for every epoch
    net.train()
    for i in range(0, len(MNIST_train), batch_size):
        #Collect (image, label) pairs for indices i .. i+batch_size-1; the last batch may be shorter
        items = [MNIST_train[j] for j in range(i, min(i+batch_size, len(MNIST_train)))]
        images = torch.stack([it[0] for it in items], dim=0).to(device)   # it[0] = image tensor
        labels = torch.tensor([it[1] for it in items], dtype=torch.long).to(device)  # it[1] = label ints

        #Reset gradients to zero before each backpropagation
        optimizer.zero_grad()
        output = net(images)
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()

        print(f"EPOCH {epoch+1}, fraction complete: {i/len(MNIST_train):.4f}", end='\r', flush=True)