<a href="https://colab.research.google.com/github/KevinFallon/mnist_project/blob/main/MNIST_CNN_FINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision as tv
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_epochs = 3         # full passes over the training set
batch_size = 128       # samples per mini-batch
learning_rate = 0.001  # optimizer step size


In [4]:
# Preprocessing pipeline: upscale each image, then convert it to a tensor.
resize_to_vgg_input = tv.transforms.Resize((224, 224))  # Resize per VGG paper.
img_transforms = tv.transforms.Compose([resize_to_vgg_input, tv.transforms.ToTensor()])

# Both splits share the same storage location, download flag and transforms.
mnist_kwargs = dict(root='./data', download=True, transform=img_transforms)
mnist_trainset = datasets.MNIST(train=True, **mnist_kwargs)
mnist_testset = datasets.MNIST(train=False, **mnist_kwargs)

# Training batches are reshuffled; test batches keep the dataset order.
mnist_train_loader = DataLoader(dataset=mnist_trainset, batch_size=batch_size, shuffle=True)
mnist_test_loader = DataLoader(dataset=mnist_testset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 343025430.43it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 37468510.31it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 198021228.25it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2875551.51it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [5]:
# MNIST_VGG implements the 11 weight layers NN (8 convolutional + 3 fully
# connected layers) from the VGG paper that is found at
# https://arxiv.org/pdf/1409.1556.pdf
class MNIST_VGG(nn.Module):
  """VGG-style image classifier with 11 weight layers.

  Five convolutional stages (each ending in a 2x2, stride-2 max-pool) are
  followed by three fully connected layers. `forward` returns raw class
  scores; no softmax is applied inside the model.

  Args:
    num_classes: Size of the final output layer (default 10, one per digit).
    in_channels: Number of channels in the input images (default 1 for
      greyscale images).

  Shape:
    Input:  (N, in_channels, H, W). H and W must be at least 32 so that the
      five halving max-pools leave a non-empty feature map. For 224x224 inputs
      the feature map is already 7x7 and the adaptive pool is an identity.
    Output: (N, num_classes).
  """

  def __init__(self, num_classes=10, in_channels=1):
    super(MNIST_VGG, self).__init__()

    # Initialize the modules we need to build the network
    self.layer1 = nn.Sequential(
      nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    )

    self.layer2 = nn.Sequential(
      nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    )

    self.layer3 = nn.Sequential(
      nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    )

    self.layer4 = nn.Sequential(
      nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    )

    self.layer5 = nn.Sequential(
      nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
      nn.ReLU(inplace=True),
      nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
    )

    # Bring the feature map to the 7x7 grid that fc1 is sized for. This layer
    # has no parameters (state_dict keys are unchanged) and is an identity
    # when the map is already 7x7, i.e. for 224x224 inputs.
    self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

    self.fc1 = nn.Sequential(
      nn.Linear(in_features=512 * 7 * 7, out_features=4096),
      nn.ReLU(inplace=True),
    )
    self.fc2 = nn.Sequential(
      nn.Linear(in_features=4096, out_features=4096),
      nn.ReLU(inplace=True),
    )

    self.fc3 = nn.Sequential(
      nn.Linear(in_features=4096, out_features=num_classes),
    )

  def forward(self, x):
    """Compute class scores of shape (N, num_classes) for a batch of images."""
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    x = self.layer5(x)
    x = self.avgpool(x)
    # flatten everything except the batch dimension to prepare for the fully
    # connected layers
    x = torch.flatten(x, 1)
    x = self.fc1(x)
    x = self.fc2(x)
    x = self.fc3(x)

    return x

In [6]:
def train(model, optimizer, train_data_loader, loss_module, number_epochs, batch_size, log_every=None):
  """Train `model` in place and print the loss at a fixed step interval.

  Args:
    model: Network to optimize; it is switched to train mode.
    optimizer: Optimizer that holds the parameters of `model`.
    train_data_loader: Sized iterable yielding `(images, labels)` batches.
    loss_module: Callable computing the loss from `(output, labels)`.
    number_epochs: Number of full passes over `train_data_loader`.
    batch_size: Only used as the default logging interval (kept so existing
      callers keep their output cadence).
    log_every: Print the loss every `log_every` steps. Defaults to
      `batch_size`; 0 disables logging.

  Returns:
    None.
  """
  # Set model to train mode
  model.train()

  # Batches are moved to wherever the model's weights live, so the function
  # does not depend on a notebook-level `device` variable.
  first_param = next(model.parameters(), None)
  model_device = first_param.device if first_param is not None else None

  log_interval = batch_size if log_every is None else log_every
  total_step = len(train_data_loader)

  # Training loop
  for epoch in range(number_epochs):
    for i, (images, labels) in enumerate(train_data_loader):

      # Step 1: Move the input data to the model's device
      if model_device is not None:
        images = images.to(model_device)
        labels = labels.to(model_device)

      # Step 2: Run the model on the input data
      output = model(images)

      # Step 3: Calculate the loss
      loss = loss_module(output, labels)

      # Step 4: Backprop
      # Set all gradients to zero. They would not be overwritten by default, just
      # added to existing values.
      optimizer.zero_grad()
      # Perform backprop
      loss.backward()

      # Step 5: Update the params
      optimizer.step()

      if log_interval and (i + 1) % log_interval == 0:
        # Report against the epoch count this call was given, not a global.
        print(f'Epoch [{epoch+1}/{number_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')

In [8]:
# Build the network and place its weights on the selected device.
model = MNIST_VGG()
model = model.to(device)

# Cross-entropy loss, and an Adam optimizer over every model parameter.
loss_module = nn.CrossEntropyLoss()
loss_module = loss_module.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

train(
    model=model,
    optimizer=optimizer,
    train_data_loader=mnist_train_loader,
    loss_module=loss_module,
    number_epochs=num_epochs,
    batch_size=batch_size,
)

Epoch [1/3], Step [128/469], Loss: 0.2170
Epoch [1/3], Step [256/469], Loss: 0.0807
Epoch [1/3], Step [384/469], Loss: 0.1189
Epoch [2/3], Step [128/469], Loss: 0.0116
Epoch [2/3], Step [256/469], Loss: 0.0037
Epoch [2/3], Step [384/469], Loss: 0.0282
Epoch [3/3], Step [128/469], Loss: 0.0773
Epoch [3/3], Step [256/469], Loss: 0.0119
Epoch [3/3], Step [384/469], Loss: 0.0092


In [9]:
def eval_model(model, data_loader):
  """Print the classification accuracy (in percent) of `model` on `data_loader`.

  Args:
    model: Network returning one score per class for each sample; it is
      switched to eval mode.
    data_loader: Iterable yielding `(inputs, labels)` batches.

  Returns:
    None. Prints `Accuracy: <pct>` with four decimals, or `Accuracy: nan`
    when the loader yields no samples.
  """
  # Set model to eval mode for inference
  model.eval()

  # Batches are moved to wherever the model's weights live, so the function
  # does not depend on a notebook-level `device` variable.
  first_param = next(model.parameters(), None)
  model_device = first_param.device if first_param is not None else None

  true_preds, num_preds = 0, 0

  with torch.no_grad(): # Deactivate gradients for inference
    for data_inputs, data_labels in data_loader:
      # Move the data to the same device as the model.
      if model_device is not None:
        data_inputs = data_inputs.to(model_device)
        data_labels = data_labels.to(model_device)
      # Output is [batchsize, num_classes]: one raw score per class for each
      # sample. No softmax is needed because the largest score already
      # identifies the predicted class.
      scores = model(data_inputs)
      # Index of the largest score along dim=1 is the predicted class, i.e.
      # index 5 means the digit 5.
      pred = scores.argmax(dim=1)
      # View data_labels as the same size as pred and count the number of
      # equal/correct predictions as a plain Python int.
      true_preds += (pred == data_labels.view_as(pred)).sum().item()
      # Count the samples actually evaluated so the denominator stays correct
      # even if the loader skips samples or has no `.dataset` attribute.
      num_preds += data_labels.shape[0]

  accuracy = 100.0 * true_preds / num_preds if num_preds else float('nan')
  print('Accuracy: {:.4f}'.format(accuracy))

# Report accuracy of the trained model on the MNIST test loader.
eval_model(model=model, data_loader=mnist_test_loader)

Accuracy: 98.5500
