<a href="https://colab.research.google.com/github/JustinJiangNext/Handwritten-Digit-Recognizer/blob/main/MNIST_handwritten_digits.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title install libraries
!pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
#import libraries

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
from torchvision import models


In [3]:
#@title Define Hyperparameters
num_classes = 10
num_epochs = 10
batch_size = 100
lr = 1e-3 #learning rate

In [4]:
#@title Downloading the data and loading into Pytorch DataLoader object

train_data = dsets.MNIST(root = './data', train = True,
                        transform = transforms.ToTensor(), download = True)

test_data = dsets.MNIST(root = './data', train = False,
                       transform = transforms.ToTensor())

if torch.cuda.is_available():
  train_data.train_data.cuda()
  train_data.train_labels.cuda()
  test_data.train_data.cuda()
  test_data.train_labels.cuda()

train_gen = torch.utils.data.DataLoader(dataset = train_data,
                                             batch_size = batch_size,
                                             shuffle = True)

test_gen = torch.utils.data.DataLoader(dataset = test_data,
                                      batch_size = batch_size,
                                      shuffle = False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 37.0MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.17MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 8.91MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 7.09MB/s]


In [5]:
#@title Define model class

class LinearNN(nn.Module):
  def __init__(self):
    super(LinearNN, self).__init__()
    self.fc1 = nn.Linear(28 * 28, 128)
    self.fc2 = nn.Linear(128, 10)

  def forward(self, x):
    x = torch.flatten(x, start_dim=1)
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    x = F.softmax(x, dim=1)
    return x

class ConvolutionalNN(nn.Module):
  def __init__(self):
        super(ConvolutionalNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

  def forward(self, x):
      x = F.relu(self.conv1(x))
      x = self.pool(x)
      x = F.relu(self.conv2(x))
      x = self.pool(x)
      x = torch.flatten(x, start_dim=1)
      x = F.relu(self.fc1(x))
      x = self.fc2(x)
      return x


class DenseNetNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.densenet = models.densenet121(pretrained=True)
        self.densenet.features.conv0 = nn.Conv2d(1, 64, kernel_size = 3, padding = 1)
        self.densenet.classifier = nn.Linear(self.densenet.classifier.in_features, 10)

    def forward(self, x):
        print(x.size())
        return self.densenet(x)

In [8]:
#@title Training the model
net = LinearNN()
if torch.cuda.is_available():
  net.cuda()


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)

# Training loop
net.train()
for epoch in range(num_epochs):
    total_loss = 0
    for i, (images, labels) in enumerate(train_gen):
        if torch.cuda.is_available():
          images, labels = images.cuda(), labels.cuda()

        # Forward pass
        outputs = net(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if (i+1) % 10 == 0 or (i+1) == len(train_gen):
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_gen)}], Loss: {loss.item():.4f}")

    print(f"Epoch [{epoch+1}] Average Loss: {total_loss/len(train_gen):.4f}")

Epoch [1/10], Step [10/600], Loss: 2.2322
Epoch [1/10], Step [20/600], Loss: 2.1043
Epoch [1/10], Step [30/600], Loss: 1.9317
Epoch [1/10], Step [40/600], Loss: 1.8606
Epoch [1/10], Step [50/600], Loss: 1.7885
Epoch [1/10], Step [60/600], Loss: 1.7271
Epoch [1/10], Step [70/600], Loss: 1.7053
Epoch [1/10], Step [80/600], Loss: 1.6976
Epoch [1/10], Step [90/600], Loss: 1.6198
Epoch [1/10], Step [100/600], Loss: 1.6244
Epoch [1/10], Step [110/600], Loss: 1.6468
Epoch [1/10], Step [120/600], Loss: 1.5664
Epoch [1/10], Step [130/600], Loss: 1.6231
Epoch [1/10], Step [140/600], Loss: 1.6872
Epoch [1/10], Step [150/600], Loss: 1.5817
Epoch [1/10], Step [160/600], Loss: 1.5995
Epoch [1/10], Step [170/600], Loss: 1.6013
Epoch [1/10], Step [180/600], Loss: 1.6106
Epoch [1/10], Step [190/600], Loss: 1.6337
Epoch [1/10], Step [200/600], Loss: 1.5899
Epoch [1/10], Step [210/600], Loss: 1.5820
Epoch [1/10], Step [220/600], Loss: 1.5830
Epoch [1/10], Step [230/600], Loss: 1.5841
Epoch [1/10], Step [

In [10]:
#@title Evaluating Accuracy
net.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_gen:
        if torch.cuda.is_available():
          images, labels = images.cuda(), labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on test data: {100 * correct / total:.2f}%")


Accuracy on test data: 96.92%
