In [15]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms, datasets

In [16]:
# Number of samples per mini-batch; passed as `batch_size` to both DataLoaders below.
bs = 64  #Batch Size

In [17]:
#Image Preprocessing

# The pipeline is built from the fully qualified `torchvision.transforms`
# module on purpose: this cell rebinds the name `transforms` (later cells read
# the pipeline under that name), so a second run of `transforms.Compose(...)`
# would look up `Compose` on the pipeline object instead of the module and fail.
# Steps: convert each image to a tensor, then normalize with mean 0 / std 0.25.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0,), (0.25,)),
])

In [18]:
# Initializing Datasets

# MNIST train and test splits stored under data/ (fetched on first use because
# download=True); both splits go through the same preprocessing pipeline.
trainset = datasets.MNIST(
    root='data/',
    train=True,
    transform=transforms,
    download=True,
)
testset = datasets.MNIST(
    root='data/',
    train=False,
    transform=transforms,
    download=True,
)

In [19]:
# Sanity check: number of samples in the train and test splits.
len(trainset), len(testset)

(60000, 10000)

In [20]:
# Show the dataset summary (size, root location, split and transform pipeline).
trainset

Dataset MNIST
    Number of datapoints: 60000
    Root location: data/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0,), std=(0.25,))
           )

In [21]:
# Training batches are reshuffled every epoch. The test loader keeps a fixed
# order: shuffling gives no benefit at evaluation time, and a fixed order keeps
# the batch composition (including the shorter final batch) identical from run
# to run, so validation metrics are reproducible.
train_dl = DataLoader(trainset, batch_size=bs, shuffle=True)
test_dl = DataLoader(testset, batch_size=bs, shuffle=False)

In [22]:
# Let's create a Fully Connected model using class approach

class FCLModel(nn.Module):
  """Fully connected classifier: 784 -> 1024 -> 512 -> 256 -> 10.

  Every linear layer except the last is followed by a ReLU; the last layer
  returns raw, un-normalized class scores.
  """

  def __init__(self):
    super().__init__()
    self.input_layer = nn.Linear(in_features=28 * 28, out_features=1024)
    self.hidden_1 = nn.Linear(in_features=1024, out_features=512)
    self.hidden_2 = nn.Linear(in_features=512, out_features=256)
    self.output_layer = nn.Linear(in_features=256, out_features=10)

  def forward(self, x):
    """Flatten `x` into rows of 784 values and return one row of 10 scores per input row."""
    activations = x.view(-1, 784)
    relu_layers = (self.input_layer, self.hidden_1, self.hidden_2)
    for layer in relu_layers:
      activations = F.relu(layer(activations))
    return self.output_layer(activations)

In [23]:
# Let's define CNN Class
class CNNModel(nn.Module):
  """Small convolutional classifier for single-channel 28x28 images.

  Pipeline: Conv2d(1 -> 32, 3x3, no padding) -> ReLU -> MaxPool(2x2, stride 2)
  -> flatten -> Linear(5408 -> 256) -> ReLU -> Linear(256 -> 10).
  The final layer returns raw, un-normalized class scores.
  """

  def __init__(self):
    super().__init__()
    self.input_layer = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
    self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
    # 28x28 shrinks to 26x26 after the unpadded 3x3 convolution and to 13x13
    # after 2x2 pooling, hence 32 * 13 * 13 = 5408 input features.
    self.fcl_1 = nn.Linear(32*13*13, 256)
    self.output_layer = nn.Linear(256, 10)

  def forward(self, x):
    """Return a `(batch, 10)` tensor of class scores.

    `x` is reshaped to `(-1, 1, 28, 28)` first, so flattened rows of 784
    values are accepted as well as image-shaped batches.
    """
    input_batch = x.view(-1, 1, 28, 28)
    x = F.relu(self.input_layer(input_batch))
    x = self.max_pool(x)
    # No second ReLU here: max-pooling values that are already non-negative
    # cannot produce a negative value, so the extra activation was a no-op.
    # Flattening from dim 1 keeps the batch dimension and avoids repeating the
    # 32*13*13 constant.
    x = x.flatten(start_dim=1)
    x = F.relu(self.fcl_1(x))
    return self.output_layer(x)


In [24]:
# Instantiate a throwaway model just to display its layer summary.
CNNModel()

CNNModel(
  (input_layer): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fcl_1): Linear(in_features=5408, out_features=256, bias=True)
  (output_layer): Linear(in_features=256, out_features=10, bias=True)
)

In [25]:
# Loss used by both the training loop and validation. It is applied directly to
# the models' raw output scores and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [26]:
def batch_accuracy(xb, yb):
  """Return the fraction of rows in `xb` whose arg-max column equals the label in `yb`.

  `xb` holds one row of class scores per sample and `yb` the matching class
  indices. The result is a plain Python float.
  """
  predicted = xb.argmax(dim=1)
  hits = predicted.eq(yb)
  return hits.float().mean().item()

def validate_epoch(model, data_loader=None, loss_fn=None):
  """Evaluate `model` without gradient tracking and return `(accuracy, mean_loss)`.

  Both metrics are averaged per sample rather than per batch, so a shorter
  final batch is not given the same weight as a full one.

  Args:
    model: Module mapping an input batch to per-class scores, one row per sample.
    data_loader: Iterable of `(inputs, labels)` batches. Defaults to the
      notebook-level `test_dl`.
    loss_fn: Callable `(scores, labels) -> scalar tensor`. Defaults to the
      notebook-level `criterion`. It is assumed to return the *mean* loss of
      the batch (the default reduction of `nn.CrossEntropyLoss`); the value is
      re-weighted by the batch size here.

  Returns:
    Tuple `(accuracy, mean_loss)` of Python floats.

  Raises:
    ValueError: If the loader yields no samples.

  Note:
    The model is switched to eval mode and left in it.
  """
  # Fall back to the notebook globals so existing `validate_epoch(model)` calls
  # behave as before.
  if data_loader is None:
    data_loader = test_dl
  if loss_fn is None:
    loss_fn = criterion

  loss_sum = 0.0
  correct = 0
  seen = 0
  model.eval()

  with torch.no_grad():
    for xb, yb in data_loader:
      output = model(xb)
      batch_size = yb.size(0)
      # Undo the per-batch mean so every sample contributes equally.
      loss_sum += loss_fn(output, yb).item() * batch_size
      correct += (output.argmax(1) == yb).sum().item()
      seen += batch_size

  if seen == 0:
    raise ValueError('validate_epoch received no samples to evaluate')
  return correct / seen, loss_sum / seen

In [27]:
def train_model(model, num_epochs, print_every=1, train_loader=None, loss_fn=None, opt=None):
  """Train `model` for `num_epochs` epochs, printing a metrics row periodically.

  Training loss and accuracy are averaged per sample rather than per batch, so
  a shorter final batch is not given the same weight as a full one.

  Args:
    model: Module mapping an input batch to per-class scores, one row per sample.
    num_epochs: Number of full passes over the training data.
    print_every: Validate and print a metrics row every this many epochs.
    train_loader: Iterable of `(inputs, labels)` batches. Defaults to the
      notebook-level `train_dl`.
    loss_fn: Callable `(scores, labels) -> scalar tensor`. Defaults to the
      notebook-level `criterion`. It is assumed to return the *mean* loss of
      the batch; the value is re-weighted by the batch size for reporting.
    opt: Optimizer that updates `model`'s parameters. Defaults to the
      notebook-level `optimizer`, which must then have been built from this
      model's parameters. Passing it explicitly avoids relying on that global.

  Returns:
    None. Progress is reported through `print`.

  Raises:
    ValueError: If the training loader yields no samples.

  Note:
    Validation always runs through `validate_epoch(model)`, i.e. with that
    function's own default data and loss.
  """
  # Fall back to the notebook globals so existing `train_model(model, n)` calls
  # behave as before.
  if train_loader is None:
    train_loader = train_dl
  if loss_fn is None:
    loss_fn = criterion
  if opt is None:
    opt = optimizer

  for i in range(num_epochs):
    loss_sum = 0.0
    correct = 0
    seen = 0
    model.train()

    for xb, yb in train_loader:
      output = model(xb)
      loss = loss_fn(output, yb)

      opt.zero_grad()
      loss.backward()
      opt.step()

      batch_size = yb.size(0)
      # Undo the per-batch mean so every sample contributes equally.
      loss_sum += loss.item() * batch_size
      # Accuracy uses the scores computed before this step's weight update.
      correct += (output.argmax(1) == yb).sum().item()
      seen += batch_size

    if seen == 0:
      raise ValueError('train_model received no samples to train on')
    accuracy = correct / seen
    epoch_loss = loss_sum / seen

    if (i+1) % print_every == 0:
      test_acc, test_loss = validate_epoch(model)
      print(f'| Epoch: {(i+1):02} | Train Loss: {epoch_loss:.3f} | Train Acc.:{accuracy:.3f} |'+  f' Val. Loss: {test_loss:.3f} | Val. Acc.: {test_acc:.3f} |')

In [28]:
# Train the fully connected model for 5 epochs.
# `train_model(fcl, 5)` is called without an optimizer argument and picks up the
# module-level `optimizer`, so that name must be bound to an optimizer over
# `fcl`'s parameters before the call.
fcl = FCLModel()
optimizer = torch.optim.Adam(fcl.parameters(), lr=0.01)
train_model(fcl, 5)

| Epoch: 01 | Train Loss: 0.365 | Train Acc.:0.904 | Val. Loss: 0.283 | Val. Acc.: 0.935 |
| Epoch: 02 | Train Loss: 0.255 | Train Acc.:0.938 | Val. Loss: 0.182 | Val. Acc.: 0.954 |
| Epoch: 03 | Train Loss: 0.189 | Train Acc.:0.953 | Val. Loss: 0.231 | Val. Acc.: 0.945 |
| Epoch: 04 | Train Loss: 0.185 | Train Acc.:0.954 | Val. Loss: 0.187 | Val. Acc.: 0.956 |
| Epoch: 05 | Train Loss: 0.165 | Train Acc.:0.958 | Val. Loss: 0.207 | Val. Acc.: 0.956 |


In [29]:
# cnn model
# Train the convolutional model for 5 epochs.
# Rebinding the module-level `optimizer` here is required: `train_model(cnn, 5)`
# picks it up implicitly, and the previous binding tracked another model's
# parameters.
cnn = CNNModel()
optimizer = torch.optim.Adam(cnn.parameters(), lr=1e-2)
train_model(cnn, 5)

| Epoch: 01 | Train Loss: 0.312 | Train Acc.:0.942 | Val. Loss: 0.082 | Val. Acc.: 0.974 |
| Epoch: 02 | Train Loss: 0.065 | Train Acc.:0.980 | Val. Loss: 0.071 | Val. Acc.: 0.977 |
| Epoch: 03 | Train Loss: 0.053 | Train Acc.:0.984 | Val. Loss: 0.074 | Val. Acc.: 0.979 |
| Epoch: 04 | Train Loss: 0.053 | Train Acc.:0.985 | Val. Loss: 0.090 | Val. Acc.: 0.977 |
| Epoch: 05 | Train Loss: 0.048 | Train Acc.:0.986 | Val. Loss: 0.092 | Val. Acc.: 0.977 |
