## Importing the Libraries

In [1]:
# Load in relevant libraries, and alias where appropriate
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

## Configuration

In [2]:
# Define relevant variables for the ML task
batch_size = 64        # samples per mini-batch served by the data loaders
num_classes = 10       # size of the classifier's final output layer
learning_rate = 0.001  # step size handed to the optimizer
num_epochs = 10        # full passes over the training loader

# Fix the RNG seed so weight initialization and batch shuffling are repeatable
# after "Restart Kernel -> Run All" (GPU kernels may still add small
# nondeterminism).
seed = 42
torch.manual_seed(seed)

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Loading the Dataset and preprocessing

In [3]:
# Loading the dataset and preprocessing.
#
# Every image is resized to 32x32: the LeNet5 defined in this notebook flattens
# its convolutional output to 400 = 16 * 5 * 5 features, which only works out
# for 32x32 inputs.
#
# The SAME normalization statistics are applied to both splits. Evaluating with
# different mean/std than were used for training would feed the network inputs
# on a slightly different scale than the one it was fitted on.
mnist_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])

train_dataset = torchvision.datasets.MNIST(root = './data',
                                           train = True,
                                           transform = mnist_transform,
                                           download = True)

test_dataset = torchvision.datasets.MNIST(root = './data',
                                          train = False,
                                          transform = mnist_transform,
                                          download = True)

# Shuffle only the training batches; evaluation order does not affect accuracy,
# so the test loader is kept deterministic.
train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                          batch_size = batch_size,
                                          shuffle = False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [None]:
# Report how many mini-batches each loader yields per pass.
# The notebook only defines train_loader and test_loader (there is no
# valid_loader), so the second line reports the test loader.
print(f"total no. of batches in trainloader: {len(train_loader)}")
print(f"total no. of batches in testloader: {len(test_loader)}")

In [21]:
# Pull a single mini-batch from the training loader to sanity-check shapes.
image, label = next(iter(train_loader))

# Image tensor layout: batch size, number of channels, height, width.
# The second tensor holds the class labels (one per image in the batch).
print(f"One batch image shape: {image.shape}")
print(f"One batch mask shape: {label.shape}")

One batch image shape: torch.Size([64, 1, 32, 32])
One batch mask shape: torch.Size([64])


## LeNet5 from Scratch

In [6]:
#Defining the convolutional neural network
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier with batch normalization.

    Two conv -> batch-norm -> ReLU -> max-pool stages are followed by three
    fully connected layers. The first linear layer expects 400 = 16 * 5 * 5
    features, so inputs must be single-channel 32x32 images, i.e. tensors of
    shape ``(N, 1, 32, 32)``.

    Args:
        num_classes: number of output scores produced per image.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Stage 1: 1 -> 6 channels, 5x5 convolution without padding, 2x2 pooling.
        first_stage = [
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.layer1 = nn.Sequential(*first_stage)

        # Stage 2: 6 -> 16 channels, same kernel / pooling configuration.
        second_stage = [
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.layer2 = nn.Sequential(*second_stage)

        # Classifier head: 400 -> 120 -> 84 -> num_classes.
        self.fc = nn.Linear(400, 120)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(120, 84)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for a batch ``x``."""
        features = self.layer2(self.layer1(x))
        # Flatten everything except the batch dimension before the linear layers.
        flat = features.reshape(features.size(0), -1)
        hidden = self.relu(self.fc(flat))
        hidden = self.relu1(self.fc1(hidden))
        return self.fc2(hidden)

## Setting Hyperparameters

In [7]:
# Instantiate the network and move it to the selected device
model = LeNet5(num_classes)
model = model.to(device)

# Loss function: cross-entropy over the class scores returned by the model
cost = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with the configured learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Number of batches per epoch; used in the "Step [i/N]" progress message
total_step = len(train_loader)

## Training

In [8]:
def train_fn(data_loader, model, optimizer, criterion=None, epochs=None, target_device=None):
    """Train ``model`` on ``data_loader`` and return the accumulated batch loss.

    Args:
        data_loader: sized iterable of ``(images, labels)`` batches.
        model: network to optimize; its parameters are updated in place.
        optimizer: optimizer constructed over ``model``'s parameters.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
            Defaults to the notebook-level ``cost``.
        epochs: number of passes over ``data_loader``. Defaults to the
            notebook-level ``num_epochs``.
        target_device: device each batch is moved to before the forward pass.
            Defaults to the notebook-level ``device``.

    Returns:
        float: the sum of ``loss.item()`` over every batch of every epoch.
        This is a running total across all epochs, not a per-epoch average.
    """
    # Fall back to the notebook globals so existing three-argument calls keep working.
    if criterion is None:
        criterion = cost
    if epochs is None:
        epochs = num_epochs
    if target_device is None:
        target_device = device

    # Steps per epoch are taken from the loader actually being iterated, so the
    # progress message stays correct for loaders other than the training one.
    steps_per_epoch = len(data_loader)

    # Training mode: layers such as batch norm use per-batch statistics.
    model.train()
    total_loss = 0.0
    for epoch in range(epochs):
        for i, (images, labels) in enumerate(data_loader):
            images = images.to(target_device)
            labels = labels.to(target_device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            total_loss += batch_loss

            # Report progress every 400 steps.
            if (i + 1) % 400 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, total_loss: {}'
                      .format(epoch + 1, epochs, i + 1, steps_per_epoch, batch_loss, total_loss))
    return total_loss

In [11]:
# Run the full training loop; the result is the loss summed over all batches.
total_loss = train_fn(data_loader=train_loader, model=model, optimizer=optimizer)

Epoch [1/10], Step [400/938], Loss: 0.2348, total_loss: 109.4764673402533
Epoch [1/10], Step [800/938], Loss: 0.0876, total_loss: 143.27245785854757
Epoch [2/10], Step [400/938], Loss: 0.0141, total_loss: 175.1324817186687
Epoch [2/10], Step [800/938], Loss: 0.0228, total_loss: 197.916503053857
Epoch [3/10], Step [400/938], Loss: 0.0585, total_loss: 222.23228494566865
Epoch [3/10], Step [800/938], Loss: 0.1595, total_loss: 238.22364062123233
Epoch [4/10], Step [400/938], Loss: 0.0397, total_loss: 257.41462467523525
Epoch [4/10], Step [800/938], Loss: 0.0131, total_loss: 271.6125304181478
Epoch [5/10], Step [400/938], Loss: 0.0495, total_loss: 285.8262745080283
Epoch [5/10], Step [800/938], Loss: 0.0031, total_loss: 298.4760491732741
Epoch [6/10], Step [400/938], Loss: 0.1162, total_loss: 312.1323066066543
Epoch [6/10], Step [800/938], Loss: 0.0065, total_loss: 322.6203364979592
Epoch [7/10], Step [400/938], Loss: 0.0001, total_loss: 332.9555839530949
Epoch [7/10], Step [800/938], Loss:

## Testing

In [18]:
def valid_fn(data_loader, model, target_device=None):
    """Measure the classification accuracy of ``model`` on ``data_loader``.

    Args:
        data_loader: iterable of ``(images, labels)`` batches.
        model: network to evaluate; it is switched to evaluation mode.
        target_device: device each batch is moved to before the forward pass.
            Defaults to the notebook-level ``device``.

    Returns:
        float: accuracy in percent (``100 * correct / total``), or ``nan`` when
        the loader yields no samples. The same figure is also printed.
    """
    # Fall back to the notebook global so existing two-argument calls keep working.
    if target_device is None:
        target_device = device

    # Evaluation mode: layers such as batch norm use their running statistics.
    model.eval()

    correct = 0
    total = 0
    # Gradients are not needed for evaluation; disabling them saves memory.
    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(target_device)
            labels = labels.to(target_device)
            # Predicted class = index of the highest score in each row.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else float('nan')
    print('Accuracy of the network on the {} test images: {} %'.format(total, accuracy))
    return accuracy

In [19]:
# Evaluate on the held-out test split. The trailing ';' keeps the cell from
# echoing a return value, so only the printed accuracy line is shown.
valid_fn(data_loader=test_loader, model=model);

Accuracy of the network on the 10000 test images: 99.07 %


## Saving Model

In [23]:
# Persist only the learned parameters (the state_dict), not the module object.
# Note: this is the model as it stands after the final epoch; no
# best-checkpoint selection happens in this notebook despite the file name.
trained_weights = model.state_dict()
torch.save(trained_weights, 'best_model.pt')
print("SAVED_MODEL")

SAVED_MODEL
