### Step 1:
Import the required libraries: <br>
numpy, pandas, torch <br>
DataLoader and Dataset from `torch.utils.data` <br>
datasets and transforms from torchvision <br>

In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms

### Step 2:
Import `cuda` from torch and set the device to `"cuda"` if CUDA is available, otherwise to `"cpu"`

In [2]:
from torch import cuda

# Pick the compute device once: use the GPU when PyTorch can see one,
# otherwise fall back to the CPU. Later cells read this name.
if cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
device

'cpu'

### Step 3:
Load the MNIST training and test splits into the `./data` folder <br>
Set the `root`, `train`, `transform`, and `download` parameters of the `MNIST` dataset class

In [3]:
# Training split: train=True, images converted to tensors by ToTensor,
# files fetched into ./data if they are not already there.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Test split: identical settings except train=False.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

### Step 4:
Create the train and test dataloaders<br>
Set the dataset, batch size, and shuffle parameters

In [4]:
# Training batches are reshuffled on every pass over the data.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)

# Test batches keep the dataset order (shuffle disabled).
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)

### Step 5:
Define the model class as a subclass of `torch.nn.Module` with 2 methods: `__init__` and `forward`<br>

Initialize the model with 2 convolutional layers, 1 linear layer, and 1 max-pooling layer: <br>
the 1st convolutional layer has an input dim of 1 and an output dim of 10 with a kernel size of 5<br>
the 2nd convolutional layer has an input dim of 10 and an output dim of 20 with a kernel size of 5<br>
the linear layer has an input dim of 320 and an output dim of 10<br>
the max-pooling layer has a size of 2<br>

Define the forward function: <br>
Apply max pooling to the output of each convolutional layer, then apply ReLU to the pooled result <br>
Flatten the result and call log_softmax on the output of the linear layer <br>


In [5]:
class Model(torch.nn.Module):
  """Small convolutional classifier: two conv layers, max pooling, one linear layer.

  The linear layer expects 320 flattened features, which is what the two
  5x5 convolutions and two 2x2 poolings produce for a 1x28x28 input
  (28 -> 24 -> 12 -> 8 -> 4, and 20 * 4 * 4 = 320).
  """

  def __init__(self):
    """Create the convolutional, linear and pooling layers."""
    super().__init__()
    self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
    self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)

    self.l1 = torch.nn.Linear(320, 10)
    self.max = torch.nn.MaxPool2d(2)

  def forward(self, x):
    """Return per-class log-probabilities for a batch of images.

    Args:
      x: input batch whose first dimension is the batch size; the remaining
        dimensions must flatten to 320 features after the conv/pool stack.

    Returns:
      Tensor of shape (batch, 10) holding log-probabilities; each row sums
      to 1 after exponentiation.
    """
    in_size = x.size(0)
    x = torch.nn.functional.relu(self.max(self.conv1(x)))
    x = torch.nn.functional.relu(self.max(self.conv2(x)))
    # Flatten everything except the batch dimension for the linear layer.
    x = x.view(in_size, -1)
    # Pass dim explicitly: relying on the implicit dimension is deprecated and
    # emits a UserWarning on every forward call. dim=1 is the class axis.
    return torch.nn.functional.log_softmax(self.l1(x), dim=1)

### Step 6:
Instantiate the model and put it to device

In [6]:
# Build the network and move its parameters to the selected device.
# Module.to() returns the module itself, so the name can be bound in one step;
# the bare expression on the last line keeps the layer summary as cell output.
model = Model().to(device)
model

Model(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (l1): Linear(in_features=320, out_features=10, bias=True)
  (max): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

### Step 7:
Use CrossEntropyLoss with a mean reduction as the loss function <br>
Use SGD as the optimizer with a learning rate of 0.01 and a momentum of 0.05 <br>

In [7]:
# Loss: cross-entropy averaged over the batch.
criterion = torch.nn.CrossEntropyLoss(reduction="mean")

# Optimizer: plain SGD with a small momentum term over all model parameters.
optimus = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.05,
)

### Step 8:
Define the train method <br>
It takes an epoch parameter, the index of the current epoch (used for logging), and the training dataloader <br>
Activate the training mode of the model<br>
Iterate over each batch in the training dataloader: <br>
Use the model to predict from the given input <br>
Calculate the loss from the prediction and the label <br>
Print the loss value every 500 batches <br>
Reset the gradients, backpropagate the loss, and step the optimizer to update the model weights <br>

In [8]:
def train(epoch, train_dataloader):
  """Train the notebook-level ``model`` for one pass over ``train_dataloader``.

  Relies on the globals ``model``, ``criterion``, ``optimus`` and ``device``
  defined in earlier cells.

  Args:
    epoch: index of the current epoch; only used in the progress message.
    train_dataloader: iterable yielding ``(inputs, label)`` batches.
  """
  model.train()
  for batch_idx, (inputs, label) in enumerate(train_dataloader):
    # Tensor.to() returns a new tensor instead of moving the tensor in place,
    # so the result must be rebound or the batch stays on its original device.
    inputs = inputs.to(device)
    label = label.to(device)

    # Call the module itself rather than .forward() so hooks are honoured.
    predict = model(inputs)

    loss = criterion(predict, label)

    # Report the loss of every 500th batch.
    if batch_idx % 500 == 0:
      print(f"epoch {epoch}: loss: {loss}")

    # Clear stale gradients, backpropagate, then apply the parameter update.
    optimus.zero_grad()
    loss.backward()
    optimus.step()

### Step 9:
Define the valid function <br>
It takes the test dataloader as its parameter <br>
Activate the evaluation mode of the model <br>
Disable gradient computation<br>
For each batch in the test dataloader: <br>
Move the inputs and labels to the device and print the validation progress (optional) <br>
Use the model to predict from the given inputs <br>
Take the index of the maximum predicted value along dimension 1 and count the number of correct labels <br>

In [9]:
def valid(test_dataloader):
  """Evaluate the notebook-level ``model`` and return its accuracy in percent.

  Relies on the globals ``model`` and ``device`` defined in earlier cells.

  Args:
    test_dataloader: iterable yielding ``(inputs, label)`` batches.

  Returns:
    Percentage (0-100) of samples whose highest-scoring class equals the label.

  Raises:
    ZeroDivisionError: if ``test_dataloader`` yields no samples.
  """
  model.eval()
  total = 0
  n_correct = 0
  with torch.no_grad():
    for inputs, label in test_dataloader:
      # Tensor.to() returns a new tensor instead of moving the tensor in place,
      # so the result must be rebound or the batch stays on its original device.
      inputs = inputs.to(device)
      label = label.to(device)

      # Progress message whenever the running sample count is a multiple of 500.
      if total % 500 == 0:
        print(f"Validation complete for {total} samples")

      # Call the module itself rather than .forward() so hooks are honoured.
      predict = model(inputs)
      total += label.size(0)
      # Only the index of the best score per row is needed; the value is discarded.
      _, idx = torch.max(predict, dim=1)
      n_correct += (idx == label).sum().item()

  return n_correct * 100 / total

### Step 10:
Start training <br>
Print out the steps and status <br>
Print out the accuracy <br>

In [10]:
# ---- Training: two passes (epochs 0 and 1) over the training data ----
print("Training starts......................")
for epoch in range(2):
    train(epoch, train_dataloader)
print('Training completed....................')

# ---- Evaluation on the held-out test data ----
print('Test starts...........................')
acc = valid(test_dataloader)
print('Test completed........................')

# Final accuracy, as a percentage.
print(f'The accuracy for this model is {acc}')


Training starts......................
epoch 0: loss: 2.282376289367676


  return torch.nn.functional.log_softmax(self.l1(x))


epoch 0: loss: 0.39797914028167725
epoch 0: loss: 0.7429326176643372
epoch 0: loss: 0.14587146043777466
epoch 1: loss: 0.35074713826179504
epoch 1: loss: 0.26746267080307007
epoch 1: loss: 0.050792891532182693
epoch 1: loss: 0.2074107676744461
Training completed....................
Test starts...........................
Validation complete for 0 samples
Validation complete for 4000 samples
Validation complete for 8000 samples
Test completed........................
The accuracy for this model is 96.67
