<a href="https://colab.research.google.com/github/ShreelakshmiCR/ML/blob/main/mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
import datetime


In [None]:
%load_ext tensorboard
%matplotlib inline

from torch.utils.tensorboard import SummaryWriter

current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
cnn_log_dir = 'logs/cnn/' + current_time + '/train'
fc_log_dir = 'logs/fc/' + current_time + '/train'

cnn_writer = SummaryWriter(cnn_log_dir)
fc_writer = SummaryWriter(fc_log_dir)

In [None]:
# Hyper-parameters, gathered in one dict so later cells read them by key.
args = {
    'batch_size': 64,
    'epochs': 3,   # number of full passes over the training set
    'lr': 0.01,    # learning rate: how large each descent step is
    'seed': 1,     # random seed
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',  # use the GPU when one is available
}

In [None]:
## Loading data, splits
# Seed PyTorch's global RNG before anything random happens (shuffling here,
# weight initialisation in later cells).
torch.manual_seed(args['seed'])

# Convert each image to a tensor, then standardise it with a fixed mean / std
# (0.1307 / 0.3081 are the values commonly used for MNIST).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: shuffled so every epoch sees the batches in a new order.
mnist_dataset_train = datasets.MNIST('dataset/', train=True, download=True,
                                     transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(mnist_dataset_train,
                                           batch_size=args['batch_size'],
                                           shuffle=True)

# Test split: accuracy does not depend on sample order, so the loader is not
# shuffled. Shuffling would only draw extra random numbers on every evaluation
# pass and make the run harder to reproduce.
mnist_dataset_test = datasets.MNIST('dataset/', train=False, download=True,
                                    transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(mnist_dataset_test,
                                          batch_size=args['batch_size'],
                                          shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 104144703.54it/s]


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 22255317.62it/s]


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 25566026.51it/s]


Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 15849025.60it/s]


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



In [None]:
# Number of samples in the training and test splits, one per line.
print(len(mnist_dataset_train), len(mnist_dataset_test), sep="\n")

60000
10000


In [None]:
## Model architecture
# Fully-connected
class FullyConnected(nn.Module):
    """Two-layer fully connected classifier.

    The input is flattened to ``prod(image_shape)`` features, passed through a
    hidden layer of half that width with ReLU, and projected to
    ``num_classes`` raw scores (no softmax is applied).

    Args:
        image_shape: sequence of ints; their product is the number of input
            features per sample.
        num_classes: number of output scores per sample.
    """

    def __init__(self, image_shape, num_classes):
        super().__init__()
        # np.prod returns a NumPy scalar; keep a plain Python int so the
        # attribute and the layer sizes derived from it are ordinary ints.
        self.input_size = int(np.prod(image_shape))
        self.num_classes = num_classes

        hidden_size = self.input_size // 2
        self.fc1 = nn.Linear(self.input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, self.num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``.

        ``x`` may have any trailing shape as long as each sample holds
        ``input_size`` values.
        """
        # Naming the feature size (instead of -1) lets an empty batch through
        # and fails right here when the per-sample size is wrong.
        x = x.reshape(x.shape[0], self.input_size)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# CNNs
class CNN(nn.Module):
    """Small convolutional classifier for single-channel images.

    Two 5x5 convolutions, each followed by 2x2 max-pooling and ReLU, feed a
    two-layer fully connected head that produces 10 raw class scores.
    The head takes 320 features per sample, which is what a 28x28 input
    yields after the convolutional stack (20 channels x 4 x 4).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return raw class scores (logits) of shape ``(batch, 10)``.

        No softmax is applied here. A single unbatched ``(C, H, W)`` image is
        treated as a batch of one.
        """
        if x.dim() == 3:
            # Unbatched image: add the batch dimension so flattening below is
            # always "per sample".
            x = x.unsqueeze(0)

        # Convolution -> 2x2 max-pool -> ReLU
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # Convolution -> 2x2 max-pool -> ReLU
        x = F.relu(F.max_pool2d(self.conv2(x), 2))

        # Flatten each sample on its own. A `view(-1, 320)` would silently
        # regroup values across samples whenever the total element count is a
        # multiple of 320; this way a wrong input size fails in fc1 instead.
        x = x.flatten(start_dim=1)

        # Fully connected layer -> ReLU -> dropout (active in training mode only)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)

        # Output layer: one raw score per class
        return self.fc2(x)

In [None]:
# Build both classifiers and move them to the configured device.
# The fully connected model is created first, then the CNN.
fc_model = FullyConnected(image_shape=(28, 28), num_classes=10).to(args["device"])
cnn_model = CNN().to(args["device"])

# Shared loss; it is applied to the raw scores returned by the models.
loss_fn = nn.CrossEntropyLoss()

In [None]:
## Adam optimizer
# other optimizers : https://pytorch.org/docs/stable/optim.html#:~:text=torch.Tensor%20s.-,Algorithms,-Adadelta
# One independent optimizer per model, both with the same learning rate.
fc_optimizer = optim.Adam(params=fc_model.parameters(), lr=args["lr"])
cnn_optimizer = optim.Adam(params=cnn_model.parameters(), lr=args["lr"])

In [None]:
## Printing number of parameters
def _count_trainable(model):
    """Return the total element count of the model's parameters that require gradients."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Number of parameters in fully connected model : {_count_trainable(fc_model)}")
print(f"Number of parameters in CNN model : {_count_trainable(cnn_model)}")


Number of parameters in fully connected model : 311650
Number of parameters in CNN model : 21840


In [None]:
## Accuracy
def check_accuracy(loader, model, device=None):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to eval mode for the pass and put back into whatever
    mode it was in on entry, including when the pass raises.

    Args:
        loader: iterable yielding ``(inputs, labels)`` batches.
        model: module mapping a batch of inputs to per-class scores of shape
            ``(batch, num_classes)``.
        device: device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a model without parameters).

    Returns:
        ``num_correct / num_samples``. The correct count is accumulated as a
        tensor, so the result is a 0-dim tensor.

    Raises:
        ZeroDivisionError: if ``loader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0

    # Remember the caller's mode so evaluation does not silently change it.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                # Predicted class = index of the highest score in each row.
                predictions = model(x).argmax(dim=1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    return num_correct / num_samples

In [None]:
## Train loop
# Both models are trained side by side on exactly the same batches.
fc_model.train()
cnn_model.train()

global_step = 0  # x-axis of the logged loss curves; counts batches across all epochs
for ep in range(args["epochs"]):
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(args["device"])
        target = target.to(args["device"])

        # Clear the gradients left over from the previous batch.
        fc_optimizer.zero_grad()
        cnn_optimizer.zero_grad()

        # Forward pass
        fc_output = fc_model(data)
        cnn_output = cnn_model(data)

        # Loss of each model on this batch
        fc_loss = loss_fn(fc_output, target)
        cnn_loss = loss_fn(cnn_output, target)

        # Saving loss values for plotting
        cnn_writer.add_scalar('loss', cnn_loss.item(), global_step)
        fc_writer.add_scalar('loss', fc_loss.item(), global_step)
        global_step += 1

        # Backward pass: gradients of each loss w.r.t. its model's parameters
        fc_loss.backward()
        cnn_loss.backward()

        # One optimizer step per model
        fc_optimizer.step()
        cnn_optimizer.step()

# The writers buffer events; flush so every logged loss is on disk (and visible
# in TensorBoard) as soon as training ends.
fc_writer.flush()
cnn_writer.flush()

In [None]:
# Accuracy report for both models on the training and test sets.
# All statements sit at column 0: the cell previously mixed an unindented first
# line with indented follow-ups, which raised IndentationError.
# `ep` is the loop variable left behind by the training cell, i.e. the index of
# the last epoch that ran.
print("#"*10 + f"  Epoch : {ep}  "  + "#"*10)
print("Fully-connected")
print(f"Accuracy on training set: {check_accuracy(train_loader, fc_model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, fc_model)*100:.2f}")
print()
print("CNN")
print(f"Accuracy on training set: {check_accuracy(train_loader, cnn_model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, cnn_model)*100:.2f}")
print("#"*30)
print()

IndentationError: ignored