In [1]:
!pip install wandb -qU

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# import
import cv2 #opencv
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable # auto gradient feature
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

In [24]:
import wandb
wandb.login()
wandb.init(
    project =  "CNN on MNIST",
    config = {
        "epochs":5,
        "batch_size":100,
        "lr":0.001
    }
)
config = wandb.config

In [25]:
# data transformers --> Image normalization, converting to tensors

# Tese values are used to normalize the image tensors

mean = 0.1307
std = 0.3081

# Transormer for train and test data
transform_original = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean,std)
])

# Transformer for new images
transform_inference = transforms.Compose([
    transforms.Resize((28,28)),
    transforms.ToTensor(),
    transforms.Normalize(mean,std)
])


In [26]:
# loading the train and test data

train_dataset = datasets.MNIST(
    root = './data',
    train = True,
    transform = transform_original,
    download=True
)

test_dataset = datasets.MNIST(
    root = './data',
    train = False,
    transform = transform_original,
    download=True
)

In [27]:
# creating training and testing minibatches

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=config.batch_size,
    shuffle = True
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=config.batch_size,
    shuffle = True
)

print(f"No of Images in the train dataset: {len(train_dataset)}")
print(f"No of Images in the test dataset: {len(test_dataset)}")
print(f"No of Batches in the train mini bactht: {len(train_loader)}")
print(f"No of Batches in the test mini bactht: {len(test_loader)}")

No of Images in the train dataset: 60000
No of Images in the test dataset: 10000
No of Batches in the train mini bactht: 600
No of Batches in the test mini bactht: 100


In [28]:
# defining the network architecture

class CNN(nn.Module):
  def __init__(self):
    super(CNN,self).__init__()
    # first CNN layer
    self.cnn1 = nn.Conv2d(in_channels=1,out_channels=8, kernel_size=3, stride=1, padding=1)
    self.batchnorm1 = nn.BatchNorm2d(8)# Batch normalization --> output feature maps of the first convolution layer
    self.relu = nn.ReLU()# ReLu Activation
    self.maxpool1 = nn.MaxPool2d(kernel_size=2)# max pooling

    # now the image is 14 x 14
    self.cnn2 = nn.Conv2d(in_channels=8,out_channels=32, kernel_size=3, stride=1, padding=1)
    self.batchnorm2 = nn.BatchNorm2d(32)
    self.relu = nn.ReLU()
    self.maxpool2 = nn.MaxPool2d(kernel_size=2)

    # FCL
    self.fc1 = nn.Linear(
        in_features=1568,
        out_features=650,
    )

    # dropout layer
    self.dropout = nn.Dropout(p=0.8)

    # FCL
    self.fc2 = nn.Linear(
        in_features=650,
        out_features=10,
    )

  def forward(self,input_image):
    # CNN1
    out = self.cnn1(input_image)
    out = self.batchnorm1(out)
    out = self.relu(out)
    out = self.maxpool1(out)

    # CNN2
    out = self.cnn2(out)
    out = self.batchnorm2(out)
    out = self.relu(out)
    out = self.maxpool2(out)

    # Flatterning the feature map
    out = out.view(-1,1568)

    # Fully connected Layers
    out = self.fc1(out)
    out = self.relu(out)
    out = self.dropout(out)
    out = self.fc2(out)

    return out


In [30]:
# initializing the objects

cnn = CNN()

CUDA = torch.cuda.is_available()

if CUDA:
  cnn = cnn.cuda()

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(),config.lr)

In [31]:
train_loss = []
test_loss = []
train_accuracy = []
test_accuracy = []



for epoch in range(config.epochs):
  iteration = 0
  correct_predictions = 0
  iteration_loss = 0

  # model on train mode
  cnn.train()
  # print(f"iteration {i+1} is starting...")
  for i, (inputs,lables) in enumerate(train_loader):
    inputs = Variable(inputs)
    lables = Variable(lables)

    # Deploy inputs and lables into the GPU

    inputs = inputs.cuda()
    lables = lables.cuda()

    optimizer.zero_grad()

    output = cnn(inputs)
    loss_val = loss(output,lables)
    lteration_loss = iteration_loss+ loss_val.item()

    # backpropergation
    loss_val.backward()
    optimizer.step()

    # gettingh the number with highest probability
    _, predicted_numbers = torch.max(output,1)

    correct_predictions = correct_predictions + (predicted_numbers == lables.data).sum()
    iteration =iteration+1
    train_accuracy.append((correct_predictions/len(train_loader)*100))

    # Testing
    test_loss = 0
    test_correct = 0
    iteration = 0

    # evaluation mode - Desable backprop
    cnn.eval()

  for i, (inputs, lables) in enumerate(test_loader):
    inputs = Variable(inputs)
    lables = Variable(lables)

    # Deploy inputs and lables into the GPU

    inputs = inputs.cuda()
    lables = lables.cuda()

    output = cnn(inputs)
    loss_val = loss(output,lables)
    test_loss = test_loss + loss_val.item()


    # gettingh the number with highest probability
    _, predicted_numbers = torch.max(output,1)

    test_correct = test_correct + (predicted_numbers == lables.data).sum()
    iteration =iteration+1
    test_accuracy = [].append((correct_predictions/len(test_loader)*100))




  params = {
      "Epoch": epoch+1,
      "Train Accuracy": (correct_predictions/len(train_loader)),
      "Train Loss": lteration_loss/len(train_loader),
      "Test Accuracy": (test_correct/len(test_loader)),
      "Test Loss": test_loss/len(test_loader)
  }
  wandb.log(params)
  print(f"Epoch: {epoch+1}/{config.epochs}, Train Accuracy: {(correct_predictions/len(train_loader))}, Train Loss: { lteration_loss/len(train_loader)}, Test Accuracy: {(test_correct/len(test_loader))}, Test Loss: { test_loss/len(test_loader)}")

wandb.finish()
print("Training is done")



Epoch: 1/5, Train Accuracy: 94.6300048828125, Train Loss: 0.00011620024840037028, Test Accuracy: 98.57999420166016, Test Loss: 0.04744018118828535
Epoch: 2/5, Train Accuracy: 98.4366683959961, Train Loss: 3.58390063047409e-05, Test Accuracy: 98.94999694824219, Test Loss: 0.03203894372330979
Epoch: 3/5, Train Accuracy: 98.8800048828125, Train Loss: 6.255306303501129e-05, Test Accuracy: 99.04999542236328, Test Loss: 0.028834060807712378
Epoch: 4/5, Train Accuracy: 99.05333709716797, Train Loss: 4.84237199028333e-05, Test Accuracy: 98.82999420166016, Test Loss: 0.033835942982695995
Epoch: 5/5, Train Accuracy: 99.0683364868164, Train Loss: 1.4968571874002615e-05, Test Accuracy: 99.00999450683594, Test Loss: 0.029959587916964667


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁▃▅▆█
Test Accuracy,▁▇█▅▇
Test Loss,█▂▁▃▁
Train Accuracy,▁▇███
Train Loss,█▂▄▃▁

0,1
Epoch,5.0
Test Accuracy,99.00999
Test Loss,0.02996
Train Accuracy,99.06834
Train Loss,1e-05


Training is done


In [32]:
# saving the model
torch.save(cnn.state_dict(),'CNN-MNIST.pth')