## Import all the necessary libraries

Source: https://github.com/akshat57/Blind-Descent/blob/main/Blind_Descent-1-CNN.ipynb

In [19]:
import numpy as np
import torch
import sys
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils import data
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.datasets import CIFAR10

import matplotlib.pyplot as plt
import time

from sklearn.metrics import confusion_matrix, classification_report

cuda = torch.cuda.is_available()
cuda = False

## Download the MNIST and CIFAR10 datasets

In [20]:
train = MNIST('./MNIST_data', train=True, download=True, transform=transforms.ToTensor())
test = MNIST('./MNIST_data', train=False, download=True, transform=transforms.ToTensor())
train_MNIST_data = train.data; train_MNIST_labels = train.targets
test_MNIST_data = test.data; test_MNIST_labels = test.targets

train = CIFAR10('./CIFAR10_data', train=True, download=True, transform=transforms.ToTensor())
test = CIFAR10('./CIFAR10_data', train=False, download=True, transform=transforms.ToTensor())
train_CIFAR10_data = train.data; train_CIFAR10_labels = train.targets
test_CIFAR10_data = test.data; test_CIFAR10_labels = test.targets

print()
print("MNIST is already an array")
print(train_MNIST_data.shape, train_MNIST_labels.shape, test_MNIST_data.shape, test_MNIST_labels.shape)
print()
print("CIFAR10 is a list of arrays")
print(len(train_CIFAR10_data), len(train_CIFAR10_labels), len(test_CIFAR10_data), len(test_CIFAR10_labels))
print(train_CIFAR10_data[0].shape, test_CIFAR10_data[0].shape)

Files already downloaded and verified
Files already downloaded and verified

MNIST is already an array
torch.Size([60000, 28, 28]) torch.Size([60000]) torch.Size([10000, 28, 28]) torch.Size([10000])

CIFAR10 is a list of arrays
50000 50000 10000 10000
(32, 32, 3) (32, 32, 3)


## Dataloader

In [21]:
class CIFAR10Dataset(data.Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)

    def __getitem__(self,index):
        X = np.transpose(self.X[index], (2, 0, 1)) / 255
        X = X.astype(float)
        Y = self.Y[index]
        return X,Y

class MNIST_Dataset(data.Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)

    def __getitem__(self,index):
        X = np.pad(self.X[index], 2) / 255
        X = np.repeat(X[:, :, np.newaxis], 3, axis = 2)
        X = np.transpose(X, (2, 0, 1))
        X = X.astype(float)
        Y = self.Y[index]
        return X,Y

Using the torch.utils.data DataLoader, we shuffle the data and set the batch size

In [22]:
num_workers = 8 if cuda else 0 
batch_size = 64
    
# MNIST Training
train_dataset = MNIST_Dataset(train_MNIST_data, train_MNIST_labels)

train_loader_args = dict(shuffle=True, batch_size=batch_size, num_workers=num_workers, pin_memory=True) if cuda\
                    else dict(shuffle=True, batch_size=batch_size)
train_MNIST_loader = data.DataLoader(train_dataset, **train_loader_args)

# MNIST Testing
test_dataset = MNIST_Dataset(test_MNIST_data, test_MNIST_labels)

test_loader_args = dict(shuffle=False, batch_size=batch_size, num_workers=num_workers, pin_memory=True) if cuda\
                    else dict(shuffle=False, batch_size=1)
test_MNIST_loader = data.DataLoader(test_dataset, **test_loader_args)

# CIFAR10 Training
train_dataset = CIFAR10Dataset(train_CIFAR10_data, train_CIFAR10_labels)

train_loader_args = dict(shuffle=True, batch_size=batch_size, num_workers=num_workers, pin_memory=True) if cuda\
                    else dict(shuffle=True, batch_size=batch_size)
train_CIFAR10_loader = data.DataLoader(train_dataset, **train_loader_args)

# CIFAR10 Testing
test_dataset = CIFAR10Dataset(test_CIFAR10_data, test_CIFAR10_labels)

test_loader_args = dict(shuffle=False, batch_size=batch_size, num_workers=num_workers, pin_memory=True) if cuda\
                    else dict(shuffle=False, batch_size=1)
test_CIFAR10_loader = data.DataLoader(test_dataset, **test_loader_args)

## Define our Neural Network Model 
We define our model using the torch.nn.Module class

In [23]:
class MyCNN_Model(nn.Module):
    def __init__(self):
        super(MyCNN_Model, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size = 5)
        self.pool1 = nn.MaxPool2d(kernel_size = 2)
        self.conv2 = nn.Conv2d(32, 32, kernel_size = 5)
        self.pool2 = nn.MaxPool2d(kernel_size = 2)
        self.conv3 = nn.Conv2d(32, 10, kernel_size = 5)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = x.view(-1, 10)
        
        return x

## Create the model and define the Loss and Optimizer

In [24]:
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if cuda else "cpu")
model = MyCNN_Model()
model.to(device)
print(model)

MyCNN_Model(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 10, kernel_size=(5, 5), stride=(1, 1))
)


In [25]:
def train_epoch(model, train_loader, criterion):
    model.train()

    running_loss = 0.0
    predictions = []
    ground_truth = []
    loss_den = 1
    
    start_time = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):   
        # lr = np.power(10, np.random.uniform(-6, 1))
        lr = 0.001

        data = data.to(device)
        target = target.to(device)
    
        #previous model
        outputs = model(data.float())
        _, predicted = torch.max(outputs.data, 1)
        total_predictions = target.size(0)
        correct_predictions = (predicted == target).sum().item()
        acc = (correct_predictions/total_predictions)*100.0
        
        loss = criterion(outputs, target)
        loss.backward()

        with torch.no_grad():
            model.conv1.weight -= (lr * model.conv1.weight.grad).float()
            model.conv2.weight -= (lr * model.conv2.weight.grad).float()
            model.conv3.weight -= (lr * model.conv3.weight.grad).float()

        outputs = model(data.float())
        _, predicted = torch.max(outputs.data, 1)
        total_predictions = target.size(0)
        correct_predictions = (predicted == target).sum().item()
        acc_new = (correct_predictions/total_predictions)*100.0
        
        loss_new = criterion(outputs, target)
        loss_den += 1

        #calculuating confusion matrix
        predictions += list(predicted.detach().cpu().numpy())
        ground_truth += list(target.detach().cpu().numpy())

        '''
        if loss_new.item() > loss.item():
            model.conv1.weight += lr * model.conv1.weight.grad
            model.conv2.weight += lr * model.conv2.weight.grad
            model.conv3.weight += lr * model.conv3.weight.grad

            running_loss += loss.item()
        else:
            running_loss += loss_new.item()
        '''

        model.conv1.weight.grad.zero_()
        model.conv2.weight.grad.zero_()
        model.conv3.weight.grad.zero_()
        
    end_time = time.time()

    running_loss /= loss_den
    
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    
    return running_loss, model

## Create a function that will evaluate our network's performance on the test set

In [26]:
def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0
        
        predictions = []
        ground_truth = []

        for batch_idx, (data, target) in enumerate(test_loader):   
            data = data.to(device)
            target = target.to(device)

            outputs = model(data.float())

            _, predicted = torch.max(outputs.data, 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            loss = criterion(outputs, target).detach()
            running_loss += loss.item()
            
            #calculuating confusion matrix
            predictions += list(predicted.detach().cpu().numpy())
            ground_truth += list(target.detach().cpu().numpy())
        
        #write_confusion_matrix('Testing', ground_truth, predictions)
        running_loss /= len(test_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc


## Train the model for N epochs
We call our training and testing functions in a loop, while keeping track of the losses and accuracy. 

In [27]:
n_epochs = 40

model = MyCNN_Model(); model.to(device)
for i in range(n_epochs):
    train_loss, model = train_epoch(model, train_MNIST_loader, criterion)
    test_loss, MNIST_test_acc = test_model(model, test_MNIST_loader, criterion)
    print('='*20)

model = MyCNN_Model(); model.to(device)
for i in range(n_epochs):
    train_loss, model = train_epoch(model, train_CIFAR10_loader, criterion)
    test_loss, CIFAR10_test_acc = test_model(model, test_CIFAR10_loader, criterion)
    print('-'*20)

Training Loss:  0.0 Time:  75.84152889251709 s
Testing Loss:  1.8634493719637395
Testing Accuracy:  69.71000000000001 %
Training Loss:  0.0 Time:  73.66168093681335 s
Testing Loss:  0.6017704081712756
Testing Accuracy:  85.39999999999999 %
Training Loss:  0.0 Time:  73.92763423919678 s
Testing Loss:  0.40000363634203456
Testing Accuracy:  88.69 %
Training Loss:  0.0 Time:  72.94288635253906 s
Testing Loss:  0.34426975989272185
Testing Accuracy:  89.94 %
Training Loss:  0.0 Time:  72.81505012512207 s
Testing Loss:  0.30026563947604257
Testing Accuracy:  91.34 %
Training Loss:  0.0 Time:  72.93311047554016 s
Testing Loss:  0.2707764547317902
Testing Accuracy:  92.17 %
Training Loss:  0.0 Time:  73.13360691070557 s
Testing Loss:  0.24821601929133685
Testing Accuracy:  92.91 %
Training Loss:  0.0 Time:  73.312992811203 s
Testing Loss:  0.22732430675196438
Testing Accuracy:  93.54 %
Training Loss:  0.0 Time:  73.72670936584473 s
Testing Loss:  0.21479276808914488
Testing Accuracy:  93.78 %


Testing Loss:  1.5837869492175058
Testing Accuracy:  44.07 %
--------------------
Training Loss:  0.0 Time:  53.83308291435242 s
Testing Loss:  1.5667022596705704
Testing Accuracy:  45.050000000000004 %
--------------------
Training Loss:  0.0 Time:  53.713005781173706 s
Testing Loss:  1.5789252539422363
Testing Accuracy:  44.49 %
--------------------
Training Loss:  0.0 Time:  54.26631474494934 s
Testing Loss:  1.5487974836174399
Testing Accuracy:  45.21 %
--------------------
Training Loss:  0.0 Time:  53.66989207267761 s
Testing Loss:  1.522447308564931
Testing Accuracy:  46.33 %
--------------------
Training Loss:  0.0 Time:  53.80486536026001 s
Testing Loss:  1.5201728520071134
Testing Accuracy:  46.28 %
--------------------
Training Loss:  0.0 Time:  53.75895667076111 s
Testing Loss:  1.5331328783173115
Testing Accuracy:  45.46 %
--------------------
Training Loss:  0.0 Time:  54.035083532333374 s
Testing Loss:  1.5172969839636237
Testing Accuracy:  45.72 %
--------------------
T