## Import all the necessary libraries

Source: https://github.com/akshat57/Blind-Descent/blob/main/Blind_Descent-1-CNN.ipynb

In [6]:
import numpy as np
import torch
import sys
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils import data
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.datasets import CIFAR10

import matplotlib.pyplot as plt
import time

from sklearn.metrics import confusion_matrix, classification_report

cuda = torch.cuda.is_available()
cuda = False

## Download the MNIST and CIFAR10 datasets

In [7]:
train = MNIST('./MNIST_data', train=True, download=True, transform=transforms.ToTensor())
test = MNIST('./MNIST_data', train=False, download=True, transform=transforms.ToTensor())
train_MNIST_data = train.data; train_MNIST_labels = train.targets
test_MNIST_data = test.data; test_MNIST_labels = test.targets

## Dataloader

In [8]:
class MNIST_Dataset(data.Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)

    def __getitem__(self,index):
        X = np.pad(self.X[index], 2)
        X = np.repeat(X[:, :, np.newaxis], 3, axis = 2)
        X = np.transpose(X, (2, 0, 1))
        X = X.astype(float)
        Y = self.Y[index]
        return X,Y

Using the torch.utils.data DataLoader, we shuffle the data and set the batch size

In [9]:
def generateLoader(batch_size = 64):
    num_workers = 8 if cuda else 0 
        
    # MNIST Training
    train_dataset = MNIST_Dataset(train_MNIST_data, train_MNIST_labels)

    train_loader_args = dict(shuffle=True, batch_size=batch_size, num_workers=num_workers, pin_memory=True) if cuda\
                        else dict(shuffle=True, batch_size=batch_size)
    train_MNIST_loader = data.DataLoader(train_dataset, **train_loader_args)

    # MNIST Testing
    test_dataset = MNIST_Dataset(test_MNIST_data, test_MNIST_labels)

    test_loader_args = dict(shuffle=False, batch_size=batch_size, num_workers=num_workers, pin_memory=True) if cuda\
                        else dict(shuffle=False, batch_size=1)
    test_MNIST_loader = data.DataLoader(test_dataset, **test_loader_args)

    return train_MNIST_loader, test_MNIST_loader

## Define our Neural Network Model 
We define our model using the torch.nn.Module class

In [10]:
class MyCNN_Model(nn.Module):
    def __init__(self):
        super(MyCNN_Model, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size = 5)
        self.pool1 = nn.MaxPool2d(kernel_size = 2)
        self.conv2 = nn.Conv2d(32, 32, kernel_size = 5)
        self.pool2 = nn.MaxPool2d(kernel_size = 2)
        self.conv3 = nn.Conv2d(32, 10, kernel_size = 5)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = x.view(-1, 10)
        
        return x

## Create the model and define the Loss and Optimizer

In [11]:
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if cuda else "cpu")
model = MyCNN_Model()
model.to(device)
print(model)

MyCNN_Model(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 10, kernel_size=(5, 5), stride=(1, 1))
)


In [12]:
def zeroMeanUnitUniform(model_new, model, lr):
    conv1weight = model.conv1.weight.detach().cpu().numpy()
    conv2weight = model.conv2.weight.detach().cpu().numpy()
    conv3weight = model.conv3.weight.detach().cpu().numpy()
    model_new.conv1.weight = nn.Parameter(torch.from_numpy(np.random.uniform(-1, 1, conv1weight.shape)).float())
    model_new.conv2.weight = nn.Parameter(torch.from_numpy(np.random.uniform(-1, 1, conv2weight.shape)).float())
    model_new.conv3.weight = nn.Parameter(torch.from_numpy(np.random.uniform(-1, 1, conv3weight.shape)).float())
    return model_new
    
def uniform(model_new, model, lr):
    conv1weight = model.conv1.weight.detach().cpu().numpy()
    conv2weight = model.conv2.weight.detach().cpu().numpy()
    conv3weight = model.conv3.weight.detach().cpu().numpy()
    model_new.conv1.weight = nn.Parameter(torch.from_numpy(np.random.uniform(conv1weight - lr, conv1weight + lr)).float())
    model_new.conv2.weight = nn.Parameter(torch.from_numpy(np.random.uniform(conv2weight - lr, conv2weight + lr)).float())
    model_new.conv3.weight = nn.Parameter(torch.from_numpy(np.random.uniform(conv3weight - lr, conv3weight + lr)).float())
    return model_new
    
def normal(model_new, model, lr):
    model_new.conv1.weight = nn.Parameter(torch.from_numpy(np.random.normal(model.conv1.weight.detach().cpu().numpy(), scale = lr)).float())
    model_new.conv2.weight = nn.Parameter(torch.from_numpy(np.random.normal(model.conv2.weight.detach().cpu().numpy(), scale = lr)).float())
    model_new.conv3.weight = nn.Parameter(torch.from_numpy(np.random.normal(model.conv3.weight.detach().cpu().numpy(), scale = lr)).float())
    return model_new

def randomDistribution(function, model_new, model, lr):
    return function(model_new, model, lr)

In [13]:
def train_epoch(model, train_loader, criterion, epoch, lr, function):
    model.train()

    running_loss = 0.0
    predictions = []
    ground_truth = []
    loss_den = 1
    
    start_time = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):   
        data = data.to(device)
        target = target.to(device)
    
        #previous model
        outputs = model(data.float())
        _, predicted = torch.max(outputs.data, 1)
        total_predictions = target.size(0)
        correct_predictions = (predicted == target).sum().item()
        acc = (correct_predictions/total_predictions)*100.0
        
        loss = criterion(outputs, target)
        
        #new model
        model_new = MyCNN_Model()
        model_new = randomDistribution(function, model_new, model, lr)
        model_new.to(device)
        
        outputs = model_new(data.float())
        _, predicted = torch.max(outputs.data, 1)
        total_predictions = target.size(0)
        correct_predictions = (predicted == target).sum().item()
        acc_new = (correct_predictions/total_predictions)*100.0
        
        loss_new = criterion(outputs, target)

        if loss_new.item() < loss.item():
            loss_den += 1
            running_loss += loss_new.item()
            model = model_new
            #calculuating confusion matrix
            predictions += list(predicted.detach().cpu().numpy())
            ground_truth += list(target.detach().cpu().numpy())
    
    end_time = time.time()

    running_loss /= loss_den
    
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    
    return running_loss, model

## Create a function that will evaluate our network's performance on the test set

In [14]:
def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0
        
        predictions = []
        ground_truth = []

        for batch_idx, (data, target) in enumerate(test_loader):   
            data = data.to(device)
            target = target.to(device)

            outputs = model(data.float())

            _, predicted = torch.max(outputs.data, 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            loss = criterion(outputs, target).detach()
            running_loss += loss.item()
            
            #calculuating confusion matrix
            predictions += list(predicted.detach().cpu().numpy())
            ground_truth += list(target.detach().cpu().numpy())
        
        #write_confusion_matrix('Testing', ground_truth, predictions)
        running_loss /= len(test_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc


## Train the model for N epochs
We call our training and testing functions in a loop, while keeping track of the losses and accuracy. 

In [15]:
n_epochs = 40
lr = 0.001

uniformAcc = list(); normalAcc = list()

for bs in range(4, 10):
    batch_size = pow(2, bs)
    model = MyCNN_Model(); model.to(device)
    train_MNIST_loader, test_MNIST_loader = generateLoader(batch_size)
    for i in range(n_epochs):
        train_loss, model = train_epoch(model, train_MNIST_loader, criterion, i, lr, uniform)
        test_loss, MNIST_test_acc_uniform = test_model(model, test_MNIST_loader, criterion)
    uniformAcc.append(MNIST_test_acc_uniform)

for bs in range(4, 10):
    batch_size = pow(2, bs)
    model = MyCNN_Model(); model.to(device)
    train_MNIST_loader, test_MNIST_loader = generateLoader(batch_size)
    for i in range(n_epochs):
        train_loss, model = train_epoch(model, train_MNIST_loader, criterion, i, lr, normal)
        test_loss, MNIST_test_acc_normal = test_model(model, test_MNIST_loader, criterion)
    normalAcc.append(MNIST_test_acc_normal)    

print("MNIST_test_acc_uniform", uniformAcc)
print("MNIST_test_acc_normal", normalAcc)

Training Loss:  11.296766518986184 Time:  75.82500696182251 s
Testing Loss:  11.52239310309188
Testing Accuracy:  40.949999999999996 %
Training Loss:  14.176660717755624 Time:  77.2805860042572 s
Testing Loss:  14.00473974643638
Testing Accuracy:  53.400000000000006 %
Training Loss:  19.675419214326855 Time:  75.80709838867188 s
Testing Loss:  25.23082661155772
Testing Accuracy:  46.339999999999996 %
Training Loss:  27.2516233314071 Time:  80.98276257514954 s
Testing Loss:  26.662639523330906
Testing Accuracy:  48.9 %
Training Loss:  31.325083289215996 Time:  74.85376787185669 s
Testing Loss:  29.970438008408923
Testing Accuracy:  53.16 %
Training Loss:  34.07008101112807 Time:  74.54093980789185 s
Testing Loss:  32.47652908262331
Testing Accuracy:  52.53 %
Training Loss:  35.96467213932505 Time:  75.46948051452637 s
Testing Loss:  34.42345157473998
Testing Accuracy:  55.24 %
Training Loss:  39.720051864155785 Time:  74.19741177558899 s
Testing Loss:  41.57289912218658
Testing Accuracy