# Importing the inbuilt libraries for pytorch and iteration tools

In [8]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torchsummary import summary
import itertools

# Check if CUDA if it is available and get the number of available GPUs as I am are using multiple GPUs for this CNN. (parallel computing) 

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
num_gpus = torch.cuda.device_count()

cuda


# Defining transforms with data augmentation

In [3]:
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Adjusting the batch size and number of workers based on the number of GPUs available.

In [4]:
batch_size_per_gpu = 32
num_workers_per_gpu = 4
batch_size = batch_size_per_gpu * num_gpus
num_workers = num_workers_per_gpu * num_gpus

# Load CIFAR-10 dataset with updated batch size and number of workers
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

Files already downloaded and verified




Files already downloaded and verified


# Defining a function for creating and fine-tuning ResNet-18 model for CIFAR-10 classification.

In [6]:
def fine_tune_resnet18(nodes_hidden, strength_regularization, use_batch_norm):
    # Load pre-trained ResNet-18 model
    resnet = models.resnet18(weights=torchvision.models.resnet.ResNet18_Weights.DEFAULT)
    
    # Modify the fully connected layer to have the specified number of nodes in hidden layer
    num_ftrs = resnet.fc.in_features
    resnet.fc = nn.Linear(num_ftrs, nodes_hidden)
    
    # If using batch normalization, replace the BatchNorm layers with GroupNorm layers
    if use_batch_norm:
        resnet = replace_bn_with_gn(resnet)
    
    # Move model to GPU if available
    resnet = resnet.to(device)
    
    # Define loss function and optimizer with weight decay
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(resnet.parameters(), lr=0.001, momentum=0.9, weight_decay=strength_regularization)
    
    # Train the model for 10 epochs
    num_epochs = 10
    for epoch in range(num_epochs):
        resnet.train()
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = resnet(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
    
    # Test the model after 10 epochs
    resnet.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = resnet(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total * 100
    print(f'Accuracy of the network on the {total} test images after 10 epochs: {accuracy}%')
    return accuracy

### Replace batch normalization with group normalization and perform a grid search to find the best hyper parameter in the list of hyper parameters namely
### nodes_hidden_list = [64, 128, 256]  this is the number of nodes in the hidden layer
### strength_regularization_list = [0.0001, 0.001, 0.01]
### use_batch_norm_values = [True, False]

In [9]:
def replace_bn_with_gn(model):
    # Create a copy of the model's state dictionary
    state_dict_copy = model.state_dict()

    # Iterate over the copied dictionary
    for name, module in state_dict_copy.items():
        if isinstance(module, nn.BatchNorm2d):
            num_groups = 32  # Assuming 32 as the default number of groups
            gn = nn.GroupNorm(num_groups, module.num_features, eps=module.eps, affine=module.affine)
            state_dict_copy[name] = gn
    # Load the modified state dictionary back into the model
    model.load_state_dict(state_dict_copy)
    return model

# Perform grid search for different hyperparameters
nodes_hidden_list = [64, 128, 256]
strength_regularization_list = [0.0001, 0.001, 0.01]
use_batch_norm_values = [True, False]

best_accuracy = 0
best_hyperparameters = {} 

for nodes_hidden, strength_regularization, use_bn in itertools.product(nodes_hidden_list, strength_regularization_list, use_batch_norm_values):
    print(f"Training model with nodes_hidden={nodes_hidden}, strength_regularization={strength_regularization}, use_batch_norm={use_bn}")
    accuracy = fine_tune_resnet18(nodes_hidden, strength_regularization, use_bn)
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_hyperparameters = {'nodes_hidden': nodes_hidden, 'strength_regularization': strength_regularization, 'use_batch_norm': use_bn}

print("Grid search complete.")
print("Best hyperparameters:", best_hyperparameters)
print("Best accuracy:", best_accuracy)

Training model with nodes_hidden=64, strength_regularization=0.0001, use_batch_norm=True
Accuracy of the network on the 10000 test images after 10 epochs: 81.91000000000001%
Training model with nodes_hidden=64, strength_regularization=0.0001, use_batch_norm=False
Accuracy of the network on the 10000 test images after 10 epochs: 80.99%
Training model with nodes_hidden=64, strength_regularization=0.001, use_batch_norm=True
Accuracy of the network on the 10000 test images after 10 epochs: 80.78999999999999%
Training model with nodes_hidden=64, strength_regularization=0.001, use_batch_norm=False
Accuracy of the network on the 10000 test images after 10 epochs: 81.78999999999999%
Training model with nodes_hidden=64, strength_regularization=0.01, use_batch_norm=True
Accuracy of the network on the 10000 test images after 10 epochs: 82.5%
Training model with nodes_hidden=64, strength_regularization=0.01, use_batch_norm=False
Accuracy of the network on the 10000 test images after 10 epochs: 82.

# Best accuracy obtained after 10 epochs is 82.58% for 10000 images in the test set and the best hyper parameters are 'nodes_hidden'= 256, 'strength_regularization'= 0.01, 'use_batch_norm'= True

### To further improve the performance and get a better accuracy percentage I increased the number of epochs from 10 to 50 and tuned other hyper parameters like learning rate and weight decay.

# Hyper parameter tuning-2

### learning_rates = [0.001, 0.01, 0.1]
### weight_decays = [0.0001, 0.00001, 0.000001]
### initially I have taken a set of hyper parameters as hypothesis parameters that i thought would give the best accuracy, they were learning rate = 0.01 , weight_decay = 0.0001 but later I have commented that code.

# Define a function for creating and training the model with given hyperparameters

In [5]:
def train_model(learning_rate, weight_decay):
    # Load pre-trained ResNet-18 model with the most up-to-date weights
    resnet = models.resnet18(weights=torchvision.models.resnet.ResNet18_Weights.DEFAULT).to(device)

    # Modify the last fully connected layer to have 10 output classes for CIFAR-10
    num_ftrs = resnet.fc.in_features
    resnet.fc = nn.Linear(num_ftrs, 10).to(device)

    # Wrap the model with nn.DataParallel to use multiple GPUs
    resnet = nn.DataParallel(resnet)

    # Define loss function and optimizer with weight decay
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(resnet.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)

    # Train the model for a reduced number of epochs
    num_epochs = 50
    resnet.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = resnet(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

    # Test the model
    resnet.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = resnet(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the {total} test images after 50 epochs: {accuracy}%')
    return accuracy

# Perform grid search for different learning rates and weight decays
learning_rates = [0.001, 0.01, 0.1]
weight_decays = [0.0001, 0.00001, 0.000001]

best_accuracy = 0
best_hyperparameters = {}

for lr in learning_rates:
    for wd in weight_decays:
        print(f"Training model with learning rate={lr} and weight decay={wd}")
        accuracy = train_model(lr, wd)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_hyperparameters = {'learning_rate': lr, 'weight_decay': wd}

print("Grid search complete.")
print("Best hyperparameters:", best_hyperparameters)
print("Best accuracy:", best_accuracy)

# printing the accuracy for a random guess of learning rate = 0.01 and weight_decay= 0.0001
#learning rate = 0.01
#weight_decay = 0.0001
#accuracy = train_model(learning_rate, weight_decay)
#print(f'Learning Rate: {learning_rate}, Weight Decay: {weight_decay}, Accuracy: {accuracy}%')

# but found that learning rate = 0.01 and weight_decay = 0.000001 are the best hyper parametrs
accuracy = train_model(best_hyperparameters['learning_rate'], best_hyperparameters['weight_decay'])
print(f'Best Learning Rate: {best_hyperparameters["learning_rate"]}, Best Weight Decay: {best_hyperparameters["weight_decay"]}, Best Accuracy: {best_accuracy}%')

Training model with learning rate=0.001 and weight decay=0.0001
Accuracy of the network on the 10000 test images after 50 epochs: 85.15%
Training model with learning rate=0.001 and weight decay=1e-05
Accuracy of the network on the 10000 test images after 50 epochs: 85.25%
Training model with learning rate=0.001 and weight decay=1e-06
Accuracy of the network on the 10000 test images after 50 epochs: 85.16%
Training model with learning rate=0.01 and weight decay=0.0001
Accuracy of the network on the 10000 test images after 50 epochs: 85.33%
Training model with learning rate=0.01 and weight decay=1e-05
Accuracy of the network on the 10000 test images after 50 epochs: 85.18%
Training model with learning rate=0.01 and weight decay=1e-06
Accuracy of the network on the 10000 test images after 50 epochs: 85.65%
Training model with learning rate=0.1 and weight decay=0.0001
Accuracy of the network on the 10000 test images after 50 epochs: 79.1%
Training model with learning rate=0.1 and weight de

unexpectedly I have printed the wrong accuracy in the last line instead of accuracy from the last iteration I printed the second last, but the final accuracy obtained is 86.44% for the 10000 images in the test set.

# Finally after training the model over 50 epochs we were able to reach a test accuracy of 86.44 %

# The best hyper parameters that got me this accuracy were.
# Best Learning Rate: 0.01, Best Weight Decay: 1e-06