In [1]:
import torch
from torchvision import transforms,datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import time
import itertools
import json

In [2]:
# Prefer the GPU when PyTorch reports one; otherwise run everything on the CPU
if torch.cuda.is_available():
    compute_device = torch.device('cuda')
else:
    compute_device = torch.device('cpu')
print(f'Using {compute_device} device for computation.')

Using cpu device for computation.


In [3]:
# MNIST training split, stored under ./dataset_storage (downloaded if missing),
# with each image converted to a tensor
training_dataset = datasets.MNIST(
    './dataset_storage',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# MNIST test split, read from the same directory with the same tensor conversion
testing_dataset = datasets.MNIST(
    './dataset_storage',
    train=False,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [4]:
# Batching configuration shared by both loaders
BATCH_SIZE = 100
NUM_WORKERS = 2

# Loaders for batching the datasets.
# Only the training loader shuffles: evaluation metrics do not depend on sample
# order, and a fixed order keeps the recorded per-batch validation losses
# comparable from one run to the next.
data_loaders = {
    'train_loader': DataLoader(
        training_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS
    ),
    'test_loader': DataLoader(
        testing_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS
    ),
}

# Hyperparameter grid: every (learning rate, epoch count) pair is trained once
learning_rates = [0.01, 0.001, 0.0005]
num_epochs = [10, 20, 50]

param_combinations = list(itertools.product(learning_rates, num_epochs))

In [5]:
# Defining the Neural Network Architecture
class CNN(nn.Module):
    """Two-block convolutional classifier producing 10 class scores.

    Each block is Conv2d (5x5 kernel, stride 1, padding 2) -> ReLU -> 2x2 max
    pooling. The convolutions preserve spatial size and each pooling halves it,
    so the ``32*7*7`` input size of the final linear layer corresponds to
    single-channel 28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        # Block 1: 1 -> 16 channels, spatial size halved by the pooling
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        # Block 2: 16 -> 32 channels, spatial size halved again
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        # Linear classifier over the flattened feature maps
        self.dense = nn.Linear(32*7*7, 10)

    def forward(self, input_data):
        """Compute class scores for a batch of images.

        Implementing ``forward`` (rather than only a custom-named method) lets
        the model be invoked as ``model(x)``, which is the entry point that
        ``nn.Module`` hooks rely on.

        Args:
            input_data: Batch tensor whose first dimension is the batch size.

        Returns:
            Tensor with one row of 10 unnormalised class scores per sample.
        """
        features = self.layer1(input_data)
        features = self.layer2(features)
        features = features.view(features.size(0), -1)  # Flatten per sample
        return self.dense(features)

    def forward_pass(self, input_data):
        """Backward-compatible alias for calling the module on ``input_data``."""
        return self(input_data)

In [6]:
def train_model(epochs, network, loaders, loss_fn=None, optimizer=None, run_log=None):
    """Train ``network`` on ``loaders['train_loader']`` for ``epochs`` epochs.

    The loss of every batch is appended to ``run_log['train_losses']`` and a
    progress line is printed every 100 batches.

    Args:
        epochs: Number of passes over the training loader.
        network: Model to optimise; its ``forward_pass`` method is used.
        loaders: Dict containing a ``'train_loader'`` entry.
        loss_fn: Loss callable. Defaults to the module-level ``criterion``.
        optimizer: Optimizer stepping ``network``'s parameters. Defaults to
            the module-level ``opt``.
        run_log: Dict with a ``'train_losses'`` list. Defaults to
            ``results[current_hyperparameters]``.
    """
    # Legacy three-argument calls fall back to the sweep's module-level state.
    if loss_fn is None:
        loss_fn = criterion
    if optimizer is None:
        optimizer = opt
    if run_log is None:
        run_log = results[current_hyperparameters]

    network.train()  # Set the network to training mode

    for e in range(epochs):
        for batch_idx, (inputs, targets) in enumerate(loaders['train_loader']):
            inputs, targets = inputs.to(compute_device), targets.to(compute_device)
            optimizer.zero_grad()
            outputs = network.forward_pass(inputs)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()

            # Read the scalar once; it is needed for both the log and the print
            batch_loss = loss.item()
            run_log['train_losses'].append(batch_loss)

            if batch_idx % 100 == 0:
                print(f'Epoch {e+1}/{epochs}, Batch {batch_idx}, Loss: {batch_loss}')

    print("Training complete.")


def evaluate_model(network, loaders, loss_fn=None, run_log=None):
    """Evaluate ``network`` on ``loaders['test_loader']``.

    Appends each batch loss to ``run_log['val_losses']`` and the overall
    accuracy (in percent) to ``run_log['accuracies']``, then prints it.

    Args:
        network: Model to evaluate; its ``forward_pass`` method is used.
        loaders: Dict containing a ``'test_loader'`` entry.
        loss_fn: Loss callable. Defaults to the module-level ``criterion``.
        run_log: Dict with ``'val_losses'`` and ``'accuracies'`` lists.
            Defaults to ``results[current_hyperparameters]``.

    Returns:
        The accuracy in percent.
    """
    # Legacy two-argument calls fall back to the sweep's module-level state.
    if loss_fn is None:
        loss_fn = criterion
    if run_log is None:
        run_log = results[current_hyperparameters]

    network.eval()  # Set the network to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No need to track gradients for validation
        for inputs, targets in loaders['test_loader']:
            inputs, targets = inputs.to(compute_device), targets.to(compute_device)
            outputs = network.forward_pass(inputs)

            run_log['val_losses'].append(loss_fn(outputs, targets).item())

            # Predicted class = index of the highest score in each row
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    run_log['accuracies'].append(accuracy)
    print(f'Accuracy of the network on the test images: {accuracy:.2f}%')
    return accuracy


def _save_results(all_results, path='CNNresults.json'):
    """Write the sweep results to ``path`` as indented JSON."""
    with open(path, 'w') as json_file:
        json.dump(all_results, json_file, indent=4)  # `indent` makes the file human-readable


results = {}

for lr, epoch_n in param_combinations:

    # Start program timer
    start_time = time.time()

    current_hyperparameters = f"{lr},{epoch_n}"

    results[current_hyperparameters] = {
        'train_losses': [],
        'val_losses': [],
        'accuracies': [],
        'execution_time': []
    }

    print(f"Training with lr={lr}, epoch={epoch_n}")

    # Instantiate a fresh network, loss function and optimizer for this configuration
    net = CNN().to(compute_device)
    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(net.parameters(), lr=lr)

    train_model(
        epoch_n, net, data_loaders,
        loss_fn=criterion, optimizer=opt, run_log=results[current_hyperparameters],
    )

    # Evaluate the trained model
    evaluate_model(
        net, data_loaders,
        loss_fn=criterion, run_log=results[current_hyperparameters],
    )

    # End program timer (covers training and evaluation, not the file write below)
    end_time = time.time()
    total_time = end_time - start_time
    results[current_hyperparameters]['execution_time'].append(total_time)
    print(f"Total execution time: {total_time} seconds")

    # Checkpoint after every configuration so an interrupted sweep keeps the
    # runs that already finished
    _save_results(results)

# Save dictionary to a JSON file (also covers an empty sweep)
_save_results(results)

Training with lr=0.01, epoch=1
Epoch 1/1, Batch 0, Loss: 2.3010306358337402
Epoch 1/1, Batch 100, Loss: 0.15400080382823944
Epoch 1/1, Batch 200, Loss: 0.12409922480583191
Epoch 1/1, Batch 300, Loss: 0.06551418453454971
Epoch 1/1, Batch 400, Loss: 0.13558310270309448
Epoch 1/1, Batch 500, Loss: 0.023453976958990097
Training complete.
0.04513672739267349
0.10174111276865005
0.04545227065682411
0.048181869089603424
0.08316335082054138
0.04794181510806084
0.05645256116986275
0.055097710341215134
0.041632503271102905
0.013498025946319103
0.03134528174996376
0.06690237671136856
0.07029944658279419
0.0370035283267498
0.02422330155968666
0.1417313516139984
0.05704381316900253
0.12960253655910492
0.12287607043981552
0.05381938815116882
0.037717778235673904
0.03228001669049263
0.15333902835845947
0.0598645843565464
0.12294132262468338
0.09055989235639572
0.04419853538274765
0.07037985324859619
0.043372318148612976
0.14949394762516022
0.06404400616884232
0.03764300420880318
0.04060574620962143
0