In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as network
from torch import optim
import itertools
import time
import json

In [2]:
# Start program timer (wall-clock reference; the grid-search cell reports elapsed
# time relative to this value)
start_time = time.time()

# Seed PyTorch's global RNG so that data shuffling, weight initialisation and
# dropout masks are repeatable on a fresh Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Set parameters for data processing
num_workers = 0  # 0 => batches are loaded in the main process
batch_size = 20

# Data transformation pipeline: convert images to tensors, then normalise the
# single channel with mean 0.5 and std 0.5
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Load MNIST dataset (downloaded into ./data_folder on first use)
train_set = datasets.MNIST(root='data_folder', train=True, download=True,
                           transform=data_transforms)
test_set = datasets.MNIST(root='data_folder', train=False, download=True,
                          transform=data_transforms)

# Initialize data loaders: training data is reshuffled every epoch, test data
# keeps its original order
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True,
                          num_workers=num_workers)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False,
                         num_workers=num_workers)

# [1] https://www.kaggle.com/code/fgiorgio/multi-layer-perceptron-mnist

In [3]:
# Multi-layer perceptron used to classify 28x28 digit images
class DigitRecognizer(network.Module):
    """Fully connected classifier: 784 -> 512 -> 512 -> 10.

    Each hidden layer is followed by ReLU and dropout (p=0.2); the final layer
    returns raw, un-normalised class scores.
    """

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_outputs = 28 * 28, 512, 10
        # Layers are created in fc1, fc2, fc3 order.
        self.fc1 = network.Linear(n_inputs, n_hidden)
        self.fc2 = network.Linear(n_hidden, n_hidden)
        self.fc3 = network.Linear(n_hidden, n_outputs)
        self.dropout = network.Dropout(p=0.2)

    def forward(self, tensor):
        """Flatten the input to rows of 784 values and return one 10-way score row per sample."""
        activations = tensor.view(-1, 28 * 28)
        # Both hidden layers share the same Linear -> ReLU -> Dropout pattern.
        for hidden_layer in (self.fc1, self.fc2):
            activations = self.dropout(torch.relu(hidden_layer(activations)))
        return self.fc3(activations)

In [4]:
# Hyperparameter grid: every learning rate is paired with every epoch budget
learning_rates = [0.01, 0.001, 0.0005]
num_epochs = [10, 20, 50]
param_combinations = list(itertools.product(learning_rates, num_epochs))

# Number of target classes (digits 0-9); matches the width of DigitRecognizer.fc3
NUM_CLASSES = 10

# Metrics per configuration, keyed by "<lr>,<epochs>"
results = {}


# Define the training process
def train_network(epochs, model, loader):
    """Train `model` on `loader` for `epochs` full passes.

    Relies on the module-level `optimizer`, `loss_function`, `results` and
    `current_hyperparameters`, which the grid-search loop below (re)binds before
    every call. Each batch loss is appended to
    `results[current_hyperparameters]['train_losses']`, and the mean batch loss
    is printed after every epoch.
    """
    model.train()  # make sure dropout is active while fitting
    batch_losses = results[current_hyperparameters]['train_losses']
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            batch_loss = loss.item()
            batch_losses.append(batch_loss)
            running_loss += batch_loss

        print(f'Epoch {epoch + 1} complete: Avg. Loss: {running_loss / len(loader)}')


# Define the testing process
def test_network(model, loader):
    """Evaluate `model` on `loader` and record loss and accuracy.

    Relies on the module-level `loss_function`, `results` and
    `current_hyperparameters` set by the grid-search loop below. Each batch loss
    is appended to `results[current_hyperparameters]['val_losses']` and the
    overall accuracy (in percent) to `...['accuracies']`. Overall and per-digit
    accuracies are printed.
    """
    was_training = model.training
    model.eval()  # disable dropout; otherwise units are randomly dropped at test time

    total_correct = 0
    total_samples = 0
    class_correct = torch.zeros(NUM_CLASSES, dtype=torch.long)
    class_total = torch.zeros(NUM_CLASSES, dtype=torch.long)
    val_losses = results[current_hyperparameters]['val_losses']
    try:
        with torch.no_grad():
            for inputs, labels in loader:
                outputs = model(inputs)
                val_losses.append(loss_function(outputs, labels).item())

                _, predicted = torch.max(outputs, 1)
                hits = predicted == labels
                total_correct += hits.sum().item()
                total_samples += labels.size(0)
                # Count per class from the labels actually present in this batch,
                # so a short final batch (or a batch of one) is handled correctly.
                class_total += torch.bincount(labels, minlength=NUM_CLASSES)
                class_correct += torch.bincount(labels[hits], minlength=NUM_CLASSES)
    finally:
        model.train(was_training)  # leave the model in the mode the caller had it in

    accuracy = 100 * total_correct / total_samples
    results[current_hyperparameters]['accuracies'].append(accuracy)
    print(f'Test accuracy: {accuracy}%')
    for digit, (n_correct, n_seen) in enumerate(zip(class_correct.tolist(),
                                                    class_total.tolist())):
        if n_seen:
            print(f'Accuracy of digit {digit}: {100 * n_correct / n_seen}%')
        else:
            # Avoid ZeroDivisionError when a class never appears in `loader`
            print(f'Accuracy of digit {digit}: N/A (no samples)')


for lr, epoch_n in param_combinations:
    print(f"Training with lr={lr}, epoch={epoch_n}")

    # Fresh model, loss function and optimizer for every configuration
    digit_recognizer = DigitRecognizer()
    loss_function = network.CrossEntropyLoss()
    optimizer = optim.SGD(digit_recognizer.parameters(), lr=lr)

    current_hyperparameters = str(lr) + "," + str(epoch_n)
    results[current_hyperparameters] = {
        'train_losses': [],
        'val_losses': [],
        'accuracies': [],
        'execution_time': []
    }

    # Training and testing
    train_network(epoch_n, digit_recognizer, train_loader)
    test_network(digit_recognizer, test_loader)

    # End program timer
    # NOTE: this is the time elapsed since `start_time` was set in the data-loading
    # cell, i.e. cumulative over all configurations run so far, not the cost of
    # this configuration alone.
    end_time = time.time()
    total_time = end_time - start_time
    results[current_hyperparameters]['execution_time'].append(total_time)
    print(f"Total execution time: {total_time} seconds")


# Save results to a JSON file
with open('MLPresults.json', 'w', encoding='utf-8') as json_file:
    json.dump(results, json_file, indent=4)

Training with lr=0.01, epoch=1
Epoch 1 complete: Avg. Loss: 0.5679545292245844
Test accuracy: 91.4%
Accuracy of digit 0: 97.24489795918367%
Accuracy of digit 1: 98.06167400881057%
Accuracy of digit 2: 90.31007751937985%
Accuracy of digit 3: 90.6930693069307%
Accuracy of digit 4: 93.38085539714868%
Accuracy of digit 5: 85.42600896860986%
Accuracy of digit 6: 93.84133611691023%
Accuracy of digit 7: 92.60700389105058%
Accuracy of digit 8: 85.62628336755647%
Accuracy of digit 9: 85.43111992071358%
Total execution time: 26.390687465667725 seconds
Training with lr=0.01, epoch=2
Epoch 1 complete: Avg. Loss: 0.5757933606406053
Epoch 2 complete: Avg. Loss: 0.26798166749812663
Test accuracy: 93.12%
Accuracy of digit 0: 97.85714285714286%
Accuracy of digit 1: 97.26872246696036%
Accuracy of digit 2: 93.9922480620155%
Accuracy of digit 3: 95.54455445544555%
Accuracy of digit 4: 93.58452138492872%
Accuracy of digit 5: 86.7713004484305%
Accuracy of digit 6: 95.82463465553236%
Accuracy of digit 7: 89.

In [None]:
# Obtain one batch of training images
dataiter = iter(train_loader)
# Use the built-in next(): the legacy `.next()` method is not part of the
# Python 3 iterator protocol and is absent on recent PyTorch DataLoader iterators.
images, labels = next(dataiter)
images = images.numpy()  # convert to a NumPy array

#--------------------------------?????????????????????????-------------------------------------