In [1]:
import torch
import matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time
import itertools
import json

In [2]:
# Record which PyTorch build produced the outputs below
print(torch.__version__)

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

2.2.1


In [3]:
# MNIST training split: stored under ./data (downloaded if absent), images converted to tensors
train_dataset = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# Training batches of 100 images, reshuffled on every pass
train_loader = DataLoader(dataset=train_dataset, batch_size=100, shuffle=True, num_workers=1)

# MNIST test split: read from the same ./data directory, no download requested
test_dataset = datasets.MNIST(
    root='data',
    train=False,
    transform=transforms.ToTensor(),
)
# Test batches of 100 images, kept in dataset order
test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False, num_workers=1)

# Both loaders keyed by split name so later cells can look them up as loaders['train'] / loaders['test']
data_loaders = {
    'train': train_loader,
    'test': test_loader,
}
# [1] https://www.kaggle.com/code/fgiorgio/multi-layer-perceptron-mnist

In [4]:
# Fixed model / data settings.
# The training cell reshapes every batch to (-1, seq_length, input_dimensions),
# i.e. each image is fed to the network as 28 steps of 28 features.
seq_length = 28
input_dimensions = 28
hidden_unit = 128      # width of each recurrent layer
rnn_layers = 2         # number of stacked recurrent layers
output_classes = 10    # size of the final classification layer
batch_sz = 100         # NOTE(review): not referenced by the visible cells; the loaders hard-code batch_size=100

# Values explored by the sweep
learning_rates = [0.01, 0.001, 0.0005]
num_epochs = [10, 20, 50]

# Every (learning rate, epoch count) pair, learning rate varying slowest
param_combinations = [(rate, epochs) for rate in learning_rates for epochs in num_epochs]

In [5]:
# Construct RNN architecture
class RNNModel(nn.Module):
    """Stacked LSTM followed by a linear classification layer.

    The input is a batch-first sequence tensor of shape
    (batch, seq_len, input_dim); the output is one row of `output_classes`
    scores per sequence, computed from the LSTM output at the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_units: size of the hidden state of every LSTM layer.
        rnn_layers: number of stacked LSTM layers.
        output_classes: number of scores produced per sequence.
    """

    def __init__(self, input_dim, hidden_units, rnn_layers, output_classes):
        super().__init__()
        self.hidden_units = hidden_units
        self.rnn_layers = rnn_layers
        self.rnn = nn.LSTM(input_dim, hidden_units, rnn_layers, batch_first=True)
        self.fc = nn.Linear(hidden_units, output_classes)

    def forward(self, x):
        """Return class scores of shape (batch, output_classes) for `x`.

        Args:
            x: tensor of shape (batch, seq_len, input_dim).
        """
        # Zero initial hidden/cell states, created with the input's own device
        # and dtype so the model does not depend on a notebook-level `device`
        # variable and keeps working after .to(...) / .double() / .half().
        h0 = x.new_zeros(self.rnn_layers, x.size(0), self.hidden_units)
        c0 = x.new_zeros(self.rnn_layers, x.size(0), self.hidden_units)

        # `out` holds the top layer's output for every time step; the final
        # (hidden, cell) state tuple is not needed here.
        out, _ = self.rnn(x, (h0, c0))

        # Classify from the last time step of each sequence
        return self.fc(out[:, -1, :])


In [None]:
results = {}


def train_model(epoch_count, neural_net, loaders, loss_function, optimizer, history):
    """Train `neural_net` for `epoch_count` passes over `loaders['train']`.

    Relies on the notebook-level `seq_length`, `input_dimensions` and `device`.

    Args:
        epoch_count: number of full passes over the training loader.
        neural_net: model to optimise; receives batches reshaped to
            (-1, seq_length, input_dimensions).
        loaders: dict with a 'train' loader yielding (data, target) batches.
        loss_function: callable mapping (predictions, target) to a scalar loss tensor.
        optimizer: optimizer bound to `neural_net`'s parameters.
        history: dict whose 'train_losses' list receives one loss value per batch.
    """
    neural_net.train()  # test_model() switches the network to eval mode
    steps_per_epoch = len(loaders['train'])

    for epoch in range(epoch_count):
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # Prep batch data for processing
            data = data.view(-1, seq_length, input_dimensions).to(device)
            target = target.to(device)

            # Execute a forward pass through the network
            predictions = neural_net(data)
            loss = loss_function(predictions, target)

            # Compute gradients and adjust model weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            history['train_losses'].append(loss_value)

            # Output training process metrics every 100 batches
            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch [{epoch + 1}/{epoch_count}], '
                      f'Step [{batch_idx + 1}/ {steps_per_epoch}], '
                      f'Loss: {loss_value:.4f}')


def test_model(neural_net, loaders, loss_function, history):
    """Evaluate `neural_net` on `loaders['test']` and record the metrics.

    Appends one loss value per test batch to `history['val_losses']` and the
    overall accuracy (in percent) to `history['accuracies']`.

    Relies on the notebook-level `seq_length`, `input_dimensions` and `device`.

    Returns:
        The overall accuracy in percent.
    """
    neural_net.eval()  # Transition model to evaluation mode
    total_samples = 0
    correct_predictions = 0

    with torch.no_grad():
        for data, target in loaders['test']:
            data = data.view(-1, seq_length, input_dimensions).to(device)
            target = target.to(device)
            predictions = neural_net(data)

            history['val_losses'].append(loss_function(predictions, target).item())

            predicted_classes = predictions.argmax(dim=1)
            correct_predictions += (predicted_classes == target).sum().item()
            total_samples += target.size(0)

    # Calculate overall accuracy after processing all batches
    overall_accuracy = 100 * correct_predictions / total_samples
    history['accuracies'].append(overall_accuracy)

    print(f'Test Accuracy of the model on the {total_samples} test images:{overall_accuracy:.2f}%')
    return overall_accuracy


def save_results(all_results, path='RNNresults.json'):
    """Write the sweep results to `path` as indented (human-readable) JSON."""
    with open(path, 'w') as json_file:
        json.dump(all_results, json_file, indent=4)


for lr, epoch_n in param_combinations:

    # Start the timer for this combination (covers model setup, training and evaluation)
    start_time = time.time()

    # Results are keyed by "<lr>,<epochs>"
    current_hyperparameters = f"{lr},{epoch_n}"
    history = results[current_hyperparameters] = {
        'train_losses': [],
        'val_losses': [],
        'accuracies': [],
        'execution_time': []
    }

    print(f"Training with lr={lr}, epoch={epoch_n}")

    # Instantiate a fresh model, loss and optimizer for this combination
    model = RNNModel(input_dimensions, hidden_unit, rnn_layers, output_classes).to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training phase followed by evaluation on the test split
    train_model(epoch_n, model, data_loaders, loss_function, optimizer, history)
    test_model(model, data_loaders, loss_function, history)

    # Stop the timer
    total_time = time.time() - start_time
    history['execution_time'].append(total_time)
    print(f"Total execution time: {total_time} seconds")

    # Checkpoint after every combination so an interrupted sweep keeps the finished runs
    save_results(results)

# Save dictionary to a JSON file (also covers the case of an empty sweep)
save_results(results)

Training with lr=0.01, epoch=10
Epoch [1/10], Step [100/ 600], Loss: 0.5282
Epoch [1/10], Step [200/ 600], Loss: 0.3638
Epoch [1/10], Step [300/ 600], Loss: 0.2812
Epoch [1/10], Step [400/ 600], Loss: 0.3004
Epoch [1/10], Step [500/ 600], Loss: 0.1060
Epoch [1/10], Step [600/ 600], Loss: 0.1280
Epoch [2/10], Step [100/ 600], Loss: 0.0552
Epoch [2/10], Step [200/ 600], Loss: 0.1195
Epoch [2/10], Step [300/ 600], Loss: 0.0303
Epoch [2/10], Step [400/ 600], Loss: 0.0955
Epoch [2/10], Step [500/ 600], Loss: 0.0573
Epoch [2/10], Step [600/ 600], Loss: 0.1726
Epoch [3/10], Step [100/ 600], Loss: 0.0780
Epoch [3/10], Step [200/ 600], Loss: 0.0959
Epoch [3/10], Step [300/ 600], Loss: 0.0778
Epoch [3/10], Step [400/ 600], Loss: 0.1002
Epoch [3/10], Step [500/ 600], Loss: 0.0137
Epoch [3/10], Step [600/ 600], Loss: 0.1489
Epoch [4/10], Step [100/ 600], Loss: 0.0432
Epoch [4/10], Step [200/ 600], Loss: 0.0677
Epoch [4/10], Step [300/ 600], Loss: 0.0964
Epoch [4/10], Step [400/ 600], Loss: 0.0169


In [None]:
# After all epochs are done (after the training and evaluation loop)
# `results` maps each "<lr>,<epochs>" key to the lists recorded by the sweep:
# 'train_losses' holds one value per training batch and 'val_losses' one value
# per test batch, so the x-axes count batches rather than epochs and the two
# series are drawn in separate panels.
fig, (train_ax, val_ax) = plt.subplots(1, 2, figsize=(10, 5))

for setting, history in results.items():
    train_ax.plot(history['train_losses'], label=f'lr,epochs={setting}')
    val_ax.plot(history['val_losses'], label=f'lr,epochs={setting}')

train_ax.set(title='Training Loss', xlabel='Training batch', ylabel='Loss')
val_ax.set(title='Validation Loss', xlabel='Test batch', ylabel='Loss')

# Only draw legends when at least one curve was plotted
if results:
    train_ax.legend()
    val_ax.legend()

fig.suptitle('Training and Validation Loss')
fig.tight_layout()
plt.show()

