In [None]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix, accuracy_score
from torch.utils.data import DataLoader, random_split

def load_svhn(batch_size=32, root='./data', val_fraction=0.1, seed=None):
    """Build train/validation/test ``DataLoader`` objects for SVHN.

    The SVHN ``train`` split is downloaded (if needed) and divided at random
    into a training subset and a validation subset; the ``test`` split is
    used unchanged.

    Args:
        batch_size: Batch size used by all three loaders.
        root: Directory the dataset files are stored in / downloaded to.
        val_fraction: Fraction of the ``train`` split held out for
            validation. Must satisfy ``0 <= val_fraction < 1``.
        seed: Optional integer seed for the train/validation split. When
            ``None`` the split uses PyTorch's global RNG, so it differs
            from call to call. Only the split is seeded; the shuffling
            order of the training loader is not.

    Returns:
        A ``(train_loader, val_loader, test_loader)`` tuple. Only the
        training loader shuffles.

    Raises:
        ValueError: If ``val_fraction`` is outside ``[0, 1)``.
    """
    if not 0.0 <= val_fraction < 1.0:
        raise ValueError(f'val_fraction must be in [0, 1), got {val_fraction}')

    transform = transforms.Compose([
        transforms.ToTensor(),
        # Per-channel mean / std passed to Normalize
        # (presumably SVHN training-set statistics -- TODO confirm).
        transforms.Normalize((0.4377, 0.4438, 0.4728), (0.198, 0.201, 0.197)),
    ])

    train_dataset = datasets.SVHN(root=root, split='train', transform=transform, download=True)
    test_dataset = datasets.SVHN(root=root, split='test', transform=transform, download=True)

    val_size = int(val_fraction * len(train_dataset))
    train_size = len(train_dataset) - val_size

    # Only hand random_split a generator when a seed was requested, so the
    # unseeded path behaves exactly like a plain random_split call.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_subset, val_subset = random_split(train_dataset, [train_size, val_size], **split_kwargs)

    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

class FeedforwardNN(nn.Module):
    """Fully connected network applied to flattened inputs.

    The network is a stack of ``Linear -> activation`` pairs, one per entry
    of ``hidden_layers``, followed by a final ``Linear`` layer that produces
    ``output_size`` raw scores (no activation is applied to the output).

    Args:
        input_size: Number of features per sample after flattening.
        hidden_layers: Sequence of hidden layer widths. ``None`` selects
            the default ``[32, 64, 128]``.
        output_size: Number of output units.
        activation: ``'relu'`` or ``'sigmoid'``; used after every hidden
            layer.
        weight_init: ``'xavier'`` applies Xavier-uniform initialisation and
            ``'random'`` applies ``nn.init.uniform_`` (with its default
            bounds) to the hidden layer weights. Any other value leaves
            PyTorch's default initialisation in place. The output layer
            always keeps the default initialisation.

    Raises:
        ValueError: If ``activation`` is not a supported name.
    """

    def __init__(self, input_size=3072, hidden_layers=None, output_size=10, activation='relu', weight_init='xavier'):
        super().__init__()

        # A None sentinel avoids sharing one mutable default list between calls.
        if hidden_layers is None:
            hidden_layers = [32, 64, 128]

        activation_classes = {'relu': nn.ReLU, 'sigmoid': nn.Sigmoid}
        if activation not in activation_classes:
            # Fail loudly rather than silently substituting another activation.
            raise ValueError(
                f'Unsupported activation {activation!r}; expected one of {sorted(activation_classes)}'
            )
        activation_cls = activation_classes[activation]

        layers = []
        prev_size = input_size
        for width in hidden_layers:
            linear = nn.Linear(prev_size, width)
            if weight_init == 'xavier':
                nn.init.xavier_uniform_(linear.weight)
            elif weight_init == 'random':
                nn.init.uniform_(linear.weight)
            layers.append(linear)
            layers.append(activation_cls())
            prev_size = width

        layers.append(nn.Linear(prev_size, output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten each sample of ``x`` and return the network's raw scores."""
        # reshape (unlike view) also accepts non-contiguous inputs, e.g. a
        # permuted image batch.
        x = x.reshape(x.size(0), -1)
        return self.model(x)

def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10):
    """Train ``model`` and report validation accuracy after every epoch.

    The model is moved to CUDA when available, otherwise to the CPU. After
    each epoch the mean training loss per batch and the validation accuracy
    (computed by ``evaluate_model``) are printed.

    Args:
        model: The ``nn.Module`` to train (updated in place).
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Iterable of validation batches passed to
            ``evaluate_model``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A list with one dict per epoch, holding the keys ``'epoch'``
        (1-based), ``'loss'`` (mean batch loss) and ``'val_acc'``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    history = []
    for epoch in range(epochs):
        # evaluate_model switches to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Counting batches keeps this working for loaders without __len__
        # and avoids a ZeroDivisionError when the loader yields nothing.
        avg_loss = running_loss / num_batches if num_batches else float('nan')

        val_acc = evaluate_model(model, val_loader)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Val Accuracy: {val_acc:.4f}')
        history.append({'epoch': epoch + 1, 'loss': avg_loss, 'val_acc': val_acc})

    return history

def evaluate_model(model, data_loader):
    """Return the classification accuracy of ``model`` over ``data_loader``.

    The model is moved to CUDA when available (otherwise the CPU) and put
    into eval mode; it is left in eval mode when the function returns.
    Predictions are the index of the largest score along dimension 1 of the
    model output.

    Args:
        model: The ``nn.Module`` to evaluate.
        data_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        The fraction of samples whose prediction equals its label, as a
        float in ``[0, 1]``. Returns ``0.0`` when the loader yields no
        samples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)
            # Count matches per batch instead of accumulating every
            # prediction in Python lists and scoring them afterwards.
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        return 0.0
    return correct / total

# Load SVHN once. Every configuration below reuses these datasets, so all
# validation accuracies are measured on the same held-out split instead of a
# fresh random split per run.
train_loader, val_loader, test_loader = load_svhn(batch_size=32)

# Hyperparameter configuration
hidden_layers_options = [[32, 64, 128], [32, 64, 128, 128], [32, 64, 128, 128, 128]]
activation_options = ['relu', 'sigmoid']
weight_init_options = ['xavier', 'random']
learning_rate_options = [1e-3, 1e-4]
optimizer_options = ['sgd', 'momentum', 'nesterov', 'rmsprop', 'adam', 'nadam']
batch_size_options = [16, 32, 64]
weight_decay_options = [0, 0.0005, 0.5]
epochs_options = [5, 10]

# Optimizer constructors keyed by name. Storing factories (rather than
# instances) means only the optimizer actually selected for a run is built.
optimizer_factories = {
    'sgd': lambda params, lr, wd: optim.SGD(params, lr=lr, weight_decay=wd),
    'momentum': lambda params, lr, wd: optim.SGD(params, lr=lr, momentum=0.9, weight_decay=wd),
    'nesterov': lambda params, lr, wd: optim.SGD(params, lr=lr, momentum=0.9, nesterov=True, weight_decay=wd),
    'rmsprop': lambda params, lr, wd: optim.RMSprop(params, lr=lr, weight_decay=wd),
    'adam': lambda params, lr, wd: optim.Adam(params, lr=lr, weight_decay=wd),
    'nadam': lambda params, lr, wd: optim.NAdam(params, lr=lr, weight_decay=wd),
}

# Only the training loader depends on the batch size being searched over;
# accuracy on the validation/test loaders does not, so those are reused.
train_dataset = train_loader.dataset
train_loaders = {
    bs: DataLoader(train_dataset, batch_size=bs, shuffle=True)
    for bs in batch_size_options
}

criterion = nn.CrossEntropyLoss()

best_acc = 0
best_model = None
best_optimizer = None
best_config = None

# itertools.product iterates in the same order as the equivalent nested
# loops (the last option list varies fastest).
search_space = itertools.product(
    hidden_layers_options,
    activation_options,
    weight_init_options,
    learning_rate_options,
    optimizer_options,
    batch_size_options,
    weight_decay_options,
    epochs_options,
)

for (hidden_layers, activation, weight_init, learning_rate,
     optimizer_choice, batch_size, weight_decay, epochs) in search_space:
    print(f'Training with {hidden_layers}, {activation}, {weight_init}, lr={learning_rate}, optimizer={optimizer_choice}, batch_size={batch_size}, weight_decay={weight_decay}, epochs={epochs}')
    model = FeedforwardNN(input_size=32*32*3, hidden_layers=hidden_layers, activation=activation, weight_init=weight_init)

    # Unknown optimizer names fall back to Adam.
    make_optimizer = optimizer_factories.get(optimizer_choice, optimizer_factories['adam'])
    optimizer = make_optimizer(model.parameters(), learning_rate, weight_decay)

    train_model(model, train_loaders[batch_size], val_loader, optimizer, criterion, epochs=epochs)
    val_acc = evaluate_model(model, val_loader)

    if val_acc > best_acc:
        best_acc = val_acc
        best_model = model
        best_optimizer = optimizer_choice
        # Keep the full winning configuration, not just the optimizer name.
        best_config = {
            'hidden_layers': hidden_layers,
            'activation': activation,
            'weight_init': weight_init,
            'learning_rate': learning_rate,
            'optimizer': optimizer_choice,
            'batch_size': batch_size,
            'weight_decay': weight_decay,
            'epochs': epochs,
        }

print(f'Best Validation Accuracy: {best_acc:.4f} using optimizer {best_optimizer}')
if best_model is None:
    # No run beat the initial accuracy of 0, so there is nothing to test.
    print('No configuration improved on the initial accuracy; skipping test evaluation.')
else:
    print(f'Best configuration: {best_config}')
    test_acc = evaluate_model(best_model, test_loader)
    print(f'Test Accuracy: {test_acc:.4f}')




Using downloaded and verified file: ./data/train_32x32.mat
Using downloaded and verified file: ./data/test_32x32.mat
Training with [32, 64, 128], relu, xavier, lr=0.001, optimizer=sgd, batch_size=16, weight_decay=0, epochs=5
Using downloaded and verified file: ./data/train_32x32.mat
Using downloaded and verified file: ./data/test_32x32.mat
Epoch 1/5, Loss: 2.2322, Val Accuracy: 0.1969
Epoch 2/5, Loss: 2.0497, Val Accuracy: 0.3563
Epoch 3/5, Loss: 1.6385, Val Accuracy: 0.5267
Epoch 4/5, Loss: 1.3360, Val Accuracy: 0.6111
Epoch 5/5, Loss: 1.1716, Val Accuracy: 0.6528
Training with [32, 64, 128], relu, xavier, lr=0.001, optimizer=sgd, batch_size=16, weight_decay=0, epochs=10
Using downloaded and verified file: ./data/train_32x32.mat
Using downloaded and verified file: ./data/test_32x32.mat
Epoch 1/10, Loss: 2.2060, Val Accuracy: 0.2311
Epoch 2/10, Loss: 1.9403, Val Accuracy: 0.4431
Epoch 3/10, Loss: 1.5011, Val Accuracy: 0.5891
Epoch 4/10, Loss: 1.2581, Val Accuracy: 0.6336
Epoch 5/10, Lo