In [2]:
import torch
import torch.nn as nn
import torch.optim as opt
import torchvision
import torchvision.transforms as tr
from torch.utils.data import DataLoader, Subset
import numpy as np
import onnx
import torch.nn.functional as F
import csv

# Problem dimensions and runtime configuration
INPUT_DIM = 784  # length of one image vector after flattening
OUTPUT_DIM = 10  # number of outputs produced by the final layer
DATASET_DIR = '../dataset'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Every image is converted to a tensor and then flattened to a 1-D vector.
transform = tr.Compose([tr.ToTensor(), tr.Lambda(torch.flatten)])

# MNIST train/test splits (downloaded into DATASET_DIR when missing)
train_dataset = torchvision.datasets.MNIST(
    root=DATASET_DIR,
    train=True,
    transform=transform,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root=DATASET_DIR,
    train=False,
    transform=transform,
    download=True,
)

# Training uses only the first 4000 samples; evaluation uses the full test split.
train_subset = Subset(train_dataset, np.arange(4000))
train_loader = DataLoader(train_subset, shuffle=True, batch_size=128)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=128)


class SmallNetwork(nn.Module):
    """Fully connected network: Linear+ReLU per hidden layer, then a Linear head.

    Args:
        architecture: Iterable of hidden-layer widths, in order. The first
            layer takes ``INPUT_DIM`` features and the final layer emits
            ``OUTPUT_DIM`` values (no activation is applied to the output).
    """

    def __init__(self, architecture):
        super().__init__()

        # Consecutive pairs of this list are the (in, out) sizes of each
        # hidden Linear layer.
        widths = [INPUT_DIM, *architecture]

        modules = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            modules += [nn.Linear(fan_in, fan_out), nn.ReLU()]

        # Output head maps the last hidden width (or INPUT_DIM when there are
        # no hidden layers) to the output dimension.
        modules.append(nn.Linear(widths[-1], OUTPUT_DIM))

        self.network = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the stacked layers to ``x`` and return the result."""
        return self.network(x)


def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in training mode and its
    parameters are updated after every batch; otherwise the model is put in
    evaluation mode and gradients are disabled.
    """
    training = optimizer is not None
    # Set the mode explicitly on every pass so an evaluation pass can never
    # leave the model in eval mode for the following training pass.
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # The loss is the mean of the per-batch losses (same as the reported
    # values have always been computed).
    return loss_sum / len(loader), 100 * correct / total


def train_mnist_network(architecture, folder_path, epochs=6000, learning_rate=0.001):
    """Train a ``SmallNetwork`` and export it to ONNX.

    The model is trained on the module-level ``train_loader`` and evaluated on
    the module-level ``test_loader`` after every epoch, using Adam and
    cross-entropy loss on the module-level ``device``.

    Args:
        architecture: Sequence of hidden-layer widths. Must be non-empty,
            because the first width is used in the exported file name.
        folder_path: Directory that receives ``baseline_<first width>.onnx``.
            It is created if it does not exist.
        epochs: Number of training epochs (default 6000).
        learning_rate: Adam learning rate (default 0.001).

    Returns:
        dict with keys ``architecture``, ``train_accuracy``, ``test_accuracy``,
        ``train_loss`` and ``test_loss`` holding the metrics of the last epoch.

    Raises:
        IndexError: If ``architecture`` is empty.
        ValueError: If ``epochs`` is smaller than 1.
    """
    import os

    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    # Resolve the output location before training so that a bad argument or an
    # unwritable folder fails immediately instead of after the whole run.
    onnx_path = f"{folder_path}/baseline_{architecture[0]}.onnx"
    os.makedirs(folder_path, exist_ok=True)

    print(f"Architecture: {architecture}")
    print(f"Device: {device}")

    # Building the network
    model = SmallNetwork(architecture).to(device)

    optimizer = opt.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []

    # Training loop
    for epoch in range(epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, optimizer)
        train_accuracies.append(train_accuracy)
        train_losses.append(train_loss)

        # Evaluate on test set
        test_loss, test_accuracy = _run_epoch(model, test_loader, criterion)
        test_accuracies.append(test_accuracy)
        test_losses.append(test_loss)

        print(f"Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, "
              f"Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

    # Save the model in ONNX format (exported in evaluation mode)
    model.eval()
    dummy_input = torch.randn(1, INPUT_DIM, device=device)

    torch.onnx.export(model, dummy_input, onnx_path)
    print(f"Model saved to {onnx_path}")

    return {
        "architecture": architecture,
        "train_accuracy": train_accuracies[-1],
        "test_accuracy": test_accuracies[-1],
        "train_loss": train_losses[-1],
        "test_loss": test_losses[-1],
    }


# Train each architecture and record its final metrics as one CSV row
csv_file = "accuracies_losses.csv"
folder_path = "output_folder"
architectures = [[200]]

# Column titles, and the result-dict keys that fill the metric columns
# (listed in the same order as the titles after "Architecture").
header = ["Architecture", "Train Accuracy", "Test Accuracy", "Train Loss", "Test Loss"]
metric_keys = ('train_accuracy', 'test_accuracy', 'train_loss', 'test_loss')

with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

    for architecture in architectures:
        results = train_mnist_network(architecture, folder_path)
        print(results)

        # One row per architecture: the architecture followed by its metrics
        row = [architecture]
        row.extend(results[key] for key in metric_keys)
        writer.writerow(row)

print(f"Results saved to {csv_file}")

Architecture: [200]
Device: cuda
Epoch [1/6000], Train Loss: 1.3573, Test Loss: 0.6834, Train Accuracy: 73.38%, Test Accuracy: 82.84%
Epoch [2/6000], Train Loss: 0.4981, Test Loss: 0.4341, Train Accuracy: 87.45%, Test Accuracy: 88.30%
Epoch [3/6000], Train Loss: 0.3498, Test Loss: 0.3789, Train Accuracy: 90.58%, Test Accuracy: 89.04%
Epoch [4/6000], Train Loss: 0.2919, Test Loss: 0.3407, Train Accuracy: 91.95%, Test Accuracy: 90.20%
Epoch [5/6000], Train Loss: 0.2566, Test Loss: 0.3186, Train Accuracy: 93.28%, Test Accuracy: 90.50%
Epoch [6/6000], Train Loss: 0.2228, Test Loss: 0.3146, Train Accuracy: 94.12%, Test Accuracy: 90.58%
Epoch [7/6000], Train Loss: 0.1981, Test Loss: 0.3063, Train Accuracy: 94.45%, Test Accuracy: 90.65%
Epoch [8/6000], Train Loss: 0.1835, Test Loss: 0.2943, Train Accuracy: 95.40%, Test Accuracy: 91.24%
Epoch [9/6000], Train Loss: 0.1661, Test Loss: 0.2872, Train Accuracy: 95.72%, Test Accuracy: 91.34%
Epoch [10/6000], Train Loss: 0.1471, Test Loss: 0.2777, Tr