 - Dataset
   - MNIST
   - CIFAR 10
   - CIFAR 100
 - choose architecture - MLP to classify the images of the datasets.
 - design 1 MLP for each dataset.
 - explore RELU, leakyRELU, sigmoid.
 - explore SGD, ADAM
 - explore cross entropy, mean squared loss
 - Include dropout, exclude dropout



 train each dataset it with relu with sgd, leaky relu with sgd then sigmoid with sgd.
 do the same relu with ADAM, leaky relu with ADAM then sigmoid with ADAM.
 use cross entropy for each case.
 for every model with relu and adam, use dropout.
 for other dont use dropout.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import pandas as pd
from tqdm import tqdm

In [3]:
device = torch.device("mps")

class MLP(nn.Module):
    def __init__(self, input_size, hidden_layer, output_size, activation_fn='relu', dropout_rate=0.0):
        super(MLP, self).__init__()
        self.layers = []
        prev = input_size
        for hidden_size in hidden_layer:
          self.layers.append(nn.Linear(prev, hidden_size))
          self.layers.append(nn.BatchNorm1d(hidden_size))
          self.layers.append(nn.ReLU())
          self.layers.append(nn.Dropout(dropout_rate))
          prev = hidden_size
        self.layers.append(nn.Linear(prev, output_size))
        self.model = nn.Sequential(*self.layers)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten input
        return self.model(x)

    def _apply_activation(self, x):
        if self.activation_fn == 'relu':
            return F.relu(x)
        elif self.activation_fn == 'leaky_relu':
            return F.leaky_relu(x, negative_slope=0.01)
        elif self.activation_fn == 'sigmoid':
            return torch.sigmoid(x)
        else:
            raise ValueError(f"Unsupported activation function: {self.activation_fn}")

In [4]:


def train_test_model(model, train_loader, test_loader, optimizer, criterion, epochs=10):
    model.to(device)
    train_losses, test_losses, test_accuracies = [], [], []

    for epoch in range(epochs):
        # Training
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        train_losses.append(running_loss / len(train_loader))

        # Testing
        model.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in tqdm(test_loader, desc=f"Epoch {epoch+1}/{epochs} - Testing"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        test_losses.append(test_loss / len(test_loader))
        test_accuracies.append(100 * correct / total)

    return train_losses, test_losses, test_accuracies


In [5]:

def load_dataset(dataset_name):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)) if dataset_name == 'MNIST' else
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    if dataset_name == 'MNIST':
        train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
        test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    elif dataset_name == 'CIFAR10':
        train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
        test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
    elif dataset_name == 'CIFAR100':
        train_dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
        test_dataset = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
    else:
        raise ValueError(f"Unsupported dataset: {dataset_name}")

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    return train_loader, test_loader


In [6]:

def run_experiments(dataset_name, input_size, output_size):
    results = []
    train_loader, test_loader = load_dataset(dataset_name)
    if dataset_name == 'MNIST':
      hidden_layer=[512, 256]
    elif dataset_name == 'CIFAR10':
      hidden_layer=[1024, 512, 256]
    else:
      hidden_layer=[1024, 512, 256, 128]
    # Experiment 1: ReLU + SGD
    model = MLP(input_size, hidden_layer, output_size, activation_fn='relu')
    optimizer = optim.SGD(model.parameters(), lr=0.005)
    criterion = nn.CrossEntropyLoss()
    train_losses, test_losses, test_accuracies = train_test_model(model, train_loader, test_loader, optimizer, criterion)
    results.append({
        'Dataset': dataset_name,
        'Activation': 'ReLU',
        'Optimizer': 'SGD',
        'Dropout': 'No',
        'Test Accuracy': max(test_accuracies)
    })

    # Experiment 2: Leaky ReLU + SGD
    model = MLP(input_size, hidden_layer, output_size, activation_fn='leaky_relu')
    optimizer = optim.SGD(model.parameters(), lr=0.005)
    criterion = nn.CrossEntropyLoss()
    train_losses, test_losses, test_accuracies = train_test_model(model, train_loader, test_loader, optimizer, criterion)
    results.append({
        'Dataset': dataset_name,
        'Activation': 'Leaky ReLU',
        'Optimizer': 'SGD',
        'Dropout': 'No',
        'Test Accuracy': max(test_accuracies)
    })

    # Experiment 3: Sigmoid + SGD
    model = MLP(input_size, hidden_layer, output_size, activation_fn='sigmoid')
    optimizer = optim.SGD(model.parameters(), lr=0.005)
    criterion = nn.CrossEntropyLoss()
    train_losses, test_losses, test_accuracies = train_test_model(model, train_loader, test_loader, optimizer, criterion)
    results.append({
        'Dataset': dataset_name,
        'Activation': 'Sigmoid',
        'Optimizer': 'SGD',
        'Dropout': 'No',
        'Test Accuracy': max(test_accuracies)
    })

    # Experiment 4: ReLU + ADAM
    model = MLP(input_size, hidden_layer, output_size, activation_fn='relu')
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    criterion = nn.CrossEntropyLoss()
    train_losses, test_losses, test_accuracies = train_test_model(model, train_loader, test_loader, optimizer, criterion)
    results.append({
        'Dataset': dataset_name,
        'Activation': 'ReLU',
        'Optimizer': 'ADAM',
        'Dropout': 'No',
        'Test Accuracy': max(test_accuracies)
    })

    # Experiment 5: ReLU + ADAM + Dropout
    model = MLP(input_size, hidden_layer, output_size, activation_fn='relu', dropout_rate=0.3)
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    criterion = nn.CrossEntropyLoss()
    train_losses, test_losses, test_accuracies = train_test_model(model, train_loader, test_loader, optimizer, criterion)
    results.append({
        'Dataset': dataset_name,
        'Activation': 'ReLU',
        'Optimizer': 'ADAM',
        'Dropout': 'Yes',
        'Test Accuracy': max(test_accuracies)
    })

    return results


In [7]:

dataset_s = [
    ('MNIST', 28*28, 10),
    ('CIFAR10', 32*32*3, 10),
    ('CIFAR100', 32*32*3, 100)
]

all_results = []
for dataset_name, input_size, output_size in dataset_s:
    print(f"Running experiments for {dataset_name}...")
    results = run_experiments(dataset_name, input_size, output_size)
    all_results.extend(results)

df = pd.DataFrame(all_results)
df.to_excel('experiment_results.xlsx', index=False)
print("Results saved to experiment_results.xlsx")



Running experiments for MNIST...


100.0%
100.0%
100.0%
100.0%
Epoch 1/10 - Training: 100%|██████████| 938/938 [00:08<00:00, 116.66it/s]
Epoch 1/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 125.20it/s]
Epoch 2/10 - Training: 100%|██████████| 938/938 [00:05<00:00, 171.49it/s]
Epoch 2/10 - Testing: 100%|██████████| 157/157 [00:00<00:00, 200.37it/s]
Epoch 3/10 - Training: 100%|██████████| 938/938 [00:05<00:00, 169.02it/s]
Epoch 3/10 - Testing: 100%|██████████| 157/157 [00:00<00:00, 196.99it/s]
Epoch 4/10 - Training: 100%|██████████| 938/938 [00:05<00:00, 169.89it/s]
Epoch 4/10 - Testing: 100%|██████████| 157/157 [00:00<00:00, 194.97it/s]
Epoch 5/10 - Training: 100%|██████████| 938/938 [00:05<00:00, 170.30it/s]
Epoch 5/10 - Testing: 100%|██████████| 157/157 [00:00<00:00, 198.73it/s]
Epoch 6/10 - Training: 100%|██████████| 938/938 [00:05<00:00, 170.45it/s]
Epoch 6/10 - Testing: 100%|██████████| 157/157 [00:00<00:00, 199.02it/s]
Epoch 7/10 - Training: 100%|██████████| 938/938 [00:05<00:00, 169.27it/s]
Epoch 7/10 - Tes

Running experiments for CIFAR10...


100.0%
Epoch 1/10 - Training: 100%|██████████| 782/782 [00:08<00:00, 91.04it/s] 
Epoch 1/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 135.37it/s]
Epoch 2/10 - Training: 100%|██████████| 782/782 [00:07<00:00, 101.74it/s]
Epoch 2/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 138.97it/s]
Epoch 3/10 - Training: 100%|██████████| 782/782 [00:07<00:00, 101.54it/s]
Epoch 3/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 137.36it/s]
Epoch 4/10 - Training: 100%|██████████| 782/782 [00:07<00:00, 101.99it/s]
Epoch 4/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 136.72it/s]
Epoch 5/10 - Training: 100%|██████████| 782/782 [00:07<00:00, 100.92it/s]
Epoch 5/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 136.30it/s]
Epoch 6/10 - Training: 100%|██████████| 782/782 [00:07<00:00, 101.24it/s]
Epoch 6/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 131.82it/s]
Epoch 7/10 - Training: 100%|██████████| 782/782 [00:08<00:00, 92.69it/s] 
Epoch 7/10 - Testing: 100%|██████████

Running experiments for CIFAR100...


100.0%
Epoch 1/10 - Training: 100%|██████████| 782/782 [00:08<00:00, 90.70it/s]
Epoch 1/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 129.95it/s]
Epoch 2/10 - Training: 100%|██████████| 782/782 [00:07<00:00, 99.23it/s]
Epoch 2/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 135.99it/s]
Epoch 3/10 - Training: 100%|██████████| 782/782 [00:08<00:00, 94.82it/s]
Epoch 3/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 117.65it/s]
Epoch 4/10 - Training: 100%|██████████| 782/782 [00:08<00:00, 96.20it/s] 
Epoch 4/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 135.22it/s]
Epoch 5/10 - Training: 100%|██████████| 782/782 [00:07<00:00, 98.64it/s]
Epoch 5/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 128.94it/s]
Epoch 6/10 - Training: 100%|██████████| 782/782 [00:08<00:00, 94.90it/s]
Epoch 6/10 - Testing: 100%|██████████| 157/157 [00:01<00:00, 132.51it/s]
Epoch 7/10 - Training: 100%|██████████| 782/782 [00:08<00:00, 91.87it/s]
Epoch 7/10 - Testing: 100%|██████████| 157/

ModuleNotFoundError: No module named 'openpyxl'

In [8]:
df = pd.DataFrame(all_results)
df.to_excel('experiment_results.xlsx', index=False)
print("Results saved to experiment_results.xlsx")

Results saved to experiment_results.xlsx
