In [2]:
# Mount Google Drive at /content/drive (requires the google.colab package).
# NOTE(review): no code in the visible cells reads from the mounted path —
# confirm the mount is still needed before keeping this cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Import necessary libraries
import itertools

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [4]:
# Generate a dummy dataset
# Explanation: This dataset consists of binary classification data with two features per sample (X) and one binary label per sample (y).
def create_dummy_dataset(n_samples=1000, seed=42):
    """Generate a synthetic, linearly separable binary classification dataset.

    Each sample has two features drawn uniformly from [0, 1). The label is 1
    when the two features sum to more than 1 and 0 otherwise.

    Args:
        n_samples: Number of samples (rows) to generate. Defaults to 1000.
        seed: Seed for the private random generator. Defaults to 42.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a float array of shape
        ``(n_samples, 2)`` and ``y`` is an integer array of shape
        ``(n_samples,)`` holding 0/1 labels.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.rand(...), but leaves NumPy's global RNG untouched
    # so calling this function cannot silently reseed unrelated code.
    rng = np.random.RandomState(seed)
    X = rng.rand(n_samples, 2)
    y = (X[:, 0] + X[:, 1] > 1).astype(int)  # 1 if the feature sum exceeds 1
    return X, y

# Build the dataset and hold out 20% of it for testing (fixed split seed).
X, y = create_dummy_dataset()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert data to PyTorch tensors: features as float32, labels as long
# (the labels are later used as class-index targets for nn.CrossEntropyLoss).
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Wrap data into a DataLoader
# Explanation: DataLoader allows us to split the dataset into batches for efficient training.
def create_dataloader(X, y, batch_size, shuffle=True):
    """Wrap feature and label tensors in a batched DataLoader.

    Args:
        X: Feature tensor; its first dimension indexes samples.
        y: Label tensor with the same first-dimension length as ``X``
            (TensorDataset enforces this).
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples on every pass. Defaults to
            True (the previous hard-coded behaviour); pass False for a fixed
            iteration order, e.g. during evaluation.

    Returns:
        A ``DataLoader`` yielding ``(batch_X, batch_y)`` pairs.
    """
    dataset = TensorDataset(X, y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Define the MLP model
# Explanation: This is a modular implementation of a Multi-Layer Perceptron (MLP) with configurable layers and activation functions.
class MLP(nn.Module):
    """Multi-Layer Perceptron with a configurable stack of hidden layers.

    The network is ``Linear -> activation`` repeated once per entry of
    ``hidden_sizes``, followed by a final ``Linear`` layer that produces
    ``output_size`` raw scores (no activation is applied to the output).

    Args:
        input_size: Number of input features.
        hidden_sizes: Iterable with the width of each hidden layer, in order.
            An empty iterable yields a single ``Linear(input_size, output_size)``.
        output_size: Number of output units.
        activation_fn: Name of the activation placed after every hidden
            layer. One of ``'linear'`` (identity), ``'sigmoid'``, ``'relu'``,
            ``'softmax'`` (applied over dim 1) or ``'tanh'``.

    Raises:
        ValueError: If ``activation_fn`` is not one of the supported names.
            Previously an unknown name was silently ignored, producing a
            network with no activations at all.
    """

    # Factories (not instances) so each hidden layer gets its own module object.
    _ACTIVATION_FACTORIES = {
        'linear': nn.Identity,
        'sigmoid': nn.Sigmoid,
        'relu': nn.ReLU,
        'softmax': lambda: nn.Softmax(dim=1),
        'tanh': nn.Tanh,
    }

    def __init__(self, input_size, hidden_sizes, output_size, activation_fn):
        super().__init__()

        # Validate up front so a typo fails loudly instead of quietly
        # degrading the model to a purely linear map.
        try:
            make_activation = self._ACTIVATION_FACTORIES[activation_fn]
        except (KeyError, TypeError):
            supported = ', '.join(repr(name) for name in self._ACTIVATION_FACTORIES)
            raise ValueError(
                f"Unknown activation_fn {activation_fn!r}; expected one of: {supported}"
            ) from None

        layers = []
        current_input_size = input_size
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(current_input_size, hidden_size))
            layers.append(make_activation())
            current_input_size = hidden_size
        # Output layer: raw scores, no activation.
        layers.append(nn.Linear(current_input_size, output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the raw output scores."""
        return self.model(x)

# Experiment configurations
# Hidden-layer widths to try: one, two and three hidden layers.
hidden_layer_configs = [
    [4],
    [8, 8],
    [16, 16, 16],
]

# Activation names understood by the MLP class.
activation_functions = [
    'linear',
    'sigmoid',
    'relu',
    'softmax',
    'tanh',
]

# Number of full passes over the training data.
epochs_list = [
    1, 10, 25,
    50, 100, 250,
]

# Optimizer step sizes, from largest to smallest.
learning_rates = [
    10, 1, 0.1,
    0.01, 0.001, 0.0001,
]

# Mini-batch sizes: powers of two from 16 (2**4) up to 512 (2**9).
batch_sizes = [2 ** exponent for exponent in range(4, 10)]

# Training function
# Explanation: This function trains the model and evaluates its performance on the test set.
def train_and_evaluate(hidden_sizes, activation_fn, epochs, learning_rate, batch_size, seed=None):
    """Train a fresh MLP on the training tensors and score it on the test tensors.

    The function reads the module-level tensors ``X_train_tensor``,
    ``y_train_tensor``, ``X_test_tensor`` and ``y_test_tensor``. The model has
    2 inputs and 2 outputs and is trained with ``nn.CrossEntropyLoss`` and the
    Adam optimizer.

    Args:
        hidden_sizes: Widths of the hidden layers, passed to ``MLP``.
        activation_fn: Activation name, passed to ``MLP``.
        epochs: Number of passes over the training loader.
        learning_rate: Learning rate given to Adam.
        batch_size: Batch size for both the training and the test loader.
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called first so that weight initialisation and batch shuffling are
            repeatable. Defaults to None, which leaves PyTorch's RNG untouched.

    Returns:
        The value of ``accuracy_score`` for the model's predictions on the
        test tensors.
    """
    if seed is not None:
        # Must happen before the model is built: parameter initialisation and
        # the training loader's shuffling both draw from the global torch RNG.
        torch.manual_seed(seed)

    train_loader = create_dataloader(X_train_tensor, y_train_tensor, batch_size)
    # Accuracy does not depend on sample order, so the test set is iterated
    # in its natural order instead of being reshuffled.
    test_loader = DataLoader(
        TensorDataset(X_test_tensor, y_test_tensor),
        batch_size=batch_size,
        shuffle=False,
    )

    model = MLP(input_size=2, hidden_sizes=hidden_sizes, output_size=2, activation_fn=activation_fn)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    model.train()
    for _ in range(epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()

    # Evaluation: predicted class = index of the largest output score.
    model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            _, preds = torch.max(model(batch_X), 1)
            y_pred.extend(preds.tolist())
            y_true.extend(batch_y.tolist())
    return accuracy_score(y_true, y_pred)

# Run experiments and collect results
# Explanation: We iterate over all configurations and store the results for comparison.
# Seed applied before every run so each configuration is trained from a
# repeatable initialisation and batch order; without it the ranking below
# changes on every execution of the notebook.
EXPERIMENT_SEED = 42

results = []
# itertools.product iterates with the right-most list varying fastest, i.e.
# in the same order as five nested for-loops over these lists.
for hidden_sizes, activation_fn, epochs, learning_rate, batch_size in itertools.product(
    hidden_layer_configs, activation_functions, epochs_list, learning_rates, batch_sizes
):
    torch.manual_seed(EXPERIMENT_SEED)
    acc = train_and_evaluate(hidden_sizes, activation_fn, epochs, learning_rate, batch_size)
    # One record per run: the five hyper-parameters followed by the test accuracy.
    results.append((hidden_sizes, activation_fn, epochs, learning_rate, batch_size, acc))

# Display results
# Explanation: Sort and display the configurations and their respective accuracies.
# Order the runs in place from highest to lowest accuracy; the accuracy is the
# last field of each record, and runs with equal accuracy keep their run order.
results.sort(reverse=True, key=lambda record: record[-1])

# Report only the ten best-scoring configurations.
for res in results[0:10]:
    print(f"Hidden Sizes: {res[0]}, Activation: {res[1]}, Epochs: {res[2]}, Learning Rate: {res[3]}, Batch Size: {res[4]}, Accuracy: {res[5]:.4f}")


Hidden Sizes: [4], Activation: linear, Epochs: 10, Learning Rate: 0.01, Batch Size: 16, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 25, Learning Rate: 0.1, Batch Size: 256, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 50, Learning Rate: 10, Batch Size: 512, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 50, Learning Rate: 0.01, Batch Size: 32, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 50, Learning Rate: 0.01, Batch Size: 128, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 100, Learning Rate: 10, Batch Size: 512, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 100, Learning Rate: 1, Batch Size: 512, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 100, Learning Rate: 0.1, Batch Size: 32, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 100, Learning Rate: 0.01, Batch Size: 128, Accuracy: 1.0000
Hidden Sizes: [4], Activation: linear, Epochs: 100, Le