In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import itertools


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load dataset
df = pd.read_csv('/content/drive/MyDrive/Dataset/student-mat.csv', sep=';')

# Preprocessing: Select numerical columns
categorical_columns = df.select_dtypes(include=['object']).columns
df = pd.get_dummies(df, columns=categorical_columns)  # One-hot encoding for categorical columns

# Feature-target split
X = df.drop(columns='age')
y = df['age']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)


In [4]:
class MLPRegressor(nn.Module):
    def __init__(self, input_dim, hidden_layers, activation_function):
        super(MLPRegressor, self).__init__()

        # Create layers dynamically based on hidden_layers
        layers = []
        for i, neurons in enumerate(hidden_layers):
            if i == 0:
                layers.append(nn.Linear(input_dim, neurons))
            else:
                layers.append(nn.Linear(hidden_layers[i - 1], neurons))

            # Add activation function
            if activation_function == 'relu':
                layers.append(nn.ReLU())
            elif activation_function == 'sigmoid':
                layers.append(nn.Sigmoid())
            elif activation_function == 'tanh':
                layers.append(nn.Tanh())
            elif activation_function == 'softmax':
                layers.append(nn.Softmax(dim=1))
            elif activation_function == 'linear':
                pass  # Linear activation, no additional layer needed

        # Output layer
        layers.append(nn.Linear(hidden_layers[-1], 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


In [5]:
def train_model(model, optimizer, criterion, train_loader, test_loader, epochs):
    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluate on test set every epoch
        model.eval()
        with torch.no_grad():
            y_pred = model(X_test)
            test_loss = criterion(y_pred, y_test)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}, Test Loss: {test_loss.item()}")
    return model


In [6]:
# Hyperparameters
hidden_layers_options = [[4], [8, 4], [16, 8, 4]]  # Example: 1, 2, 3 layers with varying neurons
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax', 'linear']
learning_rates = [10, 1, 0.1, 0.01, 0.001, 0.0001]
batch_sizes = [16, 32, 64, 128, 256, 512]
epochs_options = [1, 10, 25, 50, 100, 250]

# DataLoader preparation
def create_data_loaders(X, y, batch_size):
    dataset = torch.utils.data.TensorDataset(X, y)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Grid search over hyperparameters
results = []
input_dim = X_train.shape[1]

for hidden_layers, activation_function, lr, batch_size, epochs in itertools.product(
    hidden_layers_options, activation_functions, learning_rates, batch_sizes, epochs_options
):
    print(f"Training with {hidden_layers}, {activation_function}, lr={lr}, batch_size={batch_size}, epochs={epochs}")

    # Create data loaders
    train_loader = create_data_loaders(X_train, y_train, batch_size)
    test_loader = create_data_loaders(X_test, y_test, batch_size)

    # Model, loss function, optimizer
    model = MLPRegressor(input_dim, hidden_layers, activation_function)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Train model
    model = train_model(model, optimizer, criterion, train_loader, test_loader, epochs)

    # Evaluate final test performance
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test).numpy()
        mse = mean_squared_error(y_test.numpy(), y_pred)

    results.append((hidden_layers, activation_function, lr, batch_size, epochs, mse))
    print(f"Final MSE: {mse}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 180/250, Loss: 1.7129100561141968, Test Loss: 1.0140324831008911
Epoch 181/250, Loss: 1.2564829587936401, Test Loss: 0.9146613478660583
Epoch 182/250, Loss: 2.1281630992889404, Test Loss: 1.0767916440963745
Epoch 183/250, Loss: 3.0795631408691406, Test Loss: 0.9209849834442139
Epoch 184/250, Loss: 0.9669082164764404, Test Loss: 1.1145832538604736
Epoch 185/250, Loss: 1.436966061592102, Test Loss: 0.9538756012916565
Epoch 186/250, Loss: 1.1974660158157349, Test Loss: 1.1362926959991455
Epoch 187/250, Loss: 1.277437686920166, Test Loss: 1.0127644538879395
Epoch 188/250, Loss: 2.083723783493042, Test Loss: 0.9554153680801392
Epoch 189/250, Loss: 2.2175049781799316, Test Loss: 0.9436478614807129
Epoch 190/250, Loss: 1.4610862731933594, Test Loss: 0.9570813775062561
Epoch 191/250, Loss: 0.94575434923172, Test Loss: 1.06064772605896
Epoch 192/250, Loss: 1.7930493354797363, Test Loss: 0.9444460272789001
Epoch 193/250, Loss

In [7]:
# Save results to a DataFrame for analysis
results_df = pd.DataFrame(results, columns=['Hidden Layers', 'Activation Function', 'Learning Rate', 'Batch Size', 'Epochs', 'MSE'])
results_df.to_csv('mlp_regressor_results.csv', index=False)
print(results_df.sort_values(by='MSE'))


     Hidden Layers Activation Function  Learning Rate  Batch Size  Epochs  \
928            [4]              linear          1.000         256     100   
2044        [8, 4]              linear          0.100         256     100   
2099        [8, 4]              linear          0.001          32     250   
2002        [8, 4]              linear          1.000         128     100   
929            [4]              linear          1.000         256     250   
...            ...                 ...            ...         ...     ...   
3056    [16, 8, 4]              linear         10.000         512      25   
3049    [16, 8, 4]              linear         10.000         256      10   
3030    [16, 8, 4]              linear         10.000          32       1   
3024    [16, 8, 4]              linear         10.000          16       1   
3042    [16, 8, 4]              linear         10.000         128       1   

               MSE  
928   8.620731e-01  
2044  8.698234e-01  
2099  8.7794