In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import itertools

# Load Iris dataset
data = load_iris()
X = data.data
# Use only one feature for regression (e.g., target 0 vs continuous target)
y = X[:, 0]  # Example: Predicting the first feature

# Preprocessing
scaler = StandardScaler()
X = scaler.fit_transform(X)
y = torch.tensor(y, dtype=torch.float32)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y.numpy(), test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)  # Add extra dimension for regression
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Define MLP model
class MLP(nn.Module):
    def __init__(self, input_size, hidden_layers, activation_fn):
        super(MLP, self).__init__()
        layers = []
        in_features = input_size

        for hidden_neurons in hidden_layers:
            layers.append(nn.Linear(in_features, hidden_neurons))
            layers.append(activation_fn())
            in_features = hidden_neurons

        layers.append(nn.Linear(in_features, 1))  # Output layer for regression
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Hyperparameter combinations
hidden_layer_configs = [[4], [8], [16], [4, 4], [8, 8], [16, 16], [4, 4, 4], [8, 8, 8], [16, 16, 16]]
activation_functions = {'linear': nn.Identity, 'sigmoid': nn.Sigmoid, 'relu': nn.ReLU}
epochs_options = [1, 10, 25]
learning_rates = [10, 1, 0.1, 0.01]
batch_sizes = [16, 32, 64]

# Train and evaluate model
results = []
for hidden_layers, (activation_name, activation_fn), epochs, lr, batch_size in itertools.product(
    hidden_layer_configs, activation_functions.items(), epochs_options, learning_rates, batch_sizes
):
    # Model initialization
    model = MLP(input_size=X_train.shape[1], hidden_layers=hidden_layers, activation_fn=activation_fn).cuda()
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    # DataLoader for batching
    train_data = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

    # Training loop
    for epoch in range(epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.cuda(), batch_y.cuda()
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    with torch.no_grad():
        X_test_cuda = X_test.cuda()
        y_test_cuda = y_test.cuda()
        predictions = model(X_test_cuda).cpu().numpy()

        # Debugging NaN values
        if np.isnan(predictions).any():
            print("Predictions contain NaN values.")
            predictions = np.nan_to_num(predictions)  # Replace NaN with 0

        mse = mean_squared_error(y_test.numpy(), predictions)

    # Record results
    results.append({
        'hidden_layers': hidden_layers,
        'activation': activation_name,
        'epochs': epochs,
        'learning_rate': lr,
        'batch_size': batch_size,
        'mse': mse
    })

# Display results
sorted_results = sorted(results, key=lambda x: x['mse'])
for result in sorted_results[:100]:
    print(result)


ValueError: Input contains NaN.