In [None]:
# Imports
import pandas as pd
import numpy as np
import torch
import random
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Hyperparameters
batch_size = 16        # samples per mini-batch fed to the DataLoaders
n_epochs = 300         # number of full passes over the training set
learning_rate = 0.05   # step size handed to the optimizer
seed = 42              # single seed shared by every random number generator

# Apply the seed to every RNG the notebook touches so that a
# "Restart Kernel -> Run All" reproduces the same weight initialisation,
# the same batch shuffling and the same data split.
# torch.manual_seed returns a Generator object, so it is deliberately not the
# last statement of the cell (keeps the cell output empty).
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

In [None]:
# Custom train/validation/test split with a fixed selection pattern, for reproducibility
def train_val_test_split(X, y, training_size, val_every=10, test_every=10, test_offset=5, seed=seed):
    """
    Custom data split with absolute training size and fixed patterns for validation/test set.

    Validation and test samples are picked by position (every n-th index), so
    they do not depend on the seed. Only the choice of training samples among
    the remaining indices is randomised.

    Args:
        X, y: torch tensors of equal length
        training_size (int): number of training samples to include; capped at
            the number of samples left after removing validation/test indices
        val_every (int): every nth sample (starting at index 0) goes to validation set
        test_every (int): every nth sample goes to test set
        test_offset (int): offset to start test selection (e.g. every 10th starting at index 5)
        seed (int): random seed for reproducibility

    Returns:
        X_train, y_train, X_val, y_val, X_test, y_test (torch tensors)

    Raises:
        ValueError: if X and y differ in length or training_size is negative.

    Note:
        The validation and test patterns are computed independently; an index
        that matches both patterns ends up in both sets.
    """
    n = len(X)
    if len(y) != n:
        raise ValueError(f"X and y must have the same length, got {n} and {len(y)}")
    if training_size < 0:
        # A negative value would silently slice from the end of the shuffled list.
        raise ValueError(f"training_size must be non-negative, got {training_size}")

    # Validation set: every nth sample starting at index 0
    val_indices = list(range(0, n, val_every))
    # Test set: every nth sample starting at offset
    test_indices = list(range(test_offset, n, test_every))

    # Set membership keeps the filtering linear in n instead of rebuilding and
    # scanning a concatenated list for every candidate index.
    held_out = set(val_indices)
    held_out.update(test_indices)
    remaining_indices = [i for i in range(n) if i not in held_out]

    # A private generator yields the same permutation as seeding the module-level
    # RNG would, but leaves the global `random` state untouched for other code.
    random.Random(seed).shuffle(remaining_indices)

    # Cap training size to available data
    train_indices = remaining_indices[:min(training_size, len(remaining_indices))]

    X_train, y_train = X[train_indices], y[train_indices]
    X_val, y_val = X[val_indices], y[val_indices]
    X_test, y_test = X[test_indices], y[test_indices]

    return X_train, y_train, X_val, y_val, X_test, y_test


In [None]:
# Read the raw dataset from disk and preview its first rows
csv_path = '../dataset/dummy_data.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

In [None]:
# Names of the model inputs and of the regression target in the dataframe
feature_columns = ["Fem_Fle(+)Ext(-)", "Fem_Var(+)Val(-)", "Fem_Int(+)Ext(-)"]
target_columns = ["f"]

# Selecting with a list keeps both arrays two-dimensional (rows x columns)
X = df[feature_columns].values
y = df[target_columns].values

In [None]:
# Turn the NumPy arrays into float32 tensors
X, y = (torch.tensor(arr, dtype=torch.float32) for arr in (X, y))

# Deterministic train/validation/test partition with 80 training samples
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = train_val_test_split(X, y, training_size=80)

# Standardise the target with statistics taken from the training split only,
# then apply the same shift and scale to the validation and test targets
y_mean, y_std = y_train.mean(), y_train.std()
y_train, y_val, y_test = [
    (target - y_mean) / y_std for target in (y_train, y_val, y_test)
]

In [None]:
# Wrap each split in a TensorDataset so features and targets are served in pairs
train_ds, val_ds, test_ds = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_val, y_val),
    TensorDataset(X_test, y_test),
)

# Training batches are reshuffled every epoch; validation and test batches
# keep their original order
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader, test_loader = [
    DataLoader(split_ds, batch_size=batch_size, shuffle=False)
    for split_ds in (val_ds, test_ds)
]

In [None]:
# MLP model
class MLP(nn.Module):
    """Fully connected feed-forward network with ReLU activations.

    The defaults build the architecture 3 -> 30 -> 20 -> 20 -> 1. Every hidden
    linear layer is followed by a ReLU; the output layer has no activation.

    Args:
        in_features (int): size of each input sample.
        hidden_sizes (sequence of int): width of each hidden layer, in order.
            An empty sequence yields a single linear layer.
        out_features (int): size of each output sample.
    """

    def __init__(self, in_features=3, hidden_sizes=(30, 20, 20), out_features=1):
        super().__init__()
        layers = []
        previous_width = in_features
        # Layers are created in forward order so that, for a given torch seed,
        # parameter initialisation consumes random numbers in a fixed order.
        for width in hidden_sizes:
            layers.append(nn.Linear(previous_width, width))
            layers.append(nn.ReLU())
            previous_width = width
        layers.append(nn.Linear(previous_width, out_features))
        # Stored under `model` inside an nn.Sequential so parameter names keep
        # the form "model.<index>.weight" / "model.<index>.bias".
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the network to `x`, whose last dimension must equal `in_features`."""
        return self.model(x)

In [None]:
# Build the network first, then the objective and the optimizer that updates it
model = MLP()
loss_fn = nn.MSELoss()  # mean squared error between prediction and target
optimizer = optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

In [None]:
# Training loop
# Each epoch runs one optimisation pass over train_loader followed by a
# gradient-free pass over val_loader. Reported losses are per-sample means:
# every batch loss is weighted by its batch size, so a smaller final batch does
# not distort the epoch average.
for epoch in range(n_epochs):
    model.train()
    train_loss_sum = 0.0
    train_samples = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        n_in_batch = X_batch.size(0)
        # loss_fn returns the batch mean; multiply back to a batch total
        train_loss_sum += loss.item() * n_in_batch
        train_samples += n_in_batch
    # NaN (instead of a ZeroDivisionError) signals an empty loader
    train_loss = train_loss_sum / train_samples if train_samples else float("nan")

    # Validation
    model.eval()
    val_loss_sum = 0.0
    val_samples = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            y_pred = model(X_batch)
            n_in_batch = X_batch.size(0)
            val_loss_sum += loss_fn(y_pred, y_batch).item() * n_in_batch
            val_samples += n_in_batch
    val_loss = val_loss_sum / val_samples if val_samples else float("nan")

    # Log the first epoch and then every tenth one
    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch {epoch+1:3d} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

print("Training complete.")

In [None]:
# Test evaluation
# Collects predictions for the whole test set, maps targets and predictions
# back to the original target scale and reports MSE and MAE on that scale.
model.eval()
y_true_all = []
y_pred_all = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch)
        # reshape(-1) always yields a 1-D tensor. squeeze() would collapse a
        # batch holding a single sample to a 0-d tensor, whose tolist() is a
        # bare float that list.extend() cannot iterate over.
        y_true_all.extend(y_batch.reshape(-1).tolist())
        y_pred_all.extend(y_pred.reshape(-1).tolist())

# Denormalize: undo the (y - mean) / std scaling applied to the targets
y_true_all = np.array(y_true_all)
y_pred_all = np.array(y_pred_all)
y_true_all = y_true_all * y_std.item() + y_mean.item()
y_pred_all = y_pred_all * y_std.item() + y_mean.item()

# Compute metrics
mse = np.mean((y_true_all - y_pred_all) ** 2)
mae = np.mean(np.abs(y_true_all - y_pred_all))

print(f" Test MSE: {mse:.6f}")
print(f" Test MAE: {mae:.6f}")