In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from itertools import product

from utils import import_data,combine_data,split_data,standardize,compute_avg_delta_dataset #standardize

In [None]:
# --- Define MLP Model ---
# Two-letter activation codes that may appear inside an architecture list.
ACTIVATIONS = {
    "RL": nn.ReLU,
    "TN": nn.Tanh,
    "SI": nn.Sigmoid,
    "LI": nn.Identity
}


class MLP(nn.Module):
    """Fully connected network assembled from a flat architecture list.

    The list is read left to right: an ``int`` adds a linear layer of that
    width, a ``str`` adds the activation registered under that code in
    ``ACTIVATIONS`` (lookup is case-insensitive). A final linear layer mapping
    to ``output_dim`` is always appended, with no activation after it.

    Args:
        input_dim: Number of input features.
        output_dim: Number of outputs of the final linear layer.
        architecture: Iterable of ints (layer widths) and strs (activation codes).

    Raises:
        ValueError: On an unknown activation code or an element that is
            neither ``int`` nor ``str``.
    """

    def __init__(self, input_dim, output_dim, architecture):
        super().__init__()
        stack = []
        width = input_dim  # output width of the most recent linear layer

        for spec in architecture:
            if isinstance(spec, int):
                # Integer entry: a linear layer from the current width to `spec`.
                stack.append(nn.Linear(width, spec))
                width = spec
                continue

            if not isinstance(spec, str):
                raise ValueError(f"Invalid architecture element: {spec}")

            # String entry: an activation code.
            factory = ACTIVATIONS.get(spec.upper())
            if factory is None:
                raise ValueError(f"Unknown activation code: {spec}")
            stack.append(factory())

        # Output head.
        stack.append(nn.Linear(width, output_dim))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack."""
        return self.model(x)

In [None]:
# --- Training Function ---
def train_mlp(X_train, y_train, X_val, y_val, input_dim, output_dim,
              hidden_layers, lr, batch_size, epochs):
    """Fit an MLP regressor with Adam on MSE loss and score it on validation data.

    Args:
        X_train, y_train: Training features and targets; anything
            ``torch.tensor`` can convert to float32.
        X_val, y_val: Validation features and targets, same convention.
        input_dim: Number of input features of the network.
        output_dim: Number of network outputs.
        hidden_layers: Architecture list forwarded to ``MLP``.
        lr: Adam learning rate.
        batch_size: Mini-batch size of the shuffled training loader.
        epochs: Number of full passes over the training data.

    Returns:
        ``(model, val_loss)``: the trained model (left in eval mode) and the
        MSE over the whole validation set as a Python float.
    """
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)
    X_val   = torch.tensor(X_val, dtype=torch.float32)
    y_val   = torch.tensor(y_val, dtype=torch.float32)

    # nn.MSELoss broadcasts mismatched shapes: a flat (N,) target against an
    # (N, 1) prediction is expanded to (N, N) and yields a wrong loss (PyTorch
    # only emits a warning). Promote flat targets to a column for the
    # single-output case so loss and gradients are computed element-wise.
    if output_dim == 1:
        if y_train.dim() == 1:
            y_train = y_train.unsqueeze(1)
        if y_val.dim() == 1:
            y_val = y_val.unsqueeze(1)

    model = MLP(input_dim, output_dim, hidden_layers)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    for _ in range(epochs):
        model.train()
        for xb, yb in train_loader:
            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            optimizer.step()

    # Validation loss is computed once, in a single full-batch pass.
    model.eval()
    with torch.no_grad():
        val_pred = model(X_val)
        val_loss = criterion(val_pred, y_val).item()

    return model, val_loss

In [None]:
from sklearn.model_selection import KFold
import numpy as np

def run_kfold_cv(X, y, input_dim, output_dim,
                 hidden_layers, lr, batch_size, epochs,
                 k=5, random_state=42):
    """
    Score one hyperparameter setting with shuffled K-fold cross-validation.

    For every fold the data is split by index, passed through standardize()
    (the validation split is also supplied in the test slots, whose outputs
    are discarded), and a fresh model is trained with train_mlp().
    NOTE(review): standardize() is defined in utils; presumably it fits its
    scalers on the training split only -- confirm there.

    Returns:
        (mean_loss, std_loss): mean and standard deviation of the per-fold
        validation losses.
    """
    splitter = KFold(n_splits=k, shuffle=True, random_state=random_state)
    per_fold = []

    for fold_no, (fit_rows, hold_rows) in enumerate(splitter.split(X), start=1):
        X_fit, X_hold = X[fit_rows], X[hold_rows]
        y_fit, y_hold = y[fit_rows], y[hold_rows]

        # standardize() returns eight values; only the scaled train/validation
        # arrays are needed here.
        (X_fit, X_hold, _unused_X_test,
         y_fit, y_hold, _unused_y_test,
         _x_scaler, _y_scaler) = standardize(
            X_fit, X_hold, X_hold,
            y_fit, y_hold, y_hold
        )

        _model, fold_loss = train_mlp(
            X_fit, y_fit, X_hold, y_hold,
            input_dim, output_dim,
            hidden_layers, lr, batch_size, epochs
        )

        per_fold.append(fold_loss)
        print(f"Fold {fold_no}/{k}: Val Loss = {fold_loss:.6f}")

    mean_loss = np.mean(per_fold)
    std_loss = np.std(per_fold)
    print(f"\n📊 Mean CV Loss: {mean_loss:.6f} ± {std_loss:.6f}")
    return mean_loss, std_loss


In [None]:
# --- Grid Search Function ---
def grid_search_mlp(X_train, y_train, X_val, y_val, input_dim, output_dim, param_grid):
    """Exhaustive hold-out grid search over ``train_mlp`` keyword arguments.

    Args:
        X_train, y_train, X_val, y_val: Data forwarded unchanged to ``train_mlp``.
        input_dim, output_dim: Network input/output sizes, forwarded as well.
        param_grid: Dict mapping a ``train_mlp`` keyword name to the list of
            values to try; every combination is trained once.

    Returns:
        ``(results, best_model, best_params)`` where ``results`` is a list of
        ``(params, val_loss)`` tuples sorted by ascending loss, and the other
        two describe the combination with the lowest validation loss
        (``None`` when no combination was scored below infinity).
    """
    scored = []
    leader = (float('inf'), None, None)  # (loss, model, params) of the best run so far

    names, candidates = zip(*param_grid.items())

    for choice in product(*candidates):
        config = dict(zip(names, choice))
        print(f"Testing: {config}")

        fitted, loss = train_mlp(
            X_train, y_train, X_val, y_val,
            input_dim=input_dim,
            output_dim=output_dim,
            **config
        )
        scored.append((config, loss))

        # Strict '<' keeps the earliest combination on ties.
        if loss < leader[0]:
            leader = (loss, fitted, config)

    scored.sort(key=lambda entry: entry[1])
    _, best_model, best_params = leader
    return scored, best_model, best_params

In [None]:
from itertools import product
import numpy as np

def grid_search_mlp_cv(X, y, input_dim, output_dim, param_grid,
                       default_k_folds=5, random_state=42, idx=None):
    """
    Perform grid search with K-fold cross-validation using run_kfold_cv().

    Args:
        X, y: Full feature matrix and targets (indexed with NumPy-style
            indexing).
        input_dim: Number of input features; overwritten with the number of
            selected columns when ``idx`` is given.
        output_dim: Number of network outputs.
        param_grid: Dict of lists. Must contain the keys ``hidden_layers``,
            ``lr``, ``batch_size`` and ``epochs``; may contain ``k_folds``.
        default_k_folds: Fold count used when ``k_folds`` is not in the grid.
        random_state: Seed forwarded to the fold splitter.
        idx: Optional column indices; when given, only ``X[:, idx]`` is used.

    Returns (in this order):
        best_params: dict of best hyperparameters (``k_folds`` included)
        best_loss: lowest mean CV loss found
        results: list of dicts with {'params', 'mean_loss', 'std_loss'}
        best_model: model retrained on all data using best_params
        x_scaler, y_scaler: last two values returned by standardize() on the
            full data (NOTE(review): presumably the fitted scalers -- confirm
            in utils)

    Raises:
        ValueError: If no combination produced a finite-comparable mean loss
            (empty value list in the grid, or every CV loss was NaN).
    """

    results = []
    best_loss = float('inf')
    best_params = None

    keys, values = zip(*param_grid.items())

    # --- Optional feature filtering ---
    if idx is not None:
        X = X[:, idx]
        print(f"🧩 Using {X.shape[1]} selected features (indices from feature selection)")
        input_dim = X.shape[1]  # update input dimension automatically

    # --- Grid Search Loop ---
    for combination in product(*values):
        params = dict(zip(keys, combination))
        k_folds = params.pop("k_folds", default_k_folds)  # use fold count if present

        print(f"\n🔍 Testing params: {params} | k_folds={k_folds}")

        # --- Run CV for this combination ---
        mean_loss, std_loss = run_kfold_cv(
            X, y,
            input_dim=input_dim,
            output_dim=output_dim,
            hidden_layers=params["hidden_layers"],
            lr=params["lr"],
            batch_size=params["batch_size"],
            epochs=params["epochs"],
            k=k_folds,
            random_state=random_state
        )

        results.append({
            "params": dict(params, k_folds=k_folds),
            "mean_loss": mean_loss,
            "std_loss": std_loss
        })

        # NaN compares False against everything, so diverged runs never win.
        if mean_loss < best_loss:
            best_loss = mean_loss
            best_params = dict(params, k_folds=k_folds)

    # Without a winner the retraining step below would fail on
    # `best_params.items()` with an opaque AttributeError; fail early and
    # explain the two ways this can happen instead.
    if best_params is None:
        raise ValueError(
            "Grid search found no usable hyperparameter combination: "
            f"{len(results)} combination(s) evaluated. Either a value list in "
            "param_grid is empty or every mean CV loss was NaN/inf."
        )

    # --- Report best combination ---
    print(f"\n🏆 Best parameters found: {best_params}")
    print(f"   Mean CV Loss: {best_loss:.6f}")

    # --- Retrain final model using best params on full dataset ---
    # The full data is passed in every slot of standardize(); the "validation"
    # loss returned by train_mlp is therefore a training loss and is discarded.
    X_train, _, _, y_train, _, _, x_scaler, y_scaler = standardize(X, X, X, y, y, y)
    best_model, _ = train_mlp(
        X_train, y_train, X_train, y_train,
        input_dim=input_dim, output_dim=output_dim,
        **{k: v for k, v in best_params.items() if k != "k_folds"}
    )

    return best_params, best_loss, results, best_model, x_scaler, y_scaler


In [None]:
def train_autoencoder(X_train, X_val, input_dim, hidden_layers,
                      lr=1e-3, batch_size=32, epochs=100, verbose=False):
    """Train an MLP to reconstruct its own input (MSE loss, Adam).

    Args:
        X_train, X_val: Training / validation features; anything
            ``torch.tensor`` can convert to float32.
        input_dim: Feature count; used as both input and output width.
        hidden_layers: Architecture list forwarded to ``MLP``.
        lr: Adam learning rate.
        batch_size: Mini-batch size of the shuffled training loader.
        epochs: Number of passes over the training data.
        verbose: When true, print the validation loss every 10th epoch.

    Returns:
        ``(model, val_loss)``: the trained autoencoder in eval mode and its
        reconstruction MSE on ``X_val``.
    """
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_val = torch.tensor(X_val, dtype=torch.float32)

    model = MLP(input_dim, input_dim, hidden_layers)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # Inputs double as targets, hence the same tensor in both dataset slots.
    reconstruction_set = torch.utils.data.TensorDataset(X_train, X_train)
    train_loader = torch.utils.data.DataLoader(
        reconstruction_set, batch_size=batch_size, shuffle=True
    )

    def _validation_loss():
        # Switches the model to eval mode; the training loop flips it back.
        model.eval()
        with torch.no_grad():
            return criterion(model(X_val), X_val).item()

    for epoch_no in range(1, epochs + 1):
        model.train()
        for xb, _ in train_loader:
            optimizer.zero_grad()
            criterion(model(xb), xb).backward()
            optimizer.step()

        if verbose and epoch_no % 10 == 0:
            val_loss = _validation_loss()
            print(f"[AE] Epoch {epoch_no:03d} | Val Loss: {val_loss:.6f}")

    val_loss = _validation_loss()
    return model, val_loss


In [None]:
def extract_encoder(autoencoder_model, bottleneck_activation=nn.Tanh):
    """Slice the encoder half out of an autoencoder.

    Walks ``autoencoder_model.model`` from the first layer and keeps every
    layer up to and including the first instance of ``bottleneck_activation``.
    The returned ``nn.Sequential`` wraps the same layer objects (nothing is
    copied), so its parameters are shared with the autoencoder.

    Args:
        autoencoder_model: Object whose ``.model`` attribute iterates over its
            layers in order.
        bottleneck_activation: Activation class (or tuple of classes) that
            marks the end of the encoder. Defaults to ``nn.Tanh``.

    Returns:
        ``nn.Sequential`` holding the encoder layers. If the marker never
        occurs, every layer is returned and a ``UserWarning`` is emitted,
        because the result then still contains the decoder.
    """
    import warnings  # function-scope import keeps this cell self-contained

    layers = []
    for layer in autoencoder_model.model:
        layers.append(layer)
        if isinstance(layer, bottleneck_activation):  # stop at bottleneck activation
            break
    else:
        # Loop ran to completion: no bottleneck marker was found.
        warnings.warn(
            "extract_encoder: no bottleneck activation found; "
            "returning the full autoencoder as the encoder.",
            UserWarning,
            stacklevel=2,
        )
    return nn.Sequential(*layers)

def get_latent_dim(encoder):
    """Return ``out_features`` of the last ``nn.Linear`` layer in ``encoder``.

    Raises:
        ValueError: If ``encoder`` contains no ``nn.Linear`` layer.
    """
    # Scan back to front so the first hit is the final linear layer.
    final_linear = next(
        (module for module in reversed(encoder) if isinstance(module, nn.Linear)),
        None,
    )
    if final_linear is None:
        raise ValueError("Could not find a Linear layer in encoder.")
    return final_linear.out_features

def train_encoder_regressor(encoder, X_train, y_train, X_val, y_val,
                            reg_layers=[32, "RL"], lr=1e-3, batch_size=32,
                            epochs=100, fine_tune=False, verbose=False):
    """Train a regression head on top of a (pre-trained) encoder.

    The model is ``encoder`` followed by an ``MLP`` whose input width is the
    encoder's latent size (from ``get_latent_dim``) and whose output width is
    the number of target columns.

    Args:
        encoder: Layer stack producing the latent representation.
        X_train, y_train, X_val, y_val: Data; anything ``torch.tensor`` can
            convert to float32. Flat (1-D) targets are treated as a single
            output column.
        reg_layers: Architecture list for the regression head. The default
            list is only read, never modified.
        lr: Adam learning rate.
        batch_size: Mini-batch size of the shuffled training loader.
        epochs: Number of passes over the training data.
        fine_tune: If true the encoder is trained together with the head;
            otherwise its parameters are frozen.
        verbose: When true, print the validation loss every 10th epoch.

    Returns:
        ``(model, val_loss)``: the combined model in eval mode and its MSE on
        the validation data.

    Side effects:
        ``requires_grad`` of every encoder parameter is set to ``fine_tune``
        on the encoder object that was passed in.
    """
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)
    X_val   = torch.tensor(X_val, dtype=torch.float32)
    y_val   = torch.tensor(y_val, dtype=torch.float32)

    # A flat target has no second axis, so `y_train.shape[1]` below would
    # raise IndexError; it would also broadcast against the (N, 1) prediction
    # inside MSELoss. Promote flat targets to a single column.
    if y_train.dim() == 1:
        y_train = y_train.unsqueeze(1)
    if y_val.dim() == 1:
        y_val = y_val.unsqueeze(1)

    latent_dim = get_latent_dim(encoder)

    class EncoderRegressor(nn.Module):
        """Encoder followed by an MLP regression head."""

        def __init__(self, encoder, reg_layers, output_dim):
            super().__init__()
            self.encoder = encoder
            # latent_dim is captured from the enclosing function scope.
            self.regressor = MLP(latent_dim, output_dim, reg_layers)

        def forward(self, x):
            z = self.encoder(x)
            return self.regressor(z)

    model = EncoderRegressor(encoder, reg_layers, y_train.shape[1])

    # Freeze encoder unless fine_tune=True
    for p in model.encoder.parameters():
        p.requires_grad = fine_tune

    # Only hand trainable parameters to the optimizer.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    criterion = nn.MSELoss()

    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_train, y_train),
        batch_size=batch_size, shuffle=True
    )

    for epoch in range(epochs):
        model.train()
        for xb, yb in train_loader:
            optimizer.zero_grad()
            pred = model(xb)
            loss = criterion(pred, yb)
            loss.backward()
            optimizer.step()

        if verbose and (epoch+1) % 10 == 0:
            model.eval()
            with torch.no_grad():
                val_loss = criterion(model(X_val), y_val).item()
            print(f"[REG] Epoch {epoch+1:03d} | Val Loss: {val_loss:.6f}")

    model.eval()
    with torch.no_grad():
        val_loss = criterion(model(X_val), y_val).item()
    return model, val_loss


In [None]:
def grid_search_autoencoder(X_train, y_train, X_val, y_val, input_dim, output_dim, param_grid):
    """Grid search over the autoencoder-pretraining + regressor pipeline.

    For each combination in ``param_grid`` an autoencoder is trained, its
    encoder is extracted, and a regression head is trained on top of it.
    Combinations are ranked by the regressor's validation loss only; the
    autoencoder loss is printed for information.

    Args:
        X_train, y_train, X_val, y_val: Training / validation data.
        input_dim: Feature count passed to the autoencoder.
        output_dim: Accepted for signature parity with the other grid
            searches; not used in this function.
        param_grid: Dict of lists with the keys ``ae_layers``, ``lr``,
            ``batch_size``, ``ae_epochs``, ``reg_layers``, ``reg_epochs``
            and ``fine_tune``.

    Returns:
        ``(results, best_model, best_params)`` with ``results`` a list of
        ``(params, val_loss)`` tuples sorted by ascending loss.
    """
    scored = []
    top_loss = float('inf')
    top_model = None
    top_params = None

    names, candidates = zip(*param_grid.items())

    for choice in product(*candidates):
        config = dict(zip(names, choice))
        print(f"\n🔍 Testing config:\n{config}")

        # Stage 1: unsupervised pre-training of the autoencoder.
        autoencoder, ae_loss = train_autoencoder(
            X_train, X_val,
            input_dim=input_dim,
            hidden_layers=config['ae_layers'],
            lr=config['lr'],
            batch_size=config['batch_size'],
            epochs=config['ae_epochs'],
            verbose=False
        )

        # Stage 2: keep only the encoder half.
        encoder = extract_encoder(autoencoder)

        # Stage 3: supervised regression head on the encoder output.
        regressor, reg_loss = train_encoder_regressor(
            encoder, X_train, y_train, X_val, y_val,
            reg_layers=config['reg_layers'],
            lr=config['lr'],
            batch_size=config['batch_size'],
            epochs=config['reg_epochs'],
            fine_tune=config['fine_tune'],
            verbose=False
        )

        print(f"→ AE Val Loss: {ae_loss:.6f} | REG Val Loss: {reg_loss:.6f}")

        scored.append((config, reg_loss))
        if reg_loss < top_loss:
            top_loss, top_model, top_params = reg_loss, regressor, config

    scored.sort(key=lambda entry: entry[1])
    print("\n✅ Best config:", top_params)
    print(f"✅ Best Val Loss: {top_loss:.6f}")
    return scored, top_model, top_params


In [None]:
# --- Hierarchical Model (uses your MLP class) ---
class HierarchicalMLP(nn.Module):
    """Three-stage tree of small MLPs.

    Stage 1: six networks, each mapping one pair of adjacent input columns
             (columns 0-1, 2-3, ..., 10-11) to a single value.
    Stage 2: two networks, each mapping three stage-1 outputs to three values
             (the first takes pairs 0-2, the second pairs 3-5).
    Stage 3: one network mapping the six stage-2 outputs to three outputs.

    Args:
        mlp_class: Network class called as ``mlp_class(in_dim, out_dim, arch)``.
        pair_arch, mid_arch, final_arch: Architecture lists for the three
            stages. The default lists are only read, never modified.
    """

    def __init__(self, mlp_class=MLP, pair_arch=[8,"RL",4,"RL"], mid_arch=[8,"RL",4,"RL"], final_arch=[16,"RL"]):
        super().__init__()

        # Construction order (pairs, mids, final) is kept so weight
        # initialisation consumes the RNG in the same sequence.
        pair_nets = []
        for _ in range(6):
            pair_nets.append(mlp_class(2, 1, pair_arch))
        self.pairs = nn.ModuleList(pair_nets)

        mid_nets = []
        for _ in range(2):
            mid_nets.append(mlp_class(3, 3, mid_arch))
        self.mids = nn.ModuleList(mid_nets)

        self.final = mlp_class(6, 3, final_arch)

    def forward(self, x):
        """Apply the three stages to ``x``, reading its columns 0..11."""
        stage1 = []
        for i in range(6):
            start = 2 * i
            stage1.append(self.pairs[i](x[:, start:start + 2]))

        left = self.mids[0](torch.cat(stage1[:3], dim=1))
        right = self.mids[1](torch.cat(stage1[3:], dim=1))

        merged = torch.cat([left, right], dim=1)
        return self.final(merged)

from itertools import product
import torch, torch.nn as nn, torch.optim as optim

def grid_search_mlp_multi(X_train, y_train, X_val, y_val, param_grid):
    """Hold-out grid search for ``HierarchicalMLP``.

    Args:
        X_train, y_train, X_val, y_val: Data; anything ``torch.tensor`` can
            convert to float32.
        param_grid: Dict of lists. Recognised keys and the value used when a
            key is absent: ``pair_arch`` ([8, "RL", 4, "RL"]), ``mid_arch``
            ([8, "RL", 4, "RL"]), ``final_arch`` ([16, "RL"]), ``lr`` (1e-3),
            ``batch_size`` (32), ``epochs`` (100).

    Returns:
        ``(results, best_model, best_params)`` with ``results`` a list of
        ``(params, val_loss)`` tuples sorted by ascending validation MSE.
    """
    scored = []
    top_loss = float('inf')
    top_model = None
    top_params = None
    names, candidates = zip(*param_grid.items())

    # Tensor conversion happens once, outside the search loop.
    def _as_float32(data):
        return torch.tensor(data, dtype=torch.float32)

    X_train = _as_float32(X_train)
    y_train = _as_float32(y_train)
    X_val = _as_float32(X_val)
    y_val = _as_float32(y_val)
    mse = nn.MSELoss()

    for choice in product(*candidates):
        config = dict(zip(names, choice))
        print(f"Testing: {config}")

        net = HierarchicalMLP(
            pair_arch=config.get("pair_arch", [8, "RL", 4, "RL"]),
            mid_arch=config.get("mid_arch", [8, "RL", 4, "RL"]),
            final_arch=config.get("final_arch", [16, "RL"])
        )

        adam = optim.Adam(net.parameters(), lr=config.get("lr", 1e-3))
        train_set = torch.utils.data.TensorDataset(X_train, y_train)
        batches = torch.utils.data.DataLoader(
            train_set, batch_size=config.get("batch_size", 32), shuffle=True
        )

        # Training: a freshly built module is already in train mode.
        n_epochs = config.get("epochs", 100)
        for _ in range(n_epochs):
            for xb, yb in batches:
                adam.zero_grad()
                batch_loss = mse(net(xb), yb)
                batch_loss.backward()
                adam.step()

        # Validation: one full-batch pass without gradients.
        net.eval()
        with torch.no_grad():
            score = mse(net(X_val), y_val).item()

        scored.append((config, score))
        if score < top_loss:
            top_loss = score
            top_model = net
            top_params = config

    scored.sort(key=lambda entry: entry[1])
    return scored, top_model, top_params