In [3]:
%pip install scikit-learn joblib tensorflow
%pip install shap
%pip install matplotlib
%pip install seaborn

import os
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import seaborn as sns
import shap
import json

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


2025-05-16 18:04:54.351855: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747418694.374388 2860580 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747418694.381481 2860580 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747418694.401889 2860580 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747418694.401918 2860580 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747418694.401920 2860580 computation_placer.cc:177] computation placer alr

In [4]:
def build_regression_mlp(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),                 
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss="mse", metrics=["mae"])
    return model

In [5]:
def evaluate(y_true, y_pred, results, solver_name, label):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    rel_rmse = rmse / np.mean(y_true)

    safe_y_true = np.where(y_true == 0, 1e-8, y_true)
    mape = np.mean(np.abs((y_true - y_pred) / safe_y_true)) * 100

    result = {
        "Solver": solver_name,
        "Dataset": label,
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "R2": r2,
        "Rel_RMSE": rel_rmse,
        "MAPE (%)": mape,
    }
    results.append(result)

In [None]:
def log_mlp_shap_and_importance(model, X_val, y_val, pred_val, features, solver_name, target):

    background = X_val[np.random.choice(X_val.shape[0], min(100, X_val.shape[0]), replace=False)]

    try:
        explainer = shap.DeepExplainer(model, background)
        shap_values = explainer.shap_values(X_val)

        
        if isinstance(shap_values, list):
            shap_values = shap_values[0]

        shap_values = np.squeeze(shap_values)
        assert shap_values.shape[1] == len(features), f"SHAP returned {shap_values.shape[1]} features, expected {len(features)}"

    except Exception as e:
        print(f"SHAP failed for {target}, error: {e}")
        return

    # Save SHAP values
    shap_df = pd.DataFrame(shap_values, columns=features)
    shap_df["predicted_value"] = pred_val
    shap_df["actual_value"] = y_val
    shap_df["target"] = target
    shap_df["solver"] = solver_name

    os.makedirs("./mlp/mlp_shap_values", exist_ok=True)
    shap_filename = f"./mlp/mlp_shap_values/shap_{solver_name}_{target}_reg.csv"
    shap_df.to_csv(shap_filename, index=False)

    # Feature importance 
    importance_df = pd.DataFrame({
        "feature": features,
        "importance": np.abs(shap_values).mean(axis=0),
        "target": target,
        "solver": solver_name,
    })

    os.makedirs("./mlp/mlp_feature_importance", exist_ok=True)
    importance_file = "./mlp/mlp_feature_importance/mlp_feature_importance_reg.csv"
    importance_df.to_csv(importance_file, mode='a', index=False, header=not os.path.exists(importance_file))

    # Save top-5
    top5_file = "./mlp/mlp_feature_importance/mlp_top5_feature_importance_reg.csv"
    importance_df.sort_values(by="importance", ascending=False).head(5).to_csv(
        top5_file, mode='a', index=False, header=not os.path.exists(top5_file)
    )

In [None]:
def train_mlp_for_solver(solver_name, train_file, test_file, val_file):
    df_train = pd.read_csv(train_file)
    df_test = pd.read_csv(test_file)
    df_val = pd.read_csv(val_file)

    df_train.dropna(inplace=True)
    df_test.dropna(inplace=True)
    df_val.dropna(inplace=True)

    targets = ["solution_time", "optimality_gap", "peak_memory"]
    features = [
        "number_of_elements", "capacity", "max_weight", "min_weight", "mean_weight",
        "median_weight", "std_weight", "weight_range", "max_profit", "min_profit", "mean_profit",
        "median_profit", "std_profit", "profit_range", "renting_ratio", "mean_weight_profit_ratio",
        "median_weight_profit_ratio", "capacity_mean_weight_ratio", "capacity_median_weight_ratio",
        "capacity_std_weight_ratio", "std_weight_profit_ratio", "weight_profit_correlation",
        "ram", "cpu_cores"
    ]

    # Clean targets
    for df in [df_train, df_test, df_val]:
        for col in targets:
            df[col] = pd.to_numeric(df[col], errors='coerce')
            df.dropna(subset=[col], inplace=True)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(df_train[features])
    X_test = scaler.transform(df_test[features])
    X_val = scaler.transform(df_val[features])

    os.makedirs("./mlp_configs", exist_ok=True)
    results = []

    for target in targets:
        y_train = df_train[target].values
        y_test = df_test[target].values
        y_val = df_val[target].values

        # Normalize target
        y_scaler = StandardScaler()
        y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1)).flatten()
        y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1)).flatten()
        y_val_scaled = y_scaler.transform(y_val.reshape(-1, 1)).flatten()

        best_rmse = float("inf")
        best_model = None
        best_epoch = None
        best_pred_test = None
        best_pred_val = None
        best_y_test = y_scaler.inverse_transform(y_test_scaled.reshape(-1, 1)).flatten()
        best_y_val = y_scaler.inverse_transform(y_val_scaled.reshape(-1, 1)).flatten()

        for epochs in [100, 500]:
            model = build_regression_mlp(X_train.shape[1])
            model.fit(X_train, y_train_scaled, epochs=epochs, batch_size=32,
                      validation_split=0.2, verbose=0)

            pred_test_scaled = model.predict(X_test).flatten()
            pred_val_scaled = model.predict(X_val).flatten()
            pred_test = y_scaler.inverse_transform(pred_test_scaled.reshape(-1, 1)).flatten()
            pred_val = y_scaler.inverse_transform(pred_val_scaled.reshape(-1, 1)).flatten()

            rmse = np.sqrt(mean_squared_error(best_y_val, pred_val))

            if rmse < best_rmse:
                best_rmse = rmse
                best_model = model
                best_epoch = epochs
                best_pred_test = pred_test
                best_pred_val = pred_val

        print("[TEST]")
        evaluate(best_y_test, best_pred_test, results, solver_name, f"{target} (Test)")
        print("[VAL]")
        evaluate(best_y_val, best_pred_val, results, solver_name, f"{target} (Val)")
        
        # Save model
        model_path = f"./mlp_models/mlp_model_{solver_name}_{target}.h5"
        best_model.save(model_path)
        print(f"Saved model to {model_path}")

        log_mlp_shap_and_importance(best_model, X_val, best_y_val, best_pred_val, features, solver_name, target)

        # Save best config
        config_path = f"./mlp_configs/best_mlp_{solver_name}_{target}_reg.json"
        with open(config_path, "w") as f:
            json.dump({"epochs": best_epoch}, f, indent=4)

    # Save results
    results_df = pd.DataFrame(results)
    results_file = "./mlp_evaluation_results_reg.csv"
    results_df.to_csv(results_file, mode='a', index=False, header=not os.path.exists(results_file))

In [8]:
def run_all_models(base_folder):
    for root, dirs, files in os.walk(base_folder):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            csv_files = os.listdir(folder_path)

            train_file = [f for f in csv_files if f.endswith("_train.csv")]
            test_file = [f for f in csv_files if f.endswith("_test.csv")]
            val_file = [f for f in csv_files if f.endswith("_val.csv")]

            if train_file and test_file and val_file:
                train_fp = os.path.join(folder_path, train_file[0])
                test_fp = os.path.join(folder_path, test_file[0])
                val_fp = os.path.join(folder_path, val_file[0])

                solver_name = folder
                train_mlp_for_solver(solver_name, train_fp, test_fp, val_fp)

In [None]:
base_folder = "./trainingData/final_td_min"  #Specify path to training data
run_all_models(base_folder)

I0000 00:00:1747418697.693526 2860580 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1067 MB memory:  -> device: 0, name: NVIDIA RTX A5000, pci bus id: 0000:25:00.0, compute capability: 8.6
I0000 00:00:1747418697.699608 2860580 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22170 MB memory:  -> device: 1, name: NVIDIA RTX A5000, pci bus id: 0000:61:00.0, compute capability: 8.6
I0000 00:00:1747418699.367252 2861634 service.cc:152] XLA service 0x7ce75c00a500 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747418699.367302 2861634 service.cc:160]   StreamExecutor device (0): NVIDIA RTX A5000, Compute Capability 8.6
I0000 00:00:1747418699.367309 2861634 service.cc:160]   StreamExecutor device (1): NVIDIA RTX A5000, Compute Capability 8.6
2025-05-16 18:04:59.483136: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 73ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 





Target: SOLUTION_TIME | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_or_min_solution_time.h5


Expected: keras_tensor_6
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_6
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_6
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 





Target: OPTIMALITY_GAP | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_or_min_optimality_gap.h5


Expected: keras_tensor_18
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_18
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_18
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 





Target: PEAK_MEMORY | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_or_min_peak_memory.h5


Expected: keras_tensor_30
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_30
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_30
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 





Target: SOLUTION_TIME | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_gurobi_min_solution_time.h5


Expected: keras_tensor_42
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_42
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_42
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


  rel_rmse = rmse / np.mean(y_true)
  rel_rmse = rmse / np.mean(y_true)



Target: OPTIMALITY_GAP | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_gurobi_min_optimality_gap.h5


Expected: keras_tensor_54
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_54
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_54
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 





Target: PEAK_MEMORY | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_gurobi_min_peak_memory.h5


Expected: keras_tensor_66
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_66
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_66
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 





Target: SOLUTION_TIME | Best Epochs: 100
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_greedy_min_solution_time.h5


Expected: keras_tensor_72
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_72
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_72
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 





Target: OPTIMALITY_GAP | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_greedy_min_optimality_gap.h5


Expected: keras_tensor_90
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_90
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_90
Received: inputs=['Tensor(shape=(560, 24))']


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 





Target: PEAK_MEMORY | Best Epochs: 500
[TEST]
[VAL]
Saved MLP model to ./results_min_kp/mlp_models/mlp_model_greedy_min_peak_memory.h5


Expected: keras_tensor_102
Received: inputs=['Tensor(shape=(100, 24))']
Expected: keras_tensor_102
Received: inputs=['Tensor(shape=(200, 24))']
Expected: keras_tensor_102
Received: inputs=['Tensor(shape=(560, 24))']
