In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet, RidgeCV
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, StackingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.datasets import make_regression # Added for creating a sample dataset

# --- 1. Load Data ---
# Read the preprocessed dataset from disk. The path below is machine-specific:
# replace it with the location of your own CSV before running.
# (To run without the CSV, a synthetic dataset can be generated instead, e.g.
#  make_regression(n_samples=1500, n_features=8, noise=25, random_state=42).)
df = pd.read_csv(r"D:\Coding\Major-Project\new_\data\preprocessed_data.csv")

# Column names are used verbatim as dataframe lookups, so their spelling and
# capitalisation are kept exactly as written here.
feature_columns = ["I", "P", "Q", "T", "Hydrogen", "Oxygen", "RH anode", "Rh Cathode"]
target_column = "V"

# Plain NumPy arrays: X holds the eight feature columns, y the target column.
X = df[feature_columns].values
y = df[target_column].values

# --- 2. Split Data ---
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- 3. Define the Stacking Model ---
def _standardized(estimator):
    """Return a pipeline that applies StandardScaler before *estimator*."""
    return make_pipeline(StandardScaler(), estimator)


# Base models (Layer 0).
# History: earlier iterations used the same ElasticNet / SVR / KNN settings with
# a stronger GBR (first n_estimators=150, max_depth=4; then n_estimators=50,
# max_depth=2). The GBR was drastically simplified to force the other models
# to contribute more to the ensemble.
base_learners = [
    (
        "enet",
        _standardized(ElasticNet(alpha=0.01, l1_ratio=0.9, max_iter=5000)),
    ),
    (
        "svr",
        _standardized(SVR(kernel="rbf", C=100, gamma='auto', epsilon=0.1)),
    ),
    (
        "knn",
        _standardized(KNeighborsRegressor(n_neighbors=5, weights='distance')),
    ),
    (
        # Not wrapped in a scaler, unlike the three learners above.
        "gbr",
        GradientBoostingRegressor(
            n_estimators=30,
            learning_rate=0.1,
            max_depth=2,
            random_state=42,
        ),
    ),
]

# Meta-model (Layer 1): RidgeCV chooses its regularization strength from
# 25 log-spaced candidates between 1e-4 and 1e4.
ridge_alphas = np.logspace(-4, 4, num=25)
meta_learner = RidgeCV(alphas=ridge_alphas)

# Full stacking regressor: the meta-learner is fitted on cross-validated
# (5-fold) predictions of the base learners. With passthrough=False it sees
# only those predictions, not the original features.
stacking_model = StackingRegressor(
    estimators=base_learners,
    final_estimator=meta_learner,
    cv=5,
    passthrough=False,
)

# --- 4. Train the Model ---
# Fit the stacking ensemble on the training split only.
print("Training the stacking model...")
stacking_model.fit(X_train, y_train)
print("Training complete.")

# --- 5. Evaluate the Model ---
# Predict on the held-out test split and score the predictions.
y_pred = stacking_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the MSE, i.e. in the same units as y
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Assemble the report once and print it in a single call; the emitted text is
# the header followed by one metric per line, each rounded to 4 decimals.
performance_report = "\n".join(
    [
        "\n===== Model Performance =====",
        f"RMSE: {rmse:.4f}",
        f"R²:   {r2:.4f}",
        f"MAE:  {mae:.4f}",
    ]
)
print(performance_report)

# --- 6. EXTRACT AND PRINT THE META-LEARNER PARAMETERS ---
# After fitting, the trained meta-learner is available as `final_estimator_`.
fitted_meta = stacking_model.final_estimator_

lambda_val = fitted_meta.alpha_    # regularization strength (λ) selected by RidgeCV
beta_0 = fitted_meta.intercept_    # intercept (β₀)
beta_coeffs = fitted_meta.coef_    # weights of the base learners (β₁ to β₄)

print("\n===== Meta-Learner Parameters =====")
print(f"Optimal Lambda (λ): {lambda_val:.4f}\n")
print(f"Intercept (β₀): {beta_0:.4f}")

# One label per coefficient, listed in the same order as the entries of the
# 'base_learners' list (enet, svr, knn, gbr). Keep both in sync if that list
# is ever changed.
weight_labels = (
    "Weight for ElasticNet (β₁): ",
    "Weight for SVR (β₂): \t",
    "Weight for KNN (β₃): \t",
    "Weight for GBR (β₄): \t",
)
for position, label in enumerate(weight_labels):
    print(f"{label}{beta_coeffs[position]:.4f}")

Training the stacking model...
Training complete.

===== Model Performance =====
RMSE: 1.7813
R²:   0.9991
MAE:  1.1917

===== Meta-Learner Parameters =====
Optimal Lambda (λ): 0.0001

Intercept (β₀): -16.1825
Weight for ElasticNet (β₁): 0.0226
Weight for SVR (β₂): 	-0.2015
Weight for KNN (β₃): 	0.3881
Weight for GBR (β₄): 	0.8295
