In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet, RidgeCV
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, StackingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from xgboost import XGBRegressor

In [4]:
df = pd.read_csv(r"D:\D-drive\Coding\Major-Project\new_\data\preprocessed_data.csv")

In [5]:
X = df[["I","P","Q","T","Hydrogen","Oxygen","RH anode","Rh Cathode"]].values
y = df["V"].values

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
base_learners_baseline = [
    ("enet", make_pipeline(StandardScaler(), ElasticNet(max_iter=5000))),
    ("svr",  make_pipeline(StandardScaler(), SVR(kernel="rbf", C=10, epsilon=0.1))),
    ("knn",  make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=7))),
    ("gbr",  GradientBoostingRegressor(n_estimators=300, learning_rate=0.05, max_depth=3, random_state=42)),
]

In [8]:
meta = RidgeCV(alphas=np.logspace(-4, 4, 25))

In [9]:
stack_baseline = StackingRegressor(
    estimators=base_learners_baseline,
    final_estimator=meta,
    cv=5,
    passthrough=False
)

In [10]:
stack_baseline.fit(X_train, y_train)
y_pred_base = stack_baseline.predict(X_test)

In [11]:
rmse_base = np.sqrt(mean_squared_error(y_test, y_pred_base))
r2_base = r2_score(y_test, y_pred_base)
mae_base = mean_absolute_error(y_test, y_pred_base)

In [12]:
print("\n===== Baseline Stack =====")
print(f"RMSE: {rmse_base:.4f}")
print(f"R²:   {r2_base:.4f}")
print(f"MAE:  {mae_base:.4f}")


===== Baseline Stack =====
RMSE: 0.9311
R²:   0.9998
MAE:  0.5014


In [13]:
# Improved Stack (with XGB + passthrough)

base_learners_improved = [
    ("enet", make_pipeline(StandardScaler(), ElasticNet(max_iter=5000))),
    ("svr",  make_pipeline(StandardScaler(), SVR(kernel="rbf", C=10, epsilon=0.1))),
    ("knn",  make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=7))),
    ("xgb",  XGBRegressor(n_estimators=300, learning_rate=0.05, max_depth=3,
                           random_state=42, verbosity=0)),
]

In [14]:
stack_improved = StackingRegressor(
    estimators=base_learners_improved,
    final_estimator=meta,
    cv=5,
    passthrough=True   # Let meta-learner see original features + preds
)

In [15]:
stack_improved.fit(X_train, y_train)
y_pred_imp = stack_improved.predict(X_test)

In [16]:
rmse_imp = np.sqrt(mean_squared_error(y_test, y_pred_imp))
r2_imp = r2_score(y_test, y_pred_imp)
mae_imp = mean_absolute_error(y_test, y_pred_imp)

In [17]:
print("\n===== Improved Stack (XGB + passthrough) =====")
print(f"RMSE: {rmse_imp:.4f}")
print(f"R²:   {r2_imp:.4f}")
print(f"MAE:  {mae_imp:.4f}")


===== Improved Stack (XGB + passthrough) =====
RMSE: 1.0656
R²:   0.9997
MAE:  0.6044


In [18]:
# Extra: Cross-validated RMSE

scores_base = cross_val_score(stack_baseline, X, y, cv=5,
                              scoring="neg_root_mean_squared_error")
scores_imp = cross_val_score(stack_improved, X, y, cv=5,
                             scoring="neg_root_mean_squared_error")

print("\n===== Cross-Validation RMSE =====")
print(f"Baseline Stack CV RMSE: {-scores_base.mean():.4f}")
print(f"Improved Stack CV RMSE: {-scores_imp.mean():.4f}")



===== Cross-Validation RMSE =====
Baseline Stack CV RMSE: 21.1786
Improved Stack CV RMSE: 49.1663
