In [4]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet, RidgeCV
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, StackingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from xgboost import XGBRegressor

In [None]:
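# xgboost is required by the import cell above. If it is missing, install it into
# the kernel's environment first (3.0.5 is the version the log below downloaded):
# %pip install xgboost==3.0.5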

Collecting xgboost
  Downloading xgboost-3.0.5-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.5-py3-none-win_amd64.whl (56.8 MB)
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.3/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.5/56.8 MB 1.3 MB/s eta 0:00:44
    --------------------------------------- 0.8/56.8 MB 958.5 kB/s eta 0:00:59
    --------------------------------------- 1.0/56.8 MB 1.0 MB/s eta 0:00:56
    --------------------------------------- 1.3/56.8 MB 1.1 MB/s eta 0:00:51
   - -------------------------------------- 1.6/56.8 MB 1.1 MB/s eta 0:00:51
   - -------------------------------------- 1.8/56.8 MB 1.1 MB/s eta 0:00:50
   - -------------------------------------- 2.1/56.8 MB 1.1 MB/s eta 0:00:49
   - -------------------------------------- 2.1/56.8 MB 1.1 MB/s eta 0:00:49
   - --------------------

In [5]:
# Path to the preprocessed PEMFC dataset. The original absolute path is kept as
# the default so existing runs behave the same; set the PEMFC_DATA_PATH
# environment variable to load the file from another location or machine.
DATA_PATH = Path(
    os.environ.get(
        "PEMFC_DATA_PATH",
        r"C:\Users\HARSHITH\Downloads\pemfc_preprocessed_data.csv",
    )
)
df = pd.read_csv(DATA_PATH)


In [6]:
# Preview the first five rows. The `Unnamed: 0` column in the output is
# presumably a row index written out by the preprocessing step — TODO confirm.
df.head(n=5)

Unnamed: 0,I,P,Q,T,Hydrogen,Oxygen,RH anode,Rh Cathode,V
0,-0.886787,-1.027522,-0.806638,-1.261444,-1.873236,-1.227992,-2.493136,-1.875273,491.114619
1,-0.886787,-1.027522,-0.806638,-1.261444,-1.87299,-1.227992,-2.493136,-1.875273,491.114619
2,-0.886787,-1.027522,-0.806638,-1.261444,-1.872671,-1.227991,-2.493136,-1.875273,491.114619
3,-0.886787,-1.027522,-0.806638,-1.261444,-1.870597,-1.227988,-2.493136,-1.875273,491.114619
4,-0.886787,-1.027522,-0.806638,-1.261444,-1.867493,-1.227981,-2.493136,-1.875273,491.114619


In [None]:
# Feature matrix: six of the loaded columns ("P", "Q" and "Unnamed: 0" are left out).
feature_columns = ["I", "T", "Hydrogen", "Oxygen", "RH anode", "Rh Cathode"]
X = df.loc[:, feature_columns].to_numpy()

In [60]:
# Regression target: the "V" column as a 1-D array.
y = df.loc[:, "V"].to_numpy()

In [42]:
# 80/20 hold-out split with a fixed seed so the split is reproducible.
# NOTE(review): the rows shown by df.head() are near-duplicates of each other;
# a shuffled split may place almost identical rows in both train and test,
# which would make the hold-out metrics optimistic — confirm whether an
# ordered or grouped split is more appropriate for this dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [51]:
# Base learners for the baseline stack.
# ElasticNet, SVR and KNN are each wrapped in a pipeline that standardizes the
# inputs first; the gradient-boosting model is used on the features as-is.
base_learners_baseline = [
    (label, make_pipeline(StandardScaler(), regressor))
    for label, regressor in [
        ("enet", ElasticNet(max_iter=5000)),
        ("svr", SVR(kernel="rbf", C=10, epsilon=0.1)),
        ("knn", KNeighborsRegressor(n_neighbors=7)),
    ]
] + [
    (
        "gbr",
        GradientBoostingRegressor(
            n_estimators=300,
            learning_rate=0.05,
            max_depth=3,
            random_state=42,
        ),
    ),
]

In [52]:
# Meta-learner: ridge regression that picks its own alpha from 25
# log-spaced candidates between 1e-4 and 1e4.
meta = RidgeCV(alphas=np.logspace(start=-4, stop=4, num=25))

In [53]:
# Baseline stack: the meta-learner is trained on the base learners'
# predictions only (passthrough=False), using 5-fold CV inside the stack.
stack_baseline = StackingRegressor(
    base_learners_baseline,
    final_estimator=meta,
    cv=5,
    passthrough=False,
)

In [55]:
# Fit the baseline stack on the training split, then predict the hold-out set.
# `fit` returns the fitted estimator, so the two steps can be chained.
y_pred_base = stack_baseline.fit(X_train, y_train).predict(X_test)

In [56]:
# Hold-out metrics for the baseline stack (RMSE is the square root of the MSE).
mse_base = mean_squared_error(y_test, y_pred_base)
rmse_base = np.sqrt(mse_base)
mae_base = mean_absolute_error(y_test, y_pred_base)
r2_base = r2_score(y_test, y_pred_base)

In [58]:
# Report the baseline hold-out metrics, one per line.
print(
    "\n===== Baseline Stack =====",
    f"RMSE: {rmse_base:.4f}",
    f"R²:   {r2_base:.4f}",
    f"MAE:  {mae_base:.4f}",
    sep="\n",
)


===== Baseline Stack =====
RMSE: 0.9764
R²:   0.9997
MAE:  0.5502


In [17]:
# "Improved" stack (XGB + passthrough).
# Same three scaled learners as the baseline list, but the tree-based member is
# XGBRegressor instead of GradientBoostingRegressor.
base_learners_improved = [
    (label, make_pipeline(StandardScaler(), regressor))
    for label, regressor in [
        ("enet", ElasticNet(max_iter=5000)),
        ("svr", SVR(kernel="rbf", C=10, epsilon=0.1)),
        ("knn", KNeighborsRegressor(n_neighbors=7)),
    ]
] + [
    (
        "xgb",
        XGBRegressor(
            n_estimators=300,
            learning_rate=0.05,
            max_depth=3,
            random_state=42,
            verbosity=0,
        ),
    ),
]

In [18]:
# Second stack: same meta-learner and 5-fold setup as the baseline, but with
# passthrough=True so the meta-learner sees the original features in addition
# to the base learners' predictions.
stack_improved = StackingRegressor(
    base_learners_improved,
    final_estimator=meta,
    cv=5,
    passthrough=True,
)

In [19]:
# Fit the second stack on the training split, then predict the hold-out set.
# `fit` returns the fitted estimator, so the two steps can be chained.
y_pred_imp = stack_improved.fit(X_train, y_train).predict(X_test)

In [20]:
# Hold-out metrics for the second stack (RMSE is the square root of the MSE).
mse_imp = mean_squared_error(y_test, y_pred_imp)
rmse_imp = np.sqrt(mse_imp)
mae_imp = mean_absolute_error(y_test, y_pred_imp)
r2_imp = r2_score(y_test, y_pred_imp)

In [21]:
# Report the second stack's hold-out metrics, one per line.
print(
    "\n===== Improved Stack (XGB + passthrough) =====",
    f"RMSE: {rmse_imp:.4f}",
    f"R²:   {r2_imp:.4f}",
    f"MAE:  {mae_imp:.4f}",
    sep="\n",
)


===== Improved Stack (XGB + passthrough) =====
RMSE: 1.0656
R²:   0.9997
MAE:  0.6044


In [22]:
# Extra: 5-fold cross-validated RMSE for both stacks on the full dataset.
# The scorer returns negated RMSE, so the sign is flipped before reporting.
# NOTE(review): the recorded CV RMSE values are far larger than the hold-out
# RMSE printed earlier. An integer `cv` does not shuffle the rows, while
# train_test_split does, so the two evaluations are not comparable — confirm
# which one reflects the intended use before trusting either number.
cv_options = {"cv": 5, "scoring": "neg_root_mean_squared_error"}
scores_base = cross_val_score(stack_baseline, X, y, **cv_options)
scores_imp = cross_val_score(stack_improved, X, y, **cv_options)

cv_rmse_base = -scores_base.mean()
cv_rmse_imp = -scores_imp.mean()
print(
    "\n===== Cross-Validation RMSE =====",
    f"Baseline Stack CV RMSE: {cv_rmse_base:.4f}",
    f"Improved Stack CV RMSE: {cv_rmse_imp:.4f}",
    sep="\n",
)


===== Cross-Validation RMSE =====
Baseline Stack CV RMSE: 21.3108
Improved Stack CV RMSE: 49.1663


In [26]:
# Choose the model to persist. The baseline stack is selected: in the recorded
# outputs it has the lower RMSE on both the hold-out set and cross-validation.
import joblib
import pickle  # NOTE(review): not used in the visible cells — confirm before removing

model_name = "baseline_stacking_model"
best_model = stack_baseline


In [29]:
print("Saving the Baseline Stacking model (best performer).....")

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing; otherwise a fresh checkout fails here.
model_dir = Path("../models")
model_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(best_model, model_dir / f"{model_name}.pkl")
print(f"✓ Model saved as {model_name}.pkl")

Saving the Baseline Stacking model (best performer).....
✓ Model saved as baseline_stacking_model.pkl
