In [None]:
# Try a gradient-boosting meta-learner stacked on top of the tuned LightGBM and XGBoost models

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_predict

# ---- Data preparation -------------------------------------------------------
# Read the two CSVs. The second frame has its own 'critical_temp' column
# removed (ignored if absent) so the target appears only once after merging.
main_data = pd.read_csv("./data/train.csv")  # Superconductivity dataset
unique_m = pd.read_csv("./data/unique_m.csv").drop(columns=["critical_temp"], errors='ignore')

# Side-by-side concatenation: rows are matched on the DataFrame index.
# NOTE(review): this assumes both files list the same samples in the same
# order — confirm; a length mismatch would silently introduce NaN rows.
merged_data = pd.concat([main_data, unique_m], axis=1)

# Separate the regression target from the predictors; the 'material' column
# is dropped from the feature matrix together with the target.
target = "critical_temp"
y = merged_data[target]
X = merged_data.drop(columns=[target, "material"])

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---- Base learners ----------------------------------------------------------
# Hyperparameters are hard-coded below (presumably the result of an earlier
# tuning run — the search itself is not part of this cell).

# Base learner 1: LightGBM
# NOTE(review): LightGBM applies `subsample` only when `subsample_freq` > 0
# (its default is 0), so the value below may have no effect — confirm intent.
lgb_params = dict(
    n_estimators=496,
    max_depth=15,
    learning_rate=0.057878589503943714,
    subsample=0.6619352139576826,
    colsample_bytree=0.7512301369524537,
    num_leaves=148,
    verbose=-1,
    force_col_wise=True,
)
optimized_lgb = lgb.LGBMRegressor(**lgb_params)
optimized_lgb.fit(X_train, y_train)

# Base learner 2: XGBoost (histogram tree method, seeded)
xgb_params = dict(
    n_estimators=407,
    max_depth=10,
    learning_rate=0.02962746174406205,
    subsample=0.8786056663685927,
    colsample_bytree=0.6260167856358314,
    gamma=4.321388407974591,
    tree_method="hist",
    random_state=42,
)
optimized_xgb = xgb.XGBRegressor(**xgb_params)
optimized_xgb.fit(X_train, y_train)

# Generate OUT-OF-FOLD predictions for meta-model training.
# The base models were fit on all of X_train, so calling .predict(X_train) on
# them yields in-sample predictions that are far more accurate than anything
# the meta-learner will receive at test time; training on those leaks the
# target and teaches the meta-learner to over-trust the base models.
# cross_val_predict fits clones of each base model on K-1 folds and predicts
# the held-out fold, so every training row is scored by a model that never saw
# it. The already-fitted optimized_lgb / optimized_xgb objects are not modified.
# Cost: each base model is trained N_META_FOLDS additional times.
N_META_FOLDS = 5
meta_cv = KFold(n_splits=N_META_FOLDS, shuffle=True, random_state=42)  # same folds for both models
y_pred_lgb_train = cross_val_predict(optimized_lgb, X_train, y_train, cv=meta_cv)
y_pred_xgb_train = cross_val_predict(optimized_xgb, X_train, y_train, cv=meta_cv)

# Generate predictions for meta-model testing (base models fit on the full training split)
y_pred_lgb_test = optimized_lgb.predict(X_test)
y_pred_xgb_test = optimized_xgb.predict(X_test)

# Stack predictions as new features: column 0 = LightGBM, column 1 = XGBoost
X_meta_train = np.column_stack((y_pred_lgb_train, y_pred_xgb_train))
X_meta_test = np.column_stack((y_pred_lgb_test, y_pred_xgb_test))

# Train Meta-Learner (Gradient Boosting) on the out-of-fold meta features
meta_model = GradientBoostingRegressor(n_estimators=200, max_depth=3, learning_rate=0.05, random_state=42)
meta_model.fit(X_meta_train, y_train)

# ---- Evaluation on the held-out split ---------------------------------------
# Score the stacked model: the meta-learner consumes the base models'
# test-set predictions and its output is compared against y_test.
y_pred_meta = meta_model.predict(X_meta_test)

# RMSE is the square root of the mean squared error; R² comes from sklearn
meta_mse = mean_squared_error(y_test, y_pred_meta)
meta_rmse = np.sqrt(meta_mse)
meta_r2 = r2_score(y_test, y_pred_meta)

print(
    f"Meta-Learning Model (Gradient Boosting) - Test RMSE: {meta_rmse:.4f}, Test R²: {meta_r2:.4f}"
)


Results:

Meta-Learning Model (Gradient Boosting) - Test RMSE: 8.5433, Test R²: 0.9366