In [8]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# ===============================
# 1. Load your dataset
# ===============================
df = pd.read_csv("./../Data Given for Challenge/data/normalized_features.csv")

# Example: assume 'out' is the target, and rest are features
X = df.drop(columns=["out", "Timestamp", "Location"], errors="ignore")
y = df["out"]

# Train/Validation split
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ===============================
# 2. Define base models
# ===============================
base_models = [
    (
        "rf",
        RandomForestRegressor(
            n_estimators=300,
            random_state=42
        )
    ),  # CPU only
    (
        "xgb",
        XGBRegressor(
            n_estimators=300,
            learning_rate=0.05,
            max_depth=8,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            tree_method="gpu_hist"  # Remove or change if no GPU
        )
    ),
    (
        "lgbm",
        LGBMRegressor(
            n_estimators=300,
            learning_rate=0.05,
            num_leaves=64,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            device="gpu"  # Remove this argument if LightGBM GPU not available
        )
    )
]

# ===============================
# 3. Define stacking ensemble
# ===============================
stacked_model = StackingRegressor(
    estimators=base_models,
    final_estimator=RandomForestRegressor(n_estimators=300, random_state=42),
    passthrough=True,   # pass original features + predictions
    n_jobs=-1
)

# ===============================
# 4. Train model
# ===============================
stacked_model.fit(X_train, y_train)

# ===============================
# 5. Evaluate
# ===============================
y_pred = stacked_model.predict(X_val)

# Safe RMSE computation (works on all sklearn versions)
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_val, y_pred)
r2 = r2_score(y_val, y_pred)

print(f"✅ RMSE: {rmse:.4f}")
print(f"✅ MAE: {mae:.4f}")
print(f"✅ R² : {r2:.4f}")



    E.g. tree_method = "hist", device = "cuda"

  if len(data.shape) != 1 and self.num_features() != data.shape[1]:


✅ RMSE: 194.3919
✅ MAE: 34.8245
✅ R² : 0.7725


In [3]:
print(f"✅ RMSE: {rmse:.4f}")
print(f"✅ MAE: {mae:.4f}")
print(f"✅ R²: {r2:.4f}")

✅ RMSE: 205.4661
✅ MAE: 37.3227
✅ R²: 0.7459
