In [33]:
import numpy as np
import pandas as pd
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load data
data = pd.read_csv('output/final_df.csv')
y = data['aqi_pm2.5']
X = data.drop('aqi_pm2.5', axis=1)

# Time-based split (same as before)
split_idx = int(0.8 * len(data))
X_test = X.iloc[split_idx:]
y_test = y.iloc[split_idx:]

# Load models
xgb_data = joblib.load('output/models/xgb_model.pkl')
rf_data = joblib.load('output/models/rf_model.pkl')

xgb_model = xgb_data['model']
rf_model = rf_data['model']
xgb_scaler = xgb_data['scaler_X']
rf_scaler = rf_data['scaler_X']

In [34]:
# Scale test data
X_test_xgb_scaled = xgb_scaler.transform(X_test)
X_test_rf_scaled = rf_scaler.transform(X_test)

# Get predictions
y_pred_xgb = xgb_model.predict(X_test_xgb_scaled)
y_pred_rf = rf_model.predict(X_test_rf_scaled)

In [35]:
# Combine predictions
predictions_df = pd.DataFrame({
    'XGBoost': y_pred_xgb,
    'RandomForest': y_pred_rf
})

# Train meta-model
meta_model = LinearRegression()
meta_model.fit(predictions_df, y_test)

# Ensemble predictions
y_pred_ensemble = meta_model.predict(predictions_df)

In [36]:
# Calculate metrics
metrics = {
    'XGBoost': {
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_xgb)),
        'R2': r2_score(y_test, y_pred_xgb)
    },
    'RandomForest': {
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_rf)),
        'R2': r2_score(y_test, y_pred_rf)
    },
    'Ensemble': {
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred_ensemble)),
        'R2': r2_score(y_test, y_pred_ensemble)
    }
}

# Display results
results_df = pd.DataFrame(metrics).T
print("Model Performance:")
print(results_df)
print(f"\nEnsemble weights - XGB: {meta_model.coef_[0]:.3f}, RF: {meta_model.coef_[1]:.3f}")
print(f"Ensemble RMSE: {metrics['Ensemble']['RMSE']:.2f}")

Model Performance:
                   RMSE        R2
XGBoost       33.687158  0.774944
RandomForest  33.938838  0.771569
Ensemble      33.027526  0.783671

Ensemble weights - XGB: 0.544, RF: 0.472
Ensemble RMSE: 33.03
