In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
from sklearn.inspection import permutation_importance
from xgboost import XGBRegressor

In [None]:
# Load the feature matrix (X.csv) and the target (y.csv) from the working directory
X = pd.read_csv("X.csv")
# squeeze("columns") turns a single-column frame into a Series. Unlike a bare
# squeeze(), it never collapses a one-row file all the way down to a scalar.
y = pd.read_csv("y.csv").squeeze("columns")

# Fail early with a readable message instead of fitting on a malformed target.
assert isinstance(y, pd.Series), "y.csv must contain exactly one column"
assert len(X) == len(y), f"row mismatch: X has {len(X)} rows, y has {len(y)}"

print(X.shape, y.shape)
print()
print(X.head())
print(y.head())

In [None]:
# Gradient-boosted tree regressor; keyword arguments are grouped by role.
xgb = XGBRegressor(
    # boosting schedule
    n_estimators=600,
    learning_rate=0.05,
    # tree shape and construction
    max_depth=6,
    tree_method="hist",  # histogram-based tree construction
    # row / column subsampling
    subsample=0.8,
    colsample_bytree=0.8,
    # L1 / L2 regularisation strengths
    reg_alpha=0.3,
    reg_lambda=1.0,
    # fixed seed so repeated runs use the same random draws
    random_state=42,
)

# The model is fitted on all of X / y; this cell makes no train/test split.
xgb.fit(X, y)

# One importance score per column of X, ordered from smallest to largest.
# NOTE(review): treated as gain-based downstream; that relies on XGBoost's
# default importance_type for tree boosters - confirm if that is ever changed.
gain_importance = (
    pd.Series(xgb.feature_importances_, index=X.columns)
    .sort_values()  # ascending is the default
)
print("Training complete")

In [None]:
# Print the importance score of every feature. The Series was sorted in
# ascending order when it was built, so the highest-scoring features come last.
print(gain_importance)

In [None]:
# Horizontal bar chart of the importance score of every feature.
fig, ax = plt.subplots(figsize=(12, 10))
gain_importance.plot(kind="barh", ax=ax)
ax.set_title("XGBoost Feature Importances (Gain-Based)")
ax.set_xlabel("Gain Importance")
fig.tight_layout()
# Write the figure to disk first, then render it in the notebook.
fig.savefig("xgb_feature_importance_all.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:

# Generate SHAP values for every row of X using the fitted model
explainer = shap.Explainer(xgb)
shap_vals = explainer(X)

# Calculate mean absolute SHAP value per feature.
# Averaging over axis 0 leaves one value per remaining position, which is then
# labelled with X.columns.
# NOTE(review): assumes shap_vals.values is 2-D (rows x features) - confirm.
# Requires numpy to be imported as `np` in the notebook's import cell.
mean_abs_shap = np.abs(shap_vals.values).mean(axis=0)
shap_importance = pd.Series(mean_abs_shap, index=X.columns).sort_values(ascending=True)

# Plot the features with the largest mean |SHAP| (the Series is sorted
# ascending, so tail(N) selects the top N)
plt.figure(figsize=(10, 8))
shap_importance.tail(20).plot(kind="barh")  # Change tail(N) to control how many features to show
plt.title("SHAP Feature Importances (Mean Absolute Value)")
plt.xlabel("Mean |SHAP Value|")
plt.ylabel("Feature")
plt.tight_layout()
plt.show()
