# 03 – Explainability (SHAP)

In [None]:
import pandas as pd, numpy as np
import shap
from sklearn.pipeline import Pipeline
from src.data_prep import build_preprocessor
from src.features import add_engineered_features
from src.modeling import get_models
import matplotlib.pyplot as plt
import os

# Fixed seed, handed to the model factory below so fits are repeatable.
seed = 42

# Load the cleaned dataset and run the project's feature-engineering step.
Df = add_engineered_features(pd.read_csv('../data/processed/clean.csv'))

# The regression target is log(price). Prices are floored at 1 first, so
# zero or negative values cannot produce -inf / NaN in the logarithm.
floored_price = Df['price'].clip(lower=1)
Df['log_price'] = np.log(floored_price)

# Features are every column except the raw and log-transformed price.
target_columns = ['price', 'log_price']
X = Df.drop(columns=target_columns)
y = Df['log_price']

# Chain the project preprocessor with the 'xgb' entry of the model registry
# and fit the whole pipeline on the full dataset (no hold-out split here).
pre = build_preprocessor()
candidate_models = get_models(random_state=seed)
model = candidate_models['xgb']
pipe = Pipeline(steps=[('pre', pre), ('model', model)])
pipe.fit(X, y)

# Re-use the preprocessor that pipe.fit() already fitted: only transform here.
# Calling fit_transform again would refit the step in place, repeating work
# and mutating the pipeline the model was trained with.
fitted_pre = pipe.named_steps['pre']
Xt = fitted_pre.transform(X)

# TreeExplainer is given the bare fitted estimator (not the pipeline), so the
# SHAP values are computed on the already-transformed feature matrix.
xgb = pipe.named_steps['model']
explainer = shap.TreeExplainer(xgb)
shap_values = explainer.shap_values(Xt)

# Recover readable column names for the plot. A plain array carries no names,
# in which case SHAP labels the rows with generic "Feature N" placeholders.
feature_names = getattr(Xt, 'columns', None)
if feature_names is None:
    try:
        feature_names = pipe.named_steps['pre'].get_feature_names_out()
    except (AttributeError, ValueError):
        # The preprocessor cannot report names; keep SHAP's default labels.
        feature_names = None
if feature_names is not None:
    feature_names = list(feature_names)
    # Guard against a name list that does not line up with the matrix.
    if len(feature_names) != Xt.shape[1]:
        feature_names = None

figures_dir = '../reports/figures'
os.makedirs(figures_dir, exist_ok=True)

plt.figure()
# show=False leaves the figure open so it can be laid out and saved below.
shap.summary_plot(shap_values, Xt, feature_names=feature_names, show=False)
plt.tight_layout()
plt.savefig(f'{figures_dir}/shap_summary.png')
print('Saved shap_summary.png')