# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
import shap

# Fit an XGBoost regressor on the spreadsheet below, explain it with SHAP,
# and draw a horizontal bar chart of the 15 features with the largest
# mean |SHAP| value.

# --- Load data ---------------------------------------------------------------
# NOTE: machine-specific absolute path; adjust when running elsewhere.
file_path = r"C:\Users\Lenovo\Desktop\Yelp Dataset\final_data1.xlsx"
df = pd.read_excel(file_path)

# 'stars' is the regression target; 'business_id' and the target itself are
# excluded from the predictors.
y = df['stars']
X = df.drop(columns=['business_id', 'stars'])

# --- Fit model ---------------------------------------------------------------
# The model is fitted on every row (no hold-out split), so the SHAP values
# computed below describe the in-sample fit.
xgb_model = xgb.XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
xgb_model.fit(X, y)

# --- SHAP values -------------------------------------------------------------
# X is used both as the explainer's background data and as the data explained.
explainer = shap.Explainer(xgb_model, X)
shap_values = explainer(X)

# One row per feature: mean absolute SHAP value across all samples, keeping
# only the 15 largest (fewer if X has fewer than 15 columns).
shap_df = pd.DataFrame({
    'Feature': X.columns,
    'MeanAbsSHAP': np.abs(shap_values.values).mean(axis=0)
}).sort_values(by='MeanAbsSHAP', ascending=False).head(15)

# --- Plot --------------------------------------------------------------------
sns.set(style="whitegrid", font_scale=1.2)
plt.figure(figsize=(10, 7))

# Bar colour encodes the sign of the *signed* mean SHAP value of the feature:
# steelblue when the mean is >= 0, darkorange when it is negative.
colors = []
for feature in shap_df['Feature']:
    idx = X.columns.get_loc(feature)
    mean_shap_val = shap_values.values[:, idx].mean()
    colors.append('steelblue' if mean_shap_val >= 0 else 'darkorange')

bars = plt.barh(shap_df['Feature'], shap_df['MeanAbsSHAP'], color=colors)
plt.gca().invert_yaxis()  # largest value on top
plt.xlabel("Mean |SHAP value|", fontsize=14)
plt.ylabel("Feature", fontsize=14)

# Annotate only the top bars with their numeric value. The count is clamped to
# the number of bars actually drawn so that a dataset with fewer than five
# feature columns does not raise IndexError.
n_labelled = min(5, len(shap_df))
top_values = shap_df['MeanAbsSHAP'].iloc[:n_labelled]
for bar, val in zip(bars[:n_labelled], top_values):
    # Small absolute offset (in data units) so the text sits just right of
    # the bar end, vertically centred on the bar.
    plt.text(val + 0.002, bar.get_y() + bar.get_height()/2,
             f"{val:.3f}", va='center', fontsize=12, weight='bold')

plt.tight_layout()
plt.show()