## Model Explanation

In [None]:
# !pip install --upgrade pip --quiet
# !pip install -U scikit-learn==1.3.0 shap joblib --quiet

In [None]:
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import shap
import joblib
warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
import utilities

In [None]:
# Report the installed versions of the libraries used to load and explain
# the model, so a run of this notebook records what it was executed with.
for lib in (joblib, shap):
    print(lib.__version__)

In [None]:
# Load the feature table to be explained (X_test) and the persisted model (rf_reg).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model artifacts that come from a trusted source.
X_test = pd.read_csv("../datasets/X_test_working_for_real.csv", sep=",")
rf_reg = joblib.load("../predict/RFR_0.51.joblib")

In [None]:
# gscpi = utilities.get_gscpi("../datasets/extra-dataset/GSCPI_data.csv")
# utilities.wrangle_test_data(X_test, gscpi)

In [None]:
# Run the loaded model on the test features. As the cell's only expression,
# the returned predictions are echoed by the notebook.
# NOTE(review): presumably a sanity check that X_test has the columns the
# model expects — confirm.
rf_reg.predict(X_test)

In [None]:
# Rather than work with every row of X_test (the test features, not the
# training set), summarize it with 200 k-means clusters via shap.kmeans.
# NOTE(review): per the shap documentation each cluster is weighted by the
# number of points it represents — confirm for the installed shap version.
X_test_summary = shap.kmeans(X_test, 200)

In [None]:
# Build the tree explainer from the fitted model alone. No background dataset
# is supplied here; passing X_test_summary as the second argument would
# provide one.
# NOTE(review): without background data shap is expected to fall back to its
# tree-path-dependent expectation — confirm for the installed shap version.
exp = shap.TreeExplainer(rf_reg)

In [None]:
# Compute SHAP values for the k-means summary points (X_test_summary.data),
# not for every row of X_test; the plotting cells below use the same points.
shap_values = exp.shap_values(X_test_summary.data)

In [None]:
# Display labels used on the SHAP plots.
# NOTE(review): assumed to line up one-to-one, in order, with the columns of
# X_test — confirm against the dataset.
feature_names = [
    'Site',
    'Reference proxy',
    'Customer Persona proxy',
    'Strategic Product Family proxy',
    'Date',
    'Month 1',
    'Month 2',
    'Month 3',
    'gscpi',
]

# Draw the SHAP summary plot for the k-means summary points. show=False
# suppresses the immediate display so the figure can be written to disk below.
# NOTE(review): summary_plot likely ends up bound to None (the call is used
# for its plotting side effect) — confirm before relying on the name.
summary_plot = shap.summary_plot(
    shap_values,
    X_test_summary.data,
    feature_names=feature_names,
    show=False,
)

# Save summary plot to PNG
plt.savefig('./summary_plot.png', bbox_inches='tight')

In [None]:
# Write one SHAP dependence plot per feature to its own PNG in the working
# directory. show=False suppresses the immediate display so that plt.savefig
# can capture the figure that was just drawn.
for feature_name in feature_names:
    shap.dependence_plot(
        feature_name,
        shap_values,
        X_test_summary.data,
        feature_names=feature_names,
        show=False,
    )
    png_path = f"./dependence_plot_{feature_name}.png"
    plt.savefig(png_path, bbox_inches='tight')

In [None]:
# Show the dependence plot for a single feature inline (show is left at its
# default, so nothing is saved to disk here).
shap.dependence_plot(
    "Customer Persona proxy",
    shap_values,
    X_test_summary.data,
    feature_names=feature_names,
)