In [None]:
pip install shap

In [None]:
from xgboost import XGBRegressor
import shap
import pandas as pd
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# SHAP explanation of an XGBoost regressor for a single outcome column.
#
# Pipeline: load predictors/outcomes from Excel -> drop metadata columns ->
# train/test split -> fit XGBRegressor -> compute SHAP values on the test
# set -> draw a waterfall plot and a force plot for individual test samples.
# ---------------------------------------------------------------------------

# Analysis settings, gathered here so they are easy to change.
target_column = 'QACS'   # outcome column modelled in this cell
random_state = 0         # fixed seed: makes the split, and hence the plotted samples, reproducible
sample_ind = 20          # position (within the test set) explained by the waterfall plot
force_sample_ind = 0     # position (within the test set) explained by the force plot

# Importing excel sheets as dfs, should both have samples in the same order
# Predictors: rows are samples, columns are predictors
predictors_df = pd.read_excel('../Predictors_Cleaned.xlsx', sheet_name=0)
# Outcomes: rows are samples, columns are mechanisms
outcomes_df = pd.read_excel('../Outcomes_Cleaned.xlsx', sheet_name=0)

# Dropping unnecessary metadata
X = predictors_df.drop('SAMPLE NAME', axis=1)
# Y keeps every remaining outcome column; only `target_column` is modelled
# below, so explaining the other outcomes would require looping over Y's columns.
Y = outcomes_df.drop('MECHANISM', axis=1)

# Split Data (seeded so repeated runs explain the same test samples)
x_train, x_test, y_train, y_test = train_test_split(
    X, Y[target_column], test_size=0.4, random_state=random_state
)

# Fail early with a clear message instead of an opaque IndexError from SHAP
# when the test split is too small for the requested sample positions.
if max(sample_ind, force_sample_ind) >= len(x_test):
    raise IndexError(
        f"Requested test-sample positions {sample_ind} and {force_sample_ind}, "
        f"but the test set only has {len(x_test)} rows"
    )

# 100 instances for use as the background distribution.
# NOTE(review): the background is drawn from the full X (train + test rows);
# confirm this is intended rather than sampling from x_train only.
X100 = shap.utils.sample(X, 100)

# Gradient-boosted tree regressor (XGBoost) with default hyper-parameters
model = XGBRegressor()
model.fit(x_train, y_train)

# compute the SHAP values
# Passing `model.predict` (a plain callable) makes SHAP treat the model as a
# black box and explain it against the X100 background data.
explainer = shap.Explainer(model.predict, X100)
shap_values = explainer(x_test)

# Plotting Waterfall Graph for Feature Contributions
shap.plots.waterfall(shap_values[sample_ind], max_display=14)

# Plotting Force Graph for Feature Contributions
# The interactive force plot needs SHAP's JavaScript loaded into the notebook;
# calling initjs() again is harmless if an earlier cell already did so.
shap.initjs()
shap.plots.force(shap_values[force_sample_ind])

# Reading the plots:
# f(x) is the model's predicted value of the target (QACS) for the plotted sample.
# E[f(X)] is the base value: the average model prediction over the background
# data given to the explainer (X100).
# The observed value for a plotted sample can be checked with
# y_test.iloc[sample_ind] (or y_test.iloc[force_sample_ind]).
# NOTE(review): the original notes described the plotted sample as "the very
# last sample" with an actual value of 18; neither plotted position is
# necessarily the last sample, so re-verify that figure against y_test.