# Computing SHAP Values
For the purpose of testing, we have made available our training dataset, which includes both eyes-open and eyes-closed features for the 12 regional interpolated channels. The model parameters have been configured to match those of our top-performing model for this dataset.

In [None]:
import pandas as pd
import shap
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder
import pickle

In [None]:
def load_object(fname):
    """Load a pickled object from ``fname + ".pickle"``.

    Best-effort loader: any exception raised while building the path,
    opening the file, or unpickling is reported on stdout and ``None`` is
    returned instead of propagating the error.

    WARNING: ``pickle.load`` can execute arbitrary code embedded in the
    file, so only use this on files from a trusted source.

    Args:
        fname: Path of the pickle file without the ``.pickle`` extension.

    Returns:
        The unpickled object, or ``None`` if loading failed.
    """
    try:
        with open(fname + ".pickle", "rb") as stream:
            loaded = pickle.load(stream)
    except Exception as ex:
        print("Error during unpickling object (Possibly unsupported):", ex)
        return None
    return loaded


# Load the example training set. load_object appends ".pickle" to this base
# path and returns None (after printing the error) if the file cannot be read.
data = load_object("../../data/example_training_set/training_set")

In [None]:
# Pull the four fields used by the rest of the notebook out of the loaded
# dataset in one step (same keys, same order of access as before).
# NOTE(review): presumably x holds one feature vector per sample and x_names
# the matching feature names; only len(x) and len(x[0]) are relied on here.
x, groups, y_raw, x_names = (
    data[key] for key in ("x", "group", "y", "x_names")
)

n_samples = len(x)
sample_size = len(x[0])
print(f"Dataset contains {n_samples} Samples of size {sample_size}")

In [None]:
# Stratify on the target truncated to an integer while keeping all samples
# that share a group value in the same fold.
y_skf = list(map(int, y_raw))
skf = StratifiedGroupKFold(n_splits=3, shuffle=True, random_state=126)

# One (train_index, test_index) pair per fold, in the order the splitter
# yields them; the fixed random_state keeps the folds reproducible.
skf_vals = [
    (train_index, test_index)
    for train_index, test_index in skf.split(x, y_skf, groups)
]

In [None]:
# Scale each raw target by 10 and truncate to an integer, then replace every
# distinct value by its LabelEncoder index.
# NOTE(review): the encoded indices are later used as the regression target,
# so the spacing between targets is only preserved if every intermediate
# value occurs in the data; confirm this is intended.
le = LabelEncoder()
y = le.fit_transform([int(age * 10) for age in y_raw])

In [None]:
# Keyword arguments forwarded unchanged to XGBRegressor for every fold.
model_param = {
    # Boosting schedule
    "n_estimators": 5000,
    "learning_rate": 0.0284529908658767,
    # Tree shape and per-tree row/column sampling
    "max_depth": 3,
    "subsample": 0.7,
    "colsample_bytree": 0.3,
    "booster": "gbtree",
    # Regularisation (L2 weight, L1 weight, minimum split loss)
    "reg_lambda": 1,
    "reg_alpha": 10,
    "gamma": 1.0,
}

In [None]:
# Train one XGBRegressor per cross-validation fold and remember the fold
# whose held-out mean absolute error is lowest.
best_fold = 0
# Start from +inf rather than a fixed threshold so that the best fold is
# always selected; with a finite sentinel, best_model stayed None whenever no
# fold scored below it, which broke the SHAP step further down.
best_score = float("inf")
best_model = None
for fold, (fold_train_idx, fold_test_idx) in enumerate(skf_vals):
    x_train = [x[i] for i in fold_train_idx]
    x_test = [x[i] for i in fold_test_idx]
    y_train = [y[i] for i in fold_train_idx]
    y_test = [y[i] for i in fold_test_idx]

    model = XGBRegressor(**model_param)
    # NOTE(review): newer XGBoost releases expect early_stopping_rounds on the
    # estimator constructor instead of fit(); confirm against the installed
    # version before upgrading.
    # NOTE(review): the held-out fold is used both for early stopping and for
    # scoring below, so the reported MAE is likely optimistic.
    model.fit(
        x_train,
        y_train,
        early_stopping_rounds=400,
        eval_set=[(x_test, y_test)],
    )

    preds = model.predict(x_test)
    mae = mean_absolute_error(y_test, preds)
    if mae < best_score:
        best_fold = fold
        best_score = mae
        best_model = model

In [None]:
# Rebuild the train/test split of the selected fold.
best_train_idx, best_test_idx = skf_vals[best_fold]

x_train = [x[i] for i in best_train_idx]
y_train = [y[i] for i in best_train_idx]
x_test = [x[i] for i in best_test_idx]
y_test = [y[i] for i in best_test_idx]

# Wrap both feature matrices in DataFrames labelled with the feature names.
# Only the test frame is used below; the train frame is not used within the
# visible code.
x_train_df, x_test_df = (
    pd.DataFrame(rows, columns=x_names) for rows in (x_train, x_test)
)

# Explain the selected model's predictions on its held-out fold.
explainer = shap.TreeExplainer(best_model)
shap_values = explainer.shap_values(x_test_df)