# Local SHAP value calculation for model-predicted old and young subjects

In [None]:
import numpy as np
import pandas as pd
import os,re,glob
import shap
import joblib

## Import data

In [None]:
# Directory holding the processed inputs shared by the cells below.
ipt_DIR = "../../1_data_processing/processed_data/"

# Both tables are indexed by the "group.cmp" column so rows can be
# aligned by label between targets (y) and scaled features (X).
y, X = (
    pd.read_csv(os.path.join(ipt_DIR, csv_name), index_col="group.cmp")
    for csv_name in ("y.csv", "X_scaled.csv")
)

# Plain Python list of every row label present in X.
list_group_cmp = X.index.tolist()

## Import models

In [None]:
# Output directory of the male Lasso run. The trailing slash is required:
# the cells below build file paths by appending names directly to this string.
ipt_DIR_male = "../../2_model_construction/lasso/out/MAPE/0.99/Lasso/male/tsfresh/"

In [None]:
# Flat list of fitted estimators, outer fold 0 first. Each bestmodel.pkl is
# read as a mapping whose 'estimator' entry is an iterable of models
# (presumably one per inner split -- confirm against the training script).
models_male = []
for i in range(5):
    bestDIR = f"{ipt_DIR_male}optuna/outer_{i}/bestmodel.pkl"
    print(bestDIR)
    best_lasso_cv = joblib.load(bestDIR)
    models_male += best_lasso_cv['estimator']

# Import the coefficients used

In [None]:
# Coefficient table written next to the male models; its column labels are
# taken as the list of features to select from X later on.
coef_male = pd.read_csv(f"{ipt_DIR_male}feature_coefs.csv", index_col=0)
features_male = coef_male.columns.tolist()
# Report how many features were picked up.
print(len(features_male))

# Import the folds used

In [None]:
# Load the pickled 5-fold split. Each entry is indexed later as
# folds_out[k][0] (row labels on the training side) and folds_out[k][1]
# (row labels on the test side).
folds_out = joblib.load(os.path.join(ipt_DIR,"indices_5folds.pkl"))

# Import matched data

In [None]:
# Read the matched prediction table (first CSV column is consumed as a
# throw-away index), then re-index it by "group.cmp" so it lines up with X/y.
df_matched = pd.read_csv(
    "../../4_Figure4/out/pred_vs_true_matched_male_and_female.csv",
    index_col=0,
)
df_matched = df_matched.set_index("group.cmp")

In [None]:
# Keep only the rows whose sex flag equals 1 (the column name encodes
# male = 1, female = 0).
tmp_df_matched_male = df_matched[df_matched["SEX.男1.女0"] == 1]

## Calculate SHAP values for predicted-old and predicted-young subjects using the best male models

In [None]:
# Per-fold SHAP values for the matched male test subjects.
#
# For every outer fold k:
#   * the background data is every male training row of that fold,
#   * the explained rows are the male test rows of that fold that also appear
#     in tmp_df_matched_male,
#   * SHAP values are computed with each of the fold's models and averaged,
#   * the averaged values are joined onto the matched-subject table.
# The per-fold tables are stacked row-wise into df_male.

# models_male is sliced in consecutive groups of this size, one group per fold.
n_models_per_fold = 5

# The sex filter does not depend on the fold, so build it once.
male_mask = y["SEX.男1.女0"] == 1

# Collect per-fold results and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
fold_frames = []

# Indexed access is kept on purpose: the container type of folds_out is not
# visible here, only that folds_out[k][0] / folds_out[k][1] are valid.
for k in range(len(folds_out)):
    # select test, train, model sets in a fold
    train_rows_index_tmp = y.loc[male_mask & y.index.isin(folds_out[k][0])].index
    test_rows_index_tmp = y.loc[male_mask & y.index.isin(folds_out[k][1])].index

    # all train data were used (as the KernelExplainer background set)
    x_train_tmp = X.loc[train_rows_index_tmp, features_male]

    # restrict the test rows to the matched subjects
    x_test_tmp = X.loc[test_rows_index_tmp, features_male]
    x_test_tmp = x_test_tmp[x_test_tmp.index.isin(tmp_df_matched_male.index)]
    test_rows_index = x_test_tmp.index

    print(x_test_tmp.shape)
    if x_test_tmp.empty:
        # Nothing to explain in this fold; it would contribute no rows anyway.
        continue

    # calculate shap with each model of this fold
    model_tmp = models_male[k * n_models_per_fold:(k + 1) * n_models_per_fold]
    if len(model_tmp) != n_models_per_fold:
        raise ValueError(
            f"fold {k}: expected {n_models_per_fold} models, found {len(model_tmp)}"
        )
    shap_values = [
        shap.KernelExplainer(model.predict, x_train_tmp).shap_values(x_test_tmp)
        for model in model_tmp
    ]

    # element-wise mean over the models of the fold
    shap_values_ave = np.mean(shap_values, axis=0)

    tmp_df = pd.DataFrame(
        shap_values_ave, index=test_rows_index, columns=features_male
    )
    # inner join on the row labels: matched-subject columns first, SHAP columns after
    tmp_df = pd.merge(tmp_df_matched_male, tmp_df, left_index=True, right_index=True)

    fold_frames.append(tmp_df)

# pd.concat rejects an empty list, so fall back to an empty frame as before.
df_male = pd.concat(fold_frames, axis=0) if fold_frames else pd.DataFrame()

In [None]:
# Make sure the output directory exists before writing, so the SHAP results
# computed above are not lost to a missing-directory error.
os.makedirs("../out", exist_ok=True)
df_male.to_csv("../out/shap_male.csv")