In [None]:
import pandas as pd
import numpy as np
import scipy
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.model_selection import RepeatedKFold, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scripts.python.routines.betas import betas_drop_na
from plotly.subplots import make_subplots
from numpy.ma import masked_array
from scipy import stats
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pickle
import random
import plotly.express as px
import copy
import statsmodels.formula.api as smf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scripts.python.pheno.datasets.filter import filter_pheno
from scripts.python.pheno.datasets.features import get_column_name, get_status_dict, get_sex_dict
from scripts.python.routines.plot.scatter import add_scatter_trace
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pathlib
from scripts.python.routines.manifest import get_manifest
from scripts.python.routines.plot.save import save_figure
from scripts.python.routines.plot.layout import add_layout, get_axis
from scripts.python.routines.plot.p_value import add_p_value_annotation
from statsmodels.stats.multitest import multipletests
from sklearn.metrics import mean_absolute_error
import plotly.io as pio
# Turn off MathJax for kaleido (static image export) by clearing the scope's
# `mathjax` setting. NOTE(review): presumably done to keep the "Loading
# [MathJax]" artefact out of exported figures — confirm.
pio.kaleido.scope.mathjax = None
from plotly.offline import init_notebook_mode, iplot
# Initialise plotly's offline notebook mode when the import cell runs;
# connected=False is the offline setting (plotly.js is not fetched from a CDN
# per plotly's documentation — TODO confirm for the installed version).
init_notebook_mode(connected=False)
from pathlib import Path
from functools import reduce
from scipy.stats import chi2_contingency
from scipy.stats import kruskal, mannwhitneyu
from impyute.imputation.cs import fast_knn, mean, median, random, mice, mode, em
from scripts.python.eeg.routines import plot_32_electrodes_scatter, plot_32_electrodes_bar, plot_32_electrodes_beeswarm

# Prepare data

In [None]:
# Root folder of the EEG experiments (machine-specific absolute path).
path = "E:/YandexDisk/EEG/experiments"

# Identify the experiment and the model run whose artefacts are loaded below.
exp_type = '1st_day'
exp_sub_type = 'real'
model = 'xgboost'
exp_date = '2022-07-02_12-09-58'

# Column of the data table holding the class label used for filtering.
class_column = 'class_simp'

# Full data table, indexed by the "index" column of the spreadsheet.
df_data = pd.read_excel(f"{path}/{exp_type}/data.xlsx", index_col="index")

# Feature names are the index of the features table.
df_features = pd.read_excel(f"{path}/{exp_type}/features_freq.xlsx", index_col="features")
features = df_features.index.values

# Classes that belong to the selected experiment sub-type.
df_classes = pd.read_excel(f"{path}/{exp_type}/classes/{exp_sub_type}.xlsx")
classes = df_classes[class_column].values

# Keep only the rows whose class is listed in the classes table.
df_data = df_data.loc[df_data[class_column].isin(classes), :]
# Subjects are ordered by the number that follows their first character.
subjects = sorted(df_data['subject'].unique(), key=lambda x: float(x[1:]))
samples = df_data.index.values

# Where the model run is read from and where the figures are written to.
path_load = f"{path}/{exp_type}/models/{exp_type}_{exp_sub_type}_trn_val_{model}/runs/{exp_date}"
path_save = f"{path}/special/001_32_channels_highlight/{exp_type}_{exp_sub_type}_trn_val_{model}"
pathlib.Path(path_save).mkdir(parents=True, exist_ok=True)

df_importance = pd.read_excel(f"{path_load}/feature_importances.xlsx", index_col='feature')
# Features listed in the features table but absent from the importance table
# are appended with an explicit importance of zero.
missed_features = set(df_features.index) - set(df_importance.index)
# A set has no defined iteration order, so using it directly as an index made
# the order of the appended rows vary between kernel sessions. Sorting makes
# the result reproducible (assumes feature names are mutually comparable,
# e.g. all strings). The index name is carried over so that the concatenated
# frame keeps it.
df_missed = pd.DataFrame(
    index=pd.Index(sorted(missed_features), name=df_importance.index.name),
    columns=['importance'],
    data=np.zeros(len(missed_features)),
)
df_importance = pd.concat([df_importance, df_missed])

# Model predictions of the selected run, indexed by the "index" column.
df_predictions = pd.read_excel(f"{path_load}/predictions.xlsx", index_col='index')

# SHAP global (train)

In [None]:
# SHAP split analysed in this cell; used in both the load and the save paths.
shap_part = 'trn'

# One output folder per SHAP table ("left" / "right").
for side in ('left', 'right'):
    pathlib.Path(f"{path_save}/shap_{shap_part}/{side}").mkdir(parents=True, exist_ok=True)

# The spreadsheets are read with samples as the index and then transposed, so
# that rows are the spreadsheet's columns (looked up by feature name below)
# and columns are samples.
df_shap_left = pd.read_excel(f"{path_load}/shap/{shap_part}/shap_left_{exp_sub_type}.xlsx", index_col='index')
shap_samples = df_shap_left.index.values
shap_subjects = sorted(
    df_data.loc[df_data.index.isin(df_shap_left.index), 'subject'].unique(),
    key=lambda x: float(x[1:]),
)
df_shap_left = df_shap_left.T
df_shap_right = pd.read_excel(f"{path_load}/shap/{shap_part}/shap_right_{exp_sub_type}.xlsx", index_col='index').T

# Per subject: the sample columns whose name contains "left" / "right".
# Sample ids are matched on the "<subject>_" prefix. The previous
# `str.contains(subj)` was a regex substring search, so e.g. "S1" also
# collected the samples of "S10".."S19" and corrupted the per-subject means.
# Assumes sample ids look like "S6_T0_right_real" (subject id first, followed
# by an underscore), as in the ids used elsewhere in this notebook.
dict_subj_left = {}
dict_subj_right = {}
for subj in shap_subjects:
    subj_columns = df_shap_left.columns.values[df_shap_left.columns.str.startswith(f"{subj}_")]
    left_columns = [s for s in subj_columns if "left" in s]
    right_columns = [s for s in subj_columns if "right" in s]
    dict_subj_left[subj] = left_columns
    dict_subj_right[subj] = right_columns

# For every feature and both SHAP tables add:
#   mean_abs_shap  - mean of |SHAP| over all samples of this split,
#   <subj>_left    - mean SHAP over that subject's "left" samples,
#   <subj>_right   - mean SHAP over that subject's "right" samples.
# The column lists come from the left table and are applied to both tables,
# so both spreadsheets are expected to contain the same samples.
# A subject without samples of one kind gets NaN for that column.
for feat in features:
    for df_shap in (df_shap_left, df_shap_right):
        df_shap.at[feat, 'mean_abs_shap'] = np.mean(np.abs(df_shap.loc[feat, shap_samples].values))
        for subj in shap_subjects:
            df_shap.at[feat, f"{subj}_left"] = np.mean(df_shap.loc[feat, dict_subj_left[subj]].values)
            df_shap.at[feat, f"{subj}_right"] = np.mean(df_shap.loc[feat, dict_subj_right[subj]].values)

# Most important features first.
df_shap_left = df_shap_left.sort_values('mean_abs_shap', ascending=False)
df_shap_right = df_shap_right.sort_values('mean_abs_shap', ascending=False)

# Same three figures for each table, written to the table's own folder.
for side, df_shap in (('left', df_shap_left), ('right', df_shap_right)):
    path_side = f"{path_save}/shap_{shap_part}/{side}"
    plot_32_electrodes_scatter(df_shap, 'mean_abs_shap', "Mean(|SHAP values|)", "min2max", path_side)
    plot_32_electrodes_bar(df_shap, 'mean_abs_shap', "Mean(|SHAP values|)", "min2max", path_side)
    plot_32_electrodes_beeswarm(df_shap, df_data, shap_samples, "SHAP values", path_side)

# SHAP local (val)

In [None]:
# SHAP split analysed in this cell; used in both the load and the save paths.
shap_part = 'val'

pathlib.Path(f"{path_save}/shap_{shap_part}").mkdir(parents=True, exist_ok=True)

# The spreadsheets are read with samples as the index and then transposed, so
# that rows are the spreadsheet's columns (looked up by feature name below)
# and columns are samples.
df_shap_left = pd.read_excel(f"{path_load}/shap/{shap_part}/shap_left_{exp_sub_type}.xlsx", index_col='index')
shap_samples = df_shap_left.index.values
shap_subjects = sorted(
    df_data.loc[df_data.index.isin(df_shap_left.index), 'subject'].unique(),
    key=lambda x: float(x[1:]),
)
df_shap_left = df_shap_left.T
df_shap_right = pd.read_excel(f"{path_load}/shap/{shap_part}/shap_right_{exp_sub_type}.xlsx", index_col='index').T

# Per subject: the sample columns whose name contains "left" / "right".
# Sample ids are matched on the "<subject>_" prefix. The previous
# `str.contains(subj)` was a regex substring search, so e.g. "S1" also
# collected the samples of "S10".."S19" and corrupted the per-subject means.
# Assumes sample ids look like the entries of `samples_targ` below
# (subject id first, followed by an underscore).
dict_subj_left = {}
dict_subj_right = {}
for subj in shap_subjects:
    subj_columns = df_shap_left.columns.values[df_shap_left.columns.str.startswith(f"{subj}_")]
    left_columns = [s for s in subj_columns if "left" in s]
    right_columns = [s for s in subj_columns if "right" in s]
    dict_subj_left[subj] = left_columns
    dict_subj_right[subj] = right_columns

# For every feature and both SHAP tables add:
#   mean_abs_shap  - mean of |SHAP| over all samples of this split,
#   <subj>_left    - mean SHAP over that subject's "left" samples,
#   <subj>_right   - mean SHAP over that subject's "right" samples.
# The column lists come from the left table and are applied to both tables,
# so both spreadsheets are expected to contain the same samples.
# A subject without samples of one kind gets NaN for that column.
for feat in features:
    for df_shap in (df_shap_left, df_shap_right):
        df_shap.at[feat, 'mean_abs_shap'] = np.mean(np.abs(df_shap.loc[feat, shap_samples].values))
        for subj in shap_subjects:
            df_shap.at[feat, f"{subj}_left"] = np.mean(df_shap.loc[feat, dict_subj_left[subj]].values)
            df_shap.at[feat, f"{subj}_right"] = np.mean(df_shap.loc[feat, dict_subj_right[subj]].values)

# Most important features first.
df_shap_left = df_shap_left.sort_values('mean_abs_shap', ascending=False)
df_shap_right = df_shap_right.sort_values('mean_abs_shap', ascending=False)

# Individual samples for which a local (per-sample) SHAP figure is drawn.
# Each id must be a sample column of the SHAP tables loaded above.
samples_targ = [
    'S6_T0_right_real',
    'S9_T3_right_real',
    'S14_T4_right_real',

    'S6_T0_left_real',
    'S9_T10_left_real',
    'S14_T7_left_real',

    'S6_T5_left_real',
    'S9_T15_left_real',
    'S14_T18_left_real',
]

# One scatter figure per target sample and per SHAP table, each written to
# its own "<sample>/<left|right>" folder.
for sample in samples_targ:
    for side, df_shap in (('left', df_shap_left), ('right', df_shap_right)):
        path_sample = f"{path_save}/shap_{shap_part}/{sample}/{side}"
        pathlib.Path(path_sample).mkdir(parents=True, exist_ok=True)
        plot_32_electrodes_scatter(df_shap, sample, "SHAP values", "minus2plus", path_sample)