In [None]:
import scipy.io
import numpy as np
from sklearn.metrics import auc
import pandas as pd
from scipy import stats
from statsmodels.stats import multitest
from multipy.fdr import qvalue
import seaborn as sns
from matplotlib import pyplot as plt

# Limits of global efficiency after random attacks
1. For each iteration (240), compute the AUC (over the 30-100% threshold range) for each patient; this reduces the matrix to 240x1x15.
2. Average the values obtained this way for each patient to arrive at 15x1.

In [None]:
# Locations of the single-run random-attack results (.mat files): one for the
# controls (HC) and one per stroke dataset (ST01-ST03).
# NOTE: this is an absolute, machine-specific location; change ATTACK_DATA_DIR
# (and only that) when running the notebook on another machine.
ATTACK_DATA_DIR = '/Users/jk1/unige_onedrive/OneDrive - unige.ch/BCT/attacks240'

hc_data_path = ATTACK_DATA_DIR + '/attack_HC/rnd_attack_degrees_HC_05-12-2020 16-06v4.mat'
st0_data_path = ATTACK_DATA_DIR + '/attack_ST01/rnd_attack_degrees_ST01_05-12-2020 17-46v4.mat'
st1_data_path = ATTACK_DATA_DIR + '/attack_ST02/rnd_attack_degrees_ST02_05-12-2020 20-16v4.mat'
st2_data_path = ATTACK_DATA_DIR + '/attack_ST03/rnd_attack_degrees_ST03_05-13-2020 18-19v4.mat'

In [None]:
# Lower bound of the connectivity-threshold range that custom_auc integrates over.
minimum_connectivity_threshold = 0.3
# Field read from the .mat files: global efficiency after attack.
outcome_var = 'GlobEff_bin_new'

For this variable, data is available for the 8 remaining threshold bins [0.3-1.0]

In [None]:
# Read the four .mat files (controls first, then the three stroke datasets).
hc_data_mat, st0_data_mat, st1_data_mat, st2_data_mat = [
    scipy.io.loadmat(mat_path)
    for mat_path in (hc_data_path, st0_data_path, st1_data_path, st2_data_path)
]

In [None]:
# Subject counts are taken from the length of the first entry of the outcome field.
n_hc = len(hc_data_mat[outcome_var][0][0][0])
# The count read from the ST0 file is also used when parsing the ST1 and ST2 files.
n_st = len(st0_data_mat[outcome_var][0][0][0])
# Hard-coded dimensions -- presumably 11 threshold bins (0.0-1.0 in steps of 0.1)
# and 240 attacked ROIs; TODO confirm against the .mat files.
n_bins = 11
n_rois = 240

In [None]:
def parse_mat_file(data_mat, n_subj, n_rois, outcome_var):
    """Convert the nested MATLAB structure of one group into a dense array.

    The entries are read as ``data_mat[outcome_var][0][0][roi][subj]`` and
    stacked in (roi, subj, bin) order; the ROI axis is then moved to the end,
    so the returned array is ordered (subj, bin, roi). Singleton axes are
    dropped by ``np.squeeze``.
    """
    entries = data_mat[outcome_var][0][0]

    per_roi = []
    for roi_idx in range(n_rois):
        per_subject = [np.vstack(entries[roi_idx][subj_idx])
                       for subj_idx in range(n_subj)]
        per_roi.append(per_subject)

    # (roi, subj, bin) -> (subj, bin, roi)
    return np.moveaxis(np.squeeze(per_roi), 0, -1)

In [None]:
# Dense (subj, bin, roi) arrays per group; all stroke files are parsed with the same subject count n_st.
hc_glob_eff_random_attack = parse_mat_file(
    data_mat=hc_data_mat, n_subj=n_hc, n_rois=n_rois, outcome_var=outcome_var)
st0_glob_eff_random_attack = parse_mat_file(
    data_mat=st0_data_mat, n_subj=n_st, n_rois=n_rois, outcome_var=outcome_var)
st1_glob_eff_random_attack = parse_mat_file(
    data_mat=st1_data_mat, n_subj=n_st, n_rois=n_rois, outcome_var=outcome_var)
st2_glob_eff_random_attack = parse_mat_file(
    data_mat=st2_data_mat, n_subj=n_st, n_rois=n_rois, outcome_var=outcome_var)

In [None]:
# Correct for missing recordings (pt1 at TP1, pt5 at TP2, pt13 (=pt17) at TP3):
# an all-NaN (n_bins, n_rois) placeholder is inserted along the subject axis at
# the position of the missing patient.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
# NOTE: this cell is not idempotent -- re-running it inserts a further placeholder row.
st0_glob_eff_random_attack = np.insert(st0_glob_eff_random_attack, 0, np.full((n_bins, n_rois), np.nan), axis=0)
st1_glob_eff_random_attack = np.insert(st1_glob_eff_random_attack, 4, np.full((n_bins, n_rois), np.nan), axis=0)
st2_glob_eff_random_attack = np.insert(st2_glob_eff_random_attack, 12, np.full((n_bins, n_rois), np.nan), axis=0)

In [None]:
# use auc only over predefined area of thresholds
def custom_auc(values_over_thresholds):
    """Area under a per-threshold curve, restricted to the upper thresholds.

    Only the bins at or above the global ``minimum_connectivity_threshold``
    are integrated. Bins are taken to start at threshold 0 and to be spaced
    0.1 apart, so bin ``i`` sits at threshold ``i * 0.1``.

    Parameters
    ----------
    values_over_thresholds : 1-D array-like
        One value per threshold bin.

    Returns
    -------
    float
        Result of ``sklearn.metrics.auc`` over the selected bins.
    """
    # round() instead of truncation, so a product that lands just below an
    # integer because of floating-point error still maps to the right bin
    minimum_connectivity_threshold_index = int(round(minimum_connectivity_threshold * 10))  # here thresholds start at bin0
    selected_values = values_over_thresholds[minimum_connectivity_threshold_index:]

    # Build exactly one threshold per selected value. np.arange with a
    # fractional step and a float stop can return one element too many (e.g.
    # when starting at 0.7), which would make x and y lengths disagree.
    connectivity_thresholds = minimum_connectivity_threshold + 0.1 * np.arange(len(selected_values))

    return auc(connectivity_thresholds, selected_values)

In [None]:
# Integrate every curve along the threshold-bin axis (axis 1) over the [0.3-1.0] interval.
hc_gEff_auc_random_attack = np.apply_along_axis(custom_auc, 1, hc_glob_eff_random_attack)
st0_gEff_auc_random_attack = np.apply_along_axis(custom_auc, 1, st0_glob_eff_random_attack)
st1_gEff_auc_random_attack = np.apply_along_axis(custom_auc, 1, st1_glob_eff_random_attack)
st2_gEff_auc_random_attack = np.apply_along_axis(custom_auc, 1, st2_glob_eff_random_attack)

In [None]:
# Group mean of the AUC for each number of attacked ROIs (1..240).
# The stroke groups use a NaN-aware mean because they contain NaN placeholder subjects.
over_ROIs_df = pd.DataFrame({
    'n_rois': range(1, 241),
    'HC': np.mean(hc_gEff_auc_random_attack, axis=0),
    'ST0': np.nanmean(st0_gEff_auc_random_attack, axis=0),
    'ST1': np.nanmean(st1_gEff_auc_random_attack, axis=0),
    'ST2': np.nanmean(st2_gEff_auc_random_attack, axis=0),
})

# Long format: one row per (n_rois, group) pair.
vertical_over_ROIs_df = pd.melt(over_ROIs_df,
                                id_vars=['n_rois'],
                                value_vars=['HC', 'ST0', 'ST1', 'ST2'],
                                var_name='group',
                                value_name='auc')

In [None]:
# Mean AUC per number of attacked ROIs, restricted to controls and ST2.
sns.scatterplot(data=vertical_over_ROIs_df[vertical_over_ROIs_df['group'].isin(['HC', 'ST2'])],
                x='n_rois', y='auc', hue='group',
                s=2, alpha=0.99)

In [None]:
# Gap between the group means (controls minus ST2) for each number of attacked ROIs.
over_ROIs_df['delta_hc_st2'] = over_ROIs_df['HC'].sub(over_ROIs_df['ST2'])

ax = sns.scatterplot(data=over_ROIs_df,
                     x='n_rois', y='delta_hc_st2', hue='delta_hc_st2',
                     s=5, alpha=0.99)
ax.set_title('Evolution of delta HC - ST2')

In [None]:
def prepare_array_to_df(array, group_name):
    """Reshape a (subject, n_rois_deleted) array into a long-format DataFrame.

    Parameters
    ----------
    array : numpy.ndarray
        2-D array with one row per subject and one column per number of
        deleted ROIs.
    group_name : str
        Label written to the ``group`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per (subject, deletion step) with the columns
        ``n_rois_deleted`` (1-based), ``subject`` (row index in ``array``),
        ``gEff_auc`` and ``group``.
    """
    # transpose: rows become deletion steps, columns become subjects
    df = pd.DataFrame(array.T)
    # number the deletion steps from the data itself instead of assuming 240 columns
    df['n_rois_deleted'] = range(1, len(df) + 1)
    df = df.melt(id_vars=['n_rois_deleted'],
                 var_name='subject',
                 value_name='gEff_auc')
    df['group'] = group_name
    return df


In [None]:

# Stack the per-subject long-format tables of all four groups.
all_subj_ROIs_df = pd.concat([
    prepare_array_to_df(group_auc, group_label)
    for group_auc, group_label in (
        (hc_gEff_auc_random_attack, 'HC'),
        (st0_gEff_auc_random_attack, 'ST0'),
        (st1_gEff_auc_random_attack, 'ST1'),
        (st2_gEff_auc_random_attack, 'ST2'),
    )
])

In [None]:
# Per-subject AUC for controls and ST2 against the number of deleted ROIs.
# NOTE(review): seaborn documents `y_jitter` of scatterplot as currently
# non-functional -- confirm whether it has any effect here.
ax = sns.scatterplot(x='n_rois_deleted', y='gEff_auc', hue='group',
                data=all_subj_ROIs_df[all_subj_ROIs_df['group'].isin(['HC', 'ST2'])],
                alpha=0.5, s=0.1, y_jitter=0.5)

# zoom in on the first 100 deletions and on the AUC range 0.15-0.5
ax.set_xlim(0,100)
ax.set_ylim(0.15,0.5)
# ax.set_yscale('log')

Perform a t-test at every value of n_rois_deleted to check at which point the difference between HC and ST2 is no longer significant


In [None]:
# limit number of deleted ROIs to 238, as there is no difference for the last two ROIs (resulting in pval NaN)
max_deleted_ROIs = 238

# One two-sample t-test per deletion step, comparing the per-subject AUC of HC and ST2.
# - the boolean mask drops ST2 subjects whose AUC row is entirely NaN
#   (i.e. the placeholder inserted for the missing recording)
# - equal_var=False selects Welch's test (no equal-variance assumption)
# - [1] keeps the p-value of the test result
pvals_per_n_rois_deleted = np.array([stats.ttest_ind(
    hc_gEff_auc_random_attack[:, n_rois_deleted],
    st2_gEff_auc_random_attack[~np.isnan(st2_gEff_auc_random_attack).all(axis=1)][:, n_rois_deleted],
    equal_var=False)[1] for n_rois_deleted in range(max_deleted_ROIs)])

In [None]:
# Uncorrected p-values against the (1-based) number of deleted ROIs.
sns.scatterplot(x=range(1, max_deleted_ROIs + 1), y=pvals_per_n_rois_deleted)

In [None]:
# Benjamini-Hochberg FDR correction; element [1] of the result holds the corrected p-values.
corrected_bh_fdr_pvals = multitest.multipletests(pvals_per_n_rois_deleted, method='fdr_bh')[1]

In [None]:
# FDR-corrected p-values against the (1-based) number of deleted ROIs.
ax = sns.scatterplot(y=corrected_bh_fdr_pvals, x=range(1,max_deleted_ROIs+1))

# log-scaled p-value axis, zoomed in on the last deletion steps
# (data only exists up to max_deleted_ROIs, so the right end of the range stays empty)
ax.set_yscale('log')
ax.set_xlim(200, 240)

# Limits of global efficiency after repeated random attacks

In [None]:
# Results of the repeated random attacks for controls and stroke patients
# (the file names suggest 100 repetitions -- TODO confirm).
# NOTE(review): the st2 data is read from the file named "ST3", in line with
# st2_data_path pointing at attack_ST03 earlier in the notebook; confirm the
# 0-based vs 1-based timepoint naming.
# NOTE: absolute, machine-specific paths.
hc_rep_data_path = '/Users/jk1/temp/stroke_resilience/output/repeated_random_attack/HC_rep100_rng_attack.mat'
st2_rep_data_path = '/Users/jk1/temp/stroke_resilience/output/repeated_random_attack/ST3_rep100_rng_attack.mat'

In [None]:
# Load both repeated-attack files and keep only the outcome field.
hc_rep_data_mat, st2_rep_data_mat = [
    scipy.io.loadmat(rep_path)[outcome_var]
    for rep_path in (hc_rep_data_path, st2_rep_data_path)
]

In [None]:
# Number of attack repetitions, read from the HC data only.
n_iterations = len(hc_rep_data_mat[0][0])
# NOTE: this overwrites the n_rois set earlier in the notebook (240) with the
# length read from the first repetition of the HC data.
n_rois = len(hc_rep_data_mat[0][0][0][0][0])

In [None]:
def parse_rep_mat_file(data_mat, n_rois):
    """Convert the nested MATLAB structure of a repeated attack into a dense array.

    Entries are read as ``data_mat[0][0][iteration][0][0][roi][subj]``. The
    number of iterations and of subjects are both taken from ``data_mat``
    itself, so the function does not depend on notebook-level globals.

    Parameters
    ----------
    data_mat : nested array
        Outcome field of a loaded repeated-attack .mat file.
    n_rois : int
        Number of ROI entries to read per iteration.

    Returns
    -------
    numpy.ndarray
        Array ordered (iteration, roi, subj, bin); the axes are NOT reordered,
        only singleton axes are removed by ``np.squeeze``.
    """
    iterations = data_mat[0][0]
    n_iter = len(iterations)
    n_subj = len(iterations[0][0][0][0])

    glob_eff_rep_random_attack = np.squeeze(
        [[[np.vstack(iterations[iteration][0][0][roi_idx][subj_idx])
           for subj_idx in range(n_subj)]
          for roi_idx in range(n_rois)]
         for iteration in range(n_iter)])
    return glob_eff_rep_random_attack

In [None]:
# Dense arrays of the repeated-attack results for controls and ST2.
hc_rep_data = parse_rep_mat_file(data_mat=hc_rep_data_mat, n_rois=n_rois)
st2_rep_data = parse_rep_mat_file(data_mat=st2_rep_data_mat, n_rois=n_rois)

In [None]:
# take integral AUC over [0.3-1.0] interval
hc_gEff_auc_rep_random_attack = np.apply_along_axis(custom_auc, arr=hc_rep_data, axis=-1)
st2_gEff_auc_rep_random_attack = np.apply_along_axis(custom_auc, arr=st2_rep_data, axis=-1)

In [None]:
# plot mean AUC over number of attacked ROIs
# The AUC arrays are averaged over their first and last axes, leaving one value
# per entry of the middle axis (presumably: mean over repetitions and subjects,
# one value per number of deleted ROIs -- TODO confirm axis order).
# NOTE: range(1,241) assumes that the middle axis has exactly 240 entries.
rep_over_ROIs_df = pd.DataFrame({'n_rois': range(1,241),
                            'HC': hc_gEff_auc_rep_random_attack.mean(axis=(0, -1)),
                             'ST2': np.nanmean(st2_gEff_auc_rep_random_attack, axis=(0, -1))})

# long format: one row per (n_rois, group) pair
vertical_rep_over_ROIs_df = rep_over_ROIs_df.melt(id_vars='n_rois',
                                                  value_vars=['HC', 'ST2'],
                                                  var_name='group',
                                                  value_name='gEff_auc')

In [None]:

# Mean AUC of the repeated attacks per number of attacked ROIs, by group.
ax = sns.scatterplot(data=vertical_rep_over_ROIs_df,
                     x='n_rois', y='gEff_auc', hue='group',
                     s=2, alpha=0.99)

In [None]:
# Absolute gap between the group means (HC vs. ST2) for each number of deleted nodes.
rep_over_ROIs_df['delta_hc_st2'] = np.abs(rep_over_ROIs_df['HC'] - rep_over_ROIs_df['ST2'])

ax = sns.scatterplot(x='n_rois', y='delta_hc_st2', hue='n_rois',
                data=rep_over_ROIs_df,
                alpha=0.99, s=5, legend=False)
ax.set_xlabel('Number of deleted network nodes')
ax.set_ylabel('Difference in global efficiency')
# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
# ax.set_title('Evolution of the difference in global efficiency between controls and subjects at timepoint 3')
# keep a handle on the figure (the name `fig` is reassigned by later plotting cells)
fig = ax.get_figure()
plt.show()

In [None]:
# Difference (ST2 minus HC) of the AUC averaged over the last axis (subjects),
# computed separately for every repetition: rows = repetitions, columns = deletion steps.
delta_st2_hc_over_rois_over_iterations = st2_gEff_auc_rep_random_attack.mean(axis=-1) - hc_gEff_auc_rep_random_attack.mean(axis=-1)
# Label the columns 1..n so that 'n_rois' is the number of deleted nodes, matching
# the 1-based numbering of the other tables in this notebook (the columns used to
# be 0..239, which shifted this plot by one node). The count is taken from the data
# rather than hard-coded as 240.
delta_st2_hc_over_rois_over_iterations_df = pd.DataFrame(
    delta_st2_hc_over_rois_over_iterations,
    columns=range(1, delta_st2_hc_over_rois_over_iterations.shape[1] + 1))
delta_st2_hc_over_rois_over_iterations_df['iteration'] = range(n_iterations)
# long format: one row per (iteration, n_rois) pair
vert_delta_st2_hc_over_rois_over_iterations_df = delta_st2_hc_over_rois_over_iterations_df.melt(
    id_vars='iteration',
    value_vars=list(range(1, delta_st2_hc_over_rois_over_iterations.shape[1] + 1)),
    var_name='n_rois',
    value_name='delta_st2_hc')

In [None]:
# plot mean difference in global efficiency between HC and ST2 for each number of deleted ROIs for each iteration
# NOTE(review): seaborn documents `y_jitter` of scatterplot as currently
# non-functional -- confirm whether it has any effect here.
ax = sns.scatterplot(x='n_rois', y='delta_st2_hc', hue='n_rois',
                data=vert_delta_st2_hc_over_rois_over_iterations_df,
                alpha=0.2, s=2, legend=False, y_jitter=0.5)
ax.set_xlabel('Number of deleted network nodes')
ax.set_ylabel('Difference in global efficiency')
# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
# ax.set_title('Evolution of the difference in global efficiency between controls and subjects at timepoint 3')
# keep a handle on the figure so that the next cell can write it to disk
fig = ax.get_figure()
plt.show()

In [None]:
# Write the figure currently held in `fig` (assigned by the preceding plotting cell)
# as a 1200 dpi TIFF into the working directory.
fig.savefig('diff_eglob_hc_st2_repeated100_random_attack.tiff', format='tiff', dpi=1200)

Perform a t-test at every value of n_rois_deleted to check at which point the difference between HC and ST2 is no longer significant


In [None]:
# limit number of deleted ROIs to 238, as there is no difference for the last two ROIs (resulting in pval NaN)
max_deleted_ROIs = 238

# One two-sample t-test (equal_var=False, i.e. Welch's test) per repetition and per
# deletion step, comparing the AUC values of HC and ST2 along the last axis;
# [1] keeps the p-value. The result has shape (n_iterations, max_deleted_ROIs).
# NOTE: unlike the single-run analysis, NaN entries are not filtered out here.
pvals_per_rep_n_rois_deleted = np.array([[stats.ttest_ind(
    hc_gEff_auc_rep_random_attack[iteration, n_rois_deleted],
    st2_gEff_auc_rep_random_attack[iteration, n_rois_deleted],
    equal_var=False)[1] for n_rois_deleted in range(max_deleted_ROIs)]
                                     for iteration in range(n_iterations)])

In [None]:
# Wide table of the raw p-values: one row per repetition, one column per
# (1-based) number of deleted ROIs.
pvals_per_rep_n_rois_deleted_df = pd.DataFrame(pvals_per_rep_n_rois_deleted,
                                               index=range(n_iterations),
                                               columns=range(1,max_deleted_ROIs+1))
pvals_per_rep_n_rois_deleted_df['n_rep_iter'] = range(n_iterations)
# Long format: one row per (repetition, n_rois_deleted) pair; the name is reused for the melted table.
pvals_per_rep_n_rois_deleted_df = pvals_per_rep_n_rois_deleted_df.melt(id_vars=['n_rep_iter'],
                                     value_vars=range(1,max_deleted_ROIs+1),
                                     var_name='n_rois_deleted',
                                     value_name='pval')

In [None]:
# Uncorrected p-values of all repetitions against the number of deleted ROIs.
ax = sns.scatterplot(data=pvals_per_rep_n_rois_deleted_df,
                     x='n_rois_deleted', y='pval', hue='n_rois_deleted',
                     s=2, alpha=0.99)

In [None]:
# Benjamini-Hochberg FDR correction applied jointly to all p-values
# (every repetition and deletion step), then restored to the original 2-D layout.
corrected_bh_fdr_rep_pvals = np.reshape(
    multitest.multipletests(pvals_per_rep_n_rois_deleted.flatten(), method='fdr_bh')[1],
    pvals_per_rep_n_rois_deleted.shape)

In [None]:
# Wide table of the FDR-corrected p-values: one row per repetition, one column
# per (1-based) number of deleted ROIs.
corrected_bh_fdr_rep_pvals_df = pd.DataFrame(corrected_bh_fdr_rep_pvals,
                                             index=range(n_iterations),
                                             columns=range(1,max_deleted_ROIs+1))
corrected_bh_fdr_rep_pvals_df['n_rep_iter'] = range(n_iterations)
# Long format: one row per (repetition, n_rois_deleted) pair; the name is reused for the melted table.
corrected_bh_fdr_rep_pvals_df = corrected_bh_fdr_rep_pvals_df.melt(id_vars=['n_rep_iter'],
                                     value_vars=range(1,max_deleted_ROIs+1),
                                     var_name='n_rois_deleted',
                                     value_name='corrected_bh_fdr_pval')

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from matplotlib import ticker

# FDR-corrected p-values of all repetitions against the number of deleted nodes.
ax = sns.scatterplot(y='corrected_bh_fdr_pval', x='n_rois_deleted', hue='n_rois_deleted',
                data=corrected_bh_fdr_rep_pvals_df,
                alpha=0.4, s=2, legend=False)

# add line with median
# sns.lineplot(x=range(1,max_deleted_ROIs+1), y=np.median(corrected_bh_fdr_rep_pvals, axis=0),
#              color=sns.color_palette('inferno')[0],
#              ax=ax, label='median', alpha=0.3)

ax.set_xlabel('Number of deleted network nodes')
ax.set_ylabel('p-value after FDR correction')
# only p-values up to 0.2 are shown; larger values fall outside the visible range
ax.set_ylim(0.0, 0.2)
# change y tick frequency to 0.05
ax.yaxis.set_major_locator(ticker.MultipleLocator(0.05))

# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
# keep a handle on the figure so that the next cell can write it to disk
fig = ax.get_figure()
plt.show()


In [None]:
# Write the figure currently held in `fig` (assigned by the preceding plotting cell)
# as a 1200 dpi TIFF into the working directory.
fig.savefig('limits_of_resilience_after_repeated100_random_attack.tiff', format='tiff', dpi=1200)