In [1]:
import mne
import os
from prediction_utils import *
from statsmodels.stats.outliers_influence import variance_inflation_factor

# ---------------------------------------------------------------------------
# Configuration for the feature loading and the VIF analysis run further below.
# NOTE(review): `np` is not imported explicitly in this notebook; it is
# presumably provided by `from prediction_utils import *` - confirm.
# ---------------------------------------------------------------------------

# Minimum average goodness of fit and average amplitude a subject must reach
# for a response to be used (the check is bypassed for 'mep' responses).
gof_thresh_average = 75
amplitude_thresh_average = 20*1e-9 #20 nAm threshold

# Response types and the names of the custom position labels.
responses = ['n15','p30','n45','p60','mep']
pos_names = ['n15','p30','n45','p60','handknob']
where = "source"

# Root directories of the extracted features and the source-analysis results.
feature_path_base = r"D:\REFTEP_ALL\Features"
source_path_base =r"D:\REFTEP_ALL\Source_analysis"

sites = ['Tuebingen','Aalto']
freq_range_names = ['theta','alpha','beta','gamma']

# Names of the fsaverage "aparc" labels of both hemispheres, excluding any
# label whose name contains "unknown".
labels_all_aparc = mne.read_labels_from_annot("fsaverage", "aparc", "both", subjects_dir=r"D:\REFTEP_ALL\REFTEP_reco\Aalto_recon_all")
label_names_aparc = [lab.name for lab in labels_all_aparc if "unknown" not in lab.name]

#define which params to use
usepsd = True
usecoil = True
rejcoil = False
usephase = False
usepac = False
models_path = fr"D:\REFTEP_ALL\Models_Aalto_Tuebingen_phase_{usephase}_usecoil_{usecoil}_rejcoil_{rejcoil}"

# Only usetime[0] is read in this cell (it switches the time-interaction terms
# on or off); the whole list is handed to the loading helpers unchanged.
usetime = [True, 'sample', {'Aalto':False,'Tuebingen':False}, "preprocessed_index"]
phasefreqs = ['alpha']

# Band-power predictors that get an interaction term with time.
interaction_variables_with_time = ['PSD_' + band for band in freq_range_names] if usetime[0] else []

# Fallback coil differences handed to load_data_subject. Per the loader's log
# message these are degrees (normal, direction) and millimetres (position).
angle_diff_default_normal=1.106214876384344
angle_diff_default_dir=1.2227792988271589
pos_diff_default=2.091002900606377

# Coil thresholds: None when coil information is not used, finite limits when
# coil-based rejection is enabled, and infinite (no rejection) otherwise.
if not usecoil:
    distance_thresh = None
    angle_distance_thresh = None
    print("not using coil")
elif rejcoil:
    distance_thresh = 5
    angle_distance_thresh = 10
else:
    distance_thresh = np.inf
    angle_distance_thresh = np.inf

combine_normal_ori = False
combine_all = True
re_formula_now = False

Reading labels from parcellation...
   read 35 labels from D:\REFTEP_ALL\REFTEP_reco\Aalto_recon_all\fsaverage\label\lh.aparc.annot
   read 34 labels from D:\REFTEP_ALL\REFTEP_reco\Aalto_recon_all\fsaverage\label\rh.aparc.annot


In [2]:
# Compute variance inflation factors (VIFs) of the model predictors for every
# spatial label, then print LaTeX tables with the mean and STD of each
# predictor's VIF across labels.
for parctype in ['aparc']:
    datas = []
    spatial_names_subjects = []
    for response_ind, response in enumerate(['mep']): #go through different response types
        #define "source" spatial names, i.e. anatomical labels or functional labels
        if response_ind == 0: #only load features once to save time
            for site in sites:
                features_path_site = os.path.join(feature_path_base,f'Features_{site}')
                for subind, subject in enumerate(os.listdir(features_path_site)):
                    if subind==0: #only read once
                        if parctype == "aparc":
                            spatial_names = label_names_aparc
                        elif parctype == str(['n15', 'p30', 'n45', 'p60', 'handknob']):
                            # Custom labels: collect the label names of every position file.
                            spatial_names_orig = []
                            for pos_name in pos_names:
                                if pos_name != "handknob":
                                    custom_label_file = f"{subject}_{pos_name}_label_fsaverage"
                                else:
                                    custom_label_file = f"{pos_name}_label_fsaverage"
                                labels_all_now = mne.read_labels_from_annot("fsaverage", custom_label_file, "both", subjects_dir=r"D:\REFTEP_ALL\REFTEP_reco\Aalto_recon_all", verbose=False)
                                labels_now = [label.name for label in labels_all_now if "unknown" not in label.name]
                                spatial_names_orig = spatial_names_orig + labels_now
                            spatial_names_orig_splitted = [name.split("_") for name in spatial_names_orig]
                            spatial_names = [name[1] +  "_" + name[2] + "_" + name[3][:-3] for name in spatial_names_orig_splitted] #"around_xxx_label", ignoring hemisphere and subject
                        else:
                            raise ValueError(f"bad parameter: parctype: {parctype}")
                    where_now = f'{where}/{subject}_{parctype}' #where to find the features from now for this parcellation
                    data_subject = load_data_subject(subject, site, features_path_site, where_now, spatial_names, freq_range_names, usepsd, usecoil,
                                                      usephase, usepac, phasefreqs, usetime, distance_thresh, angle_distance_thresh, angle_diff_default_normal,angle_diff_default_dir, pos_diff_default, combine_normal_ori, combine_all, usecoil_sigma=1e-3)
                    datas.append(data_subject)
        # Load the responses and remember which subjects (by running index) pass
        # the quality thresholds for this response type.
        # NOTE(review): `datas` follows the os.listdir order of the Features
        # directories while `idx` follows the os.listdir order of the
        # Source_analysis directories; datas[ind] below pairs features with the
        # right responses only if both listings hold the same subjects in the
        # same order - confirm.
        good_subjects_this_response = []
        responses_all = []
        idx = 0
        for site in sites:
            source_path_site = os.path.join(source_path_base,f'Source_analysis_{site}')
            for subject in os.listdir(source_path_site):
                amplitudes, average_gof, average_amp = load_responses_subject(source_path_site, subject, response)
                #only use data from these subjects in this response
                if (average_gof >= gof_thresh_average and average_amp >= amplitude_thresh_average) or response == 'mep':
                    good_subjects_this_response.append(idx)
                    responses_all.append(amplitudes)
                idx += 1
        datas_to_use = [datas[ind] for ind in good_subjects_this_response]
        #print(f'Using {len(datas_to_use)} subjects for {response}')
        if parctype == 'aparc':
            spatial_names_now = spatial_names
        else:
            continue
            #spatial_names_now = [name for name in spatial_names if response in name or 'handknob' in name]
        vif_dfs = []
        ref_site = None
        # Columns entering the VIF computation; the "_x_Latency" columns are the
        # interaction terms created inside the loop below.
        predictors_test = ['PSD_theta','PSD_alpha','PSD_beta','PSD_gamma','PSD_theta_x_Latency','PSD_alpha_x_Latency','PSD_beta_x_Latency','PSD_gamma_x_Latency','diff_coil','Latency']
        for name in spatial_names_now:
            #create a dataframe
            df = create_df_with_features(datas_to_use, responses_all, name, freq_range_names, usephase, usepac, usecoil, phasefreqs, usetime, combine_normal_ori, combine_all)
            #df = df.dropna() #drop nan values
            df, explanatory_variables_numeric = create_scaled_df(df, interaction_variables_with_time, usephase) #scale explanatory variables
            #add interaction effects
            interaction_vars = []
            for var in interaction_variables_with_time:
                interaction_vars += [var + '_x_Latency']
                df[var + '_x_Latency'] = df[var] * df['Latency']
            df_new = df.copy()
            # ref_site is None above, so this branch is currently skipped. The
            # 'Site' column is not part of predictors_test, so the renaming
            # would not change the VIF values.
            if ref_site is not None:
                new_ref_site = "AAAA" + ref_site
                df_new['Site'] = df_new['Site'].replace(ref_site,new_ref_site)
            #compute the VIF values
            # NOTE(review): X is passed without an intercept column; statsmodels
            # does not add one. Confirm that create_scaled_df centres the
            # predictors, otherwise these are uncentred VIFs.
            X = df_new[predictors_test].copy()
            vif_df = pd.DataFrame({
                'Feature': X.columns,
                'VIF': [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
            })
            vif_dfs.append(vif_df)
        # One row per (label, predictor); summarise across labels per predictor.
        all_vifs = pd.concat(vif_dfs, ignore_index=True)
        rename_mapping_features = {'PSD_alpha':'Alpha power','PSD_beta':'Beta power','PSD_gamma':'Gamma power','PSD_theta':'Theta power','diff_coil':'Coil control',
                                   'PSD_theta_x_Latency':'Theta power x Time','PSD_alpha_x_Latency':'Alpha power x Time','PSD_beta_x_Latency':'Beta power x Time','PSD_gamma_x_Latency':'Gamma power x Time','Latency':'Time'}
        summary_vif = all_vifs.groupby('Feature')['VIF'].agg(['mean','std']).reset_index().rename(columns={'mean':'VIF mean','std':'VIF STD'})
        summary_vif['Feature'] = summary_vif['Feature'].map(rename_mapping_features)
        # Print the table twice: rounded to two decimals, then with default precision.
        print(summary_vif.to_latex(index=False,float_format="%.2f"))
        print(summary_vif.to_latex(index=False))

Using default difference values of position diff: 2.091002900606377 mm and normal and ori: 1.106214876384344 and 1.2227792988271589 degrees as D:\REFTEP_ALL\Features\Features_Tuebingen\sub-018\sub-018_stimulations_final.mat was not found for sub-018.
\begin{tabular}{lrr}
\toprule
Feature & VIF mean & VIF STD \\
\midrule
Time & 1.03 & 0.01 \\
Alpha power & 1.20 & 0.05 \\
Alpha power x Time & 1.22 & 0.05 \\
Beta power & 1.98 & 0.55 \\
Beta power x Time & 2.12 & 0.64 \\
Gamma power & 1.80 & 0.58 \\
Gamma power x Time & 1.92 & 0.66 \\
Theta power & 1.08 & 0.01 \\
Theta power x Time & 1.08 & 0.02 \\
Coil control & 1.01 & 0.00 \\
\bottomrule
\end{tabular}

\begin{tabular}{lrr}
\toprule
Feature & VIF mean & VIF STD \\
\midrule
Time & 1.031686 & 0.007333 \\
Alpha power & 1.200123 & 0.047085 \\
Alpha power x Time & 1.217467 & 0.051507 \\
Beta power & 1.977876 & 0.551449 \\
Beta power x Time & 2.115426 & 0.641461 \\
Gamma power & 1.796270 & 0.577111 \\
Gamma power x Time & 1.921381 & 0.663536 \\