In [1]:
import itertools
import operator
import pickle
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split

from nodestimation.learning.connectoming import make_connectome
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft
from networkx.algorithms.approximation import large_clique_size

ENGEL1 = [
    'B1C2',
    'B1R1',
    'G1R1',
    'G1V2',
    'J1T2',
    'K1V1',
    'L1P1',
    'M1G2',
    'M1N2',
    'O1O2',
    'R1D2',
    'S1A2',
    'S1B1',
    'S1H1',
    'S1U3'
]
ENGEL2 = [
    'L2M1',
    'M2S2',
    'N2K2',
    'P1H2'
]
ENGEL3 = [
    'N3S2',
    'S3R1'
]
ENGEL4 = [
    'K4L2'
]
REJECTED = [
    'S1U3',
    'P1H2'
]

AGE = {
    'B1C2': 28.0,
    'B1R1': 16.0,
    'G1R1': 23.0,
    'G1V2': 5.0,
    'J1T2': 8.0,
    'K1V1': 10.0,
    'K4L2': 14.0,
    'L1P1': 16.0,
    'L2M1': 20.0,
    'M1G2': 8.0,
    'M1N2': 7.0,
    'M2S2': 18.0,
    'N2K2': 30.0,
    'N3S2': 10.0,
    'O1O2': 18.0,
    'R1D2': 6.5,
    'P1H2': 7.0,
    'S1A2': 12.0,
    'S1B1': 17.0,
    'S1H1': 28.0,
    'S3R1': 19.0,
    'S1U3': 15.0,
}

SEX = {
    'B1C2': 'f',
    'B1R1': 'm',
    'G1R1': 'f',
    'G1V2': 'm',
    'J1T2': 'f',
    'K1V1': 'f',
    'K4L2': 'f',
    'L1P1': 'f',
    'L2M1': 'f',
    'M1G2': 'm',
    'M1N2': 'm',
    'M2S2': 'm',
    'N2K2': 'm',
    'N3S2': 'm',
    'O1O2': 'f',
    'R1D2': 'f',
    'P1H2': 'm',
    'S1A2': 'm',
    'S1B1': 'm',
    'S1H1': 'm',
    'S3R1': 'm',
    'S1U3': 'f',
}

CONNECTOMES_KIND = 'initial'

SUBJECTS = pipeline(
    methods=['wpli', 'envelope', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased'],
    freq_bands=[(4, 8), (8, 14), (6, 8), (8, 10)],
)

path = f'/home/user/Documents/NodesEstimation/theta&late_theta&early_alpha&alpha/all_stats_all_subjects_theta&l-theta&e-alpha&alpha.pkl'
stats = pickle.load(open(path, 'rb'))

  warn("Fetchers from the nilearn.datasets module will be "


All computation has been already done, loading of the existing file with the solution...


In [3]:
## check all possible combinations of features and frequencies (nodes)


index = [stat.centrality_metric for stat in stats]

for freq in ['4-8', '6-8', '8-14', '8-10']:
    for feat in ['wpli']: #, 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased']:
        series = list()
        features = [f'{freq}Hz_envelope', f'{freq}Hz_{feat}']
        for stat1 in stats:
            curr_series = list()
            for stat2 in stats:
                full_true_data_1 = stat1.datasets['true'][f'{freq}Hz_{feat}']
                full_true_data_2 = stat2.datasets['true'][f'{freq}Hz_envelope']
                full_false_data_1 = stat1.datasets['false_mirror'][f'{freq}Hz_{feat}']
                full_false_data_2 = stat2.datasets['false_mirror'][f'{freq}Hz_envelope']

                full_true_data = pd.concat([full_true_data_1, full_true_data_2], axis=1)

                full_false_data = pd.concat([full_false_data_1, full_false_data_2], axis=1)

                acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

                for i in range(1000):
                    clf = svm.SVC(kernel='sigmoid')

                    scaler = StandardScaler()

                    ## Engel1&2 data
                    engel_1_2_true = full_true_data.loc[(elem[:4] in [*ENGEL1, *ENGEL2] and not elem[:4] in REJECTED for elem in full_true_data.index)]
                    engel_1_2_false = full_false_data.loc[(elem[:4] in [*ENGEL1, *ENGEL2] and not elem[:4] in REJECTED for elem in full_false_data.index)]
                    true_data = engel_1_2_true.assign(resected=True)
                    false_data = engel_1_2_false.assign(resected=False)

                    dataset = pd.concat([true_data, false_data], axis=0)
                    dataset = dataset.sample(frac = 1)

                    y = dataset['resected'].to_numpy()
                    dataset = dataset.drop(['resected'], axis=1)
                    x = scaler.fit_transform(dataset)
                    x_train, x_test, y_train, y_test = train_test_split(x, y)

                    clf.fit(x_train, y_train)
                    pred = clf.predict(x_test)

                    acc.append(accuracy_score(y_test, pred))
                    tn, fp, fn, tp = confusion_matrix(y_test, pred, labels=[0,1]).ravel()
                    if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
                        spec.append(tn / (tn + fp))
                        sens.append(tp / (tp + fn))
                        negpred.append(tn/(tn + fn))
                        pospred.append(tp/(tp + fp))

                curr_series.append(np.array(acc).mean())
                # print(f'{stat1.centrality_metric} for {features[1]},\n{stat2.centrality_metric} for {features[0]}')
                # print('acc: ', np.array(acc).mean())
                # print('spec: ', np.array(spec).mean())
                # print('sens: ', np.array(sens).mean())
            series.append(pd.Series(curr_series))

        df = pd.DataFrame(series, index=index).rename(columns={i: col for i, col in enumerate(index)})
        df.to_csv(f'/home/user/Documents/NodesEstimation/performance-tables/metrics_accuracy_{freq}Hz_{feat}.csv')
        print(f'{features[1]}|{features[0]}')
        print(df)

4-8Hz_wpli|4-8Hz_envelope
                eigen   between     close    degree      info   harmony  \
eigen        0.607619  0.609857  0.606857  0.616143  0.569238  0.610571   
between      0.459952  0.417095  0.456381  0.456381  0.460619  0.458810   
close        0.433667  0.425857  0.439476  0.443048  0.435857  0.435000   
degree       0.447905  0.421476  0.440476  0.436381  0.426714  0.433286   
info         0.446286  0.426905  0.428667  0.426333  0.425571  0.426333   
harmony      0.447000  0.427619  0.442667  0.443476  0.433429  0.436000   
katz         0.530095  0.567381  0.544952  0.557762  0.561238  0.555476   
percolation  0.437476  0.419048  0.447143  0.448762  0.444190  0.455333   

                 katz  percolation  
eigen        0.606762     0.603381  
between      0.466524     0.430048  
close        0.448762     0.451429  
degree       0.448333     0.436000  
info         0.441952     0.444429  
harmony      0.437810     0.452238  
katz         0.526095     0.565857  
pe

In [2]:
## check all possible combinations of features (hemispheres)
subjects = SUBJECTS.copy()
GRAPHS = [
    metric_for_hemispheres(subjects, nx.algorithms.cluster.transitivity),
    metric_for_hemispheres(subjects, nx.algorithms.smetric.s_metric, normalized=False),
    metric_for_hemispheres(subjects, nx.algorithms.global_efficiency),
    # additional metrics
    # metric: accuracy | specificity | sensitivity with Engel 1&2 groups
    metric_for_hemispheres(subjects, nx.algorithms.cluster.average_clustering, weight='weight'), # 51 | 52 | 57
    metric_for_hemispheres(subjects, large_clique_size), # 62 | 68 | 60
]

In [4]:
print('graphs done')
path = f'/home/user/Documents/NodesEstimation/theta&late_theta&early_alpha&alpha/all_graphs_all_subjects_theta&l-theta&e-alpha&alpha.pkl'
pickle.dump(
    GRAPHS,
    open(
        path,
        'wb'
    )
)

print(f'Graphs are saved at {path = }')


graphs done
Graphs are saved at path = '/home/user/Documents/NodesEstimation/theta&late_theta&early_alpha&alpha/all_graphs_all_subjects_theta&l-theta&e-alpha&alpha.pkl'


In [9]:
index = ['transitivity', 's_metric', 'global_efficiency', 'average_clustering', 'large_clique_size']
features = ['wpli', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased']
frequencies = ['4-8', '6-8', '8-14', '8-10']

for freq in frequencies:
    for feat in ['wpli']:
        series = list()
        for graph1, kind1 in zip(GRAPHS, index):
            curr_series = list()
            for graph2, kind2 in zip(GRAPHS, index):
                assert all(graph1['resected'] == graph2['resected'])
                full_data_1 = graph1[[f'{kind1}_for_{feat}_{freq}Hz', 'resected']]
                full_data_2 = graph2[[f'{kind2}_for_envelope_{freq}Hz']]
                full_data = pd.concat([full_data_1, full_data_2], axis=1)
                features = [f'{kind2}_for_envelope_{freq}Hz', f'{kind1}_for_{feat}_{freq}Hz']

                acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

                for i in range(1000):
                    clf = svm.SVC(kernel='sigmoid')

                    scaler = StandardScaler()

                    ## Engel1&2 data
                    engel_1_2 = full_data.loc[(elem[:4] in [*ENGEL1, *ENGEL2] and not elem[:4] in REJECTED for elem in full_data.index)]

                    dataset = engel_1_2.sample(frac = 1)
                    y = dataset['resected'].to_numpy()
                    dataset = dataset.drop(['resected'], axis=1)
                    x = scaler.fit_transform(dataset)
                    x_train, x_test, y_train, y_test = train_test_split(x, y)

                    clf.fit(x_train, y_train)
                    pred = clf.predict(x_test)

                    acc.append(accuracy_score(y_test, pred))
                    tn, fp, fn, tp = confusion_matrix(y_test, pred, labels=[0,1]).ravel()
                    if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
                        spec.append(tn / (tn + fp))
                        sens.append(tp / (tp + fn))
                        negpred.append(tn/(tn + fn))
                        pospred.append(tp/(tp + fp))

                curr_series.append(np.array(acc).mean())
                # print(f'{features[1]},\n{features[0]}')
                # print('acc: ', np.array(acc).mean())
                print(kind1, kind2, feat, freq, np.around(np.array(acc).mean(), decimals=3))
                # print('spec: ', np.array(spec).mean())
                # print('sens: ', np.array(sens).mean())
            series.append(pd.Series(curr_series))

        df = pd.DataFrame(series, index=index).rename(columns={i: col for i, col in enumerate(index)})
        df.to_csv(f'/home/user/Documents/NodesEstimation/performance-tables/hemispheres_metrics_accuracy_{feat}_for_{freq}Hz.csv')
        print(f'{features[1]}|{features[0]}')
        print(df)


transitivity transitivity wpli 4-8 0.692
transitivity s_metric wpli 4-8 0.737
transitivity global_efficiency wpli 4-8 0.736
transitivity average_clustering wpli 4-8 0.694
transitivity large_clique_size wpli 4-8 0.666
s_metric transitivity wpli 4-8 0.689
s_metric s_metric wpli 4-8 0.732
s_metric global_efficiency wpli 4-8 0.739
s_metric average_clustering wpli 4-8 0.695
s_metric large_clique_size wpli 4-8 0.674
global_efficiency transitivity wpli 4-8 0.704
global_efficiency s_metric wpli 4-8 0.734
global_efficiency global_efficiency wpli 4-8 0.735
global_efficiency average_clustering wpli 4-8 0.676
global_efficiency large_clique_size wpli 4-8 0.669
average_clustering transitivity wpli 4-8 0.612
average_clustering s_metric wpli 4-8 0.647
average_clustering global_efficiency wpli 4-8 0.645
average_clustering average_clustering wpli 4-8 0.518
average_clustering large_clique_size wpli 4-8 0.487
large_clique_size transitivity wpli 4-8 0.648
large_clique_size s_metric wpli 4-8 0.705
large_cli