In [None]:
import itertools
import operator
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft


In [None]:
# subjects = pipeline(
#     methods=['wpli', 'envelope'],
#     freq_bands=(7.5, 12),
#     centrality_metrics=['eigen', 'close', 'between', 'degree', ],# 'katz', 'info', 'harmonic']
#     subjects_specificity={
#         'M2S2': {
#             'freq_bands': (7.5, 12.5)
#         },
#         'R1D2': {
#             'freq_bands': (7.5, 11)
#         },
#         'S1A2': {
#             'freq_bands': (5, 10)
#         },
#         'S1H1': {
#             'freq_bands': (8, 13)
#         },
#         'K1V1': {
#             'freq_bands': (7.5, 11)
#         },
#         'L1P1': {
#             'freq_bands': (5, 10)
#         },
#         'M1G2': {
#             'freq_bands': (7, 11)
#         },
#         'G1V2': {
#             'freq_bands': (7, 11)
#         },
#         'G1R1': {
#             'freq_bands': (12.5, 16.5)
#         },
#         'M1N2': {
#             'freq_bands': (10, 15)
#         },
#         'B1R1': {
#             'freq_bands': (6, 11)
#         },
#         'B1C2': {
#             'freq_bands': (7.5, 12.5)
#         },
#         'J1T2': {
#             'freq_bands': (11, 15)
#         },
#         'O1O2': {
#             'freq_bands': (5.5, 9.5)
#         },
#     }
# )

# subjects = pipeline(
#     methods=['wpli', 'envelope'],
#     freq_bands=(4, 8),
#     centrality_metrics=['eigen', 'close', 'between', 'degree']
#     )

subjects = pipeline(
    methods=['wpli', 'envelope', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased'],
    freq_bands=(4, 8),
    centrality_metrics=['eigen', 'between', 'degree', 'info']
    )

# for subject in subjects:
#     for dataset in subject.datasets:
#         print(subject.datasets[dataset].head())
        # print(dataset)
        # print(subject.dataset[dataset])
        # columns = subject.dataset[dataset].columns.to_list()
        # rule = dict()
        # for column in columns:
        #     if 'wpli' in column:
        #         rule.update({column: 'wpli'})
        #     if 'envelope' in column:
        #         rule.update({column: 'envelope'})
        # subject.dataset[dataset] = subject.dataset[dataset].rename(columns=rule, copy=True)



datasets = {
    subject: {
        freq: {
            method: lmd.suppress(
                    subject.connectomes[freq][method],
                    trigger=subject.connectomes[freq][method].mean().mean(),
                    optimal=0
                )
            # method: subject.connectomes[freq][method]
           for method in subject.connectomes[freq]
        }
        for freq in subject.connectomes
    }
    for subject in subjects
}

In [None]:
print(len(subjects))

In [3]:
close, between, eigen, degree, info, harmony = dict(), dict(), dict(), dict(), dict(), dict()
for subject in datasets:
    close.update({subject.name: dict()})
    between.update({subject.name: dict()})
    eigen.update({subject.name: dict()})
    degree.update({subject.name: dict()})
    info.update({subject.name: dict()})
    harmony.update({subject.name: dict()})
    # katz.update({subject.name: dict()})
    for freq in datasets[subject]:
        for method in datasets[subject][freq]:
            print(subject.name, freq, method)
            label_names = datasets[subject][freq][method].columns
            if nx.is_connected(
                    nx.convert_matrix.from_numpy_array(
                        datasets[subject][freq][method].to_numpy()
                    )
            ):
                arr  = datasets[subject][freq][method].to_numpy()
            elif nx.is_connected(
                    nx.convert_matrix.from_numpy_array(
                        subject.connectomes[freq][method].to_numpy()
                    )
            ):
                arr = subject.connectomes[freq][method].to_numpy()
            else:
                raise ValueError(f'Graph not connected: {subject.name}, {freq}, {method}')
            if arr.min().min() < 0:
                for i in range(arr.shape[0]):
                    for j in range(arr.shape[1]):
                        if arr[i, j] != 0: arr[i, j] -= arr.min().min()
            G = nx.convert_matrix.from_numpy_array(arr)
            mapping = {node: label_name for node, label_name in zip(G, label_names)}
            G = nx.relabel_nodes(G, mapping)
            for place, data in zip(
                [
                    close[subject.name],
                    between[subject.name],
                    eigen[subject.name],
                    info[subject.name],
                    harmony[subject.name],
                    degree[subject.name],
                    # katz[subject.name]
                ],[
                        nx.closeness_centrality(G, distance='weight'),
                        nx.betweenness_centrality(G, weight='weight'),
                        nx.eigenvector_centrality_numpy(G, weight='weight'),
                        nx.information_centrality(G, weight='weight'),
                        nx.harmonic_centrality(G, distance='weight'),
                        dict(G.degree(weight='weight')),
                        # nx.katz_centrality(G, weight='weight', max_iter=100)
                    ]
            ):
                place.update({
                    freq + '_' + method: pd.Series(data)
                })

print('All is done')

N2K2 4-8Hz ciplv
N2K2 4-8Hz ppc
N2K2 4-8Hz pli
N2K2 4-8Hz pli2_unbiased
N2K2 4-8Hz wpli2_debiased
K4L2 4-8Hz wpli
K4L2 4-8Hz envelope
K4L2 4-8Hz coh
K4L2 4-8Hz imcoh
K4L2 4-8Hz plv
K4L2 4-8Hz ciplv
K4L2 4-8Hz ppc
K4L2 4-8Hz pli
K4L2 4-8Hz pli2_unbiased
K4L2 4-8Hz wpli2_debiased
B1C2 4-8Hz wpli
B1C2 4-8Hz envelope
B1C2 4-8Hz coh
B1C2 4-8Hz imcoh
B1C2 4-8Hz plv
B1C2 4-8Hz ciplv
B1C2 4-8Hz ppc
B1C2 4-8Hz pli
B1C2 4-8Hz pli2_unbiased
B1C2 4-8Hz wpli2_debiased
J1T2 4-8Hz wpli
J1T2 4-8Hz envelope
J1T2 4-8Hz coh
J1T2 4-8Hz imcoh
J1T2 4-8Hz plv
J1T2 4-8Hz ciplv
J1T2 4-8Hz ppc
J1T2 4-8Hz pli
J1T2 4-8Hz pli2_unbiased
J1T2 4-8Hz wpli2_debiased
O1O2 4-8Hz wpli
O1O2 4-8Hz envelope
O1O2 4-8Hz coh
O1O2 4-8Hz imcoh
O1O2 4-8Hz plv
O1O2 4-8Hz ciplv
O1O2 4-8Hz ppc
O1O2 4-8Hz pli
O1O2 4-8Hz pli2_unbiased
O1O2 4-8Hz wpli2_debiased
L2M1 4-8Hz wpli
L2M1 4-8Hz envelope
L2M1 4-8Hz coh
L2M1 4-8Hz imcoh
L2M1 4-8Hz plv
L2M1 4-8Hz ciplv
L2M1 4-8Hz ppc
L2M1 4-8Hz pli
L2M1 4-8Hz pli2_unbiased
L2M1 4-8Hz wpli2_debias

In [4]:
datasets_centrality = dict()

for subject in subjects:
    datasets_centrality.update({subject.name: dict()})
    datasets_centrality[subject.name].update({
        'close': pd.DataFrame(close[subject.name]),
        'between': pd.DataFrame(between[subject.name]),
        'eigen': pd.DataFrame(eigen[subject.name]),
        'degree': pd.DataFrame(degree[subject.name]),
        'info': pd.DataFrame(info[subject.name]),
        'harmony': pd.DataFrame(harmony[subject.name]),
        # 'katz': pd.DataFrame(katz[subject.name])
    })
    true = subject.datasets['eigen']['resected']
    datasets_centrality[subject.name]['close'] = datasets_centrality[subject.name]['close'].assign(resected=true)
    datasets_centrality[subject.name]['between'] = datasets_centrality[subject.name]['between'].assign(resected=true)
    datasets_centrality[subject.name]['eigen'] = datasets_centrality[subject.name]['eigen'].assign(resected=true)
    datasets_centrality[subject.name]['degree'] = datasets_centrality[subject.name]['degree'].assign(resected=true)
    datasets_centrality[subject.name]['info'] = datasets_centrality[subject.name]['info'].assign(resected=true)
    datasets_centrality[subject.name]['harmony'] = datasets_centrality[subject.name]['harmony'].assign(resected=true)
    # datasets_centrality[subject.name]['katz'] = datasets_centrality[subject.name]['katz'].assign(resected=true)

for subject in subjects:
    subject.datasets = datasets_centrality[subject.name]

In [5]:
stat1 = SubjectsStatistic(subjects, 'resected', centrality_metric='eigen')
print('done 1')
stat2 = SubjectsStatistic(subjects, 'resected', centrality_metric='between')
print('done 2')
stat3 = SubjectsStatistic(subjects, 'resected', centrality_metric='close')
print('done 3')
stat4 = SubjectsStatistic(subjects, 'resected', centrality_metric='degree')
print('done 4')
stat5 = SubjectsStatistic(subjects, 'resected', centrality_metric='info')
print('done 5')
stat6 = SubjectsStatistic(subjects, 'resected', centrality_metric='harmony')
print('done 6')
# stat7 = SubjectsStatistic(subjects, 'resected', centrality_metric='katz')
print('ok')

done 1
done 2
done 3
done 4
done 5
done 6
ok


In [None]:
test1 = stat1.test(state='reflected')
test2 = stat2.test(state='reflected')
test3 = stat3.test(state='reflected')
test4 = stat4.test(state='reflected')
test5 = stat5.test(state='reflected')
test6 = stat6.test(state='reflected')
test1_samples, test2_samples, test3_samples, test4_samples, test5_samples, test6_samples = list(), list(), list(), list(), list(), list()
for feature in test1.result:
    test1_samples.append(test1.result[feature][1])
    test2_samples.append(test2.result[feature][1])
    test3_samples.append(test3.result[feature][1])
    test4_samples.append(test4.result[feature][1])
    test5_samples.append(test5.result[feature][1])
    test6_samples.append(test6.result[feature][1])

test_samples = np.array([
    np.array(test1_samples),
    np.array(test2_samples),
    np.array(test3_samples),
    np.array(test4_samples),
    np.array(test5_samples),
    np.array(test6_samples)
])

df = pd.DataFrame(test_samples, columns=list(test1.result.keys()), index=[
    'eigen', 'between', 'close', 'degree', 'info', 'harmony'
]).T
print(df)
df.to_csv('/home/user/Documents/Wilcoxon.csv')

In [None]:
test1 = stat1.test(state='reflected', test='mannwhitneyu')
test2 = stat2.test(state='reflected', test='mannwhitneyu')
test3 = stat3.test(state='reflected', test='mannwhitneyu')
test4 = stat4.test(state='reflected', test='mannwhitneyu')
test5 = stat5.test(state='reflected', test='mannwhitneyu')
test6 = stat6.test(state='reflected', test='mannwhitneyu')
test1_samples, test2_samples, test3_samples, test4_samples, test5_samples, test6_samples = list(), list(), list(), list(), list(), list()
for feature in test1.result:
    test1_samples.append(test1.result[feature][1])
    test2_samples.append(test2.result[feature][1])
    test3_samples.append(test3.result[feature][1])
    test4_samples.append(test4.result[feature][1])
    test5_samples.append(test5.result[feature][1])
    test6_samples.append(test6.result[feature][1])

test_samples = np.array([
    np.array(test1_samples),
    np.array(test2_samples),
    np.array(test3_samples),
    np.array(test4_samples),
    np.array(test5_samples),
    np.array(test6_samples)
])

df = pd.DataFrame(test_samples, columns=list(test1.result.keys()), index=[
    'eigen', 'between', 'close', 'degree', 'info', 'harmony'
]).T
print(df)
df.to_csv('/home/user/Documents/MannWhitneyu.csv')

In [None]:
# plt.imshow(
#     pd.DataFrame(
#         nx.to_numpy_matrix(
#             subjects[0].graph['7.5-12.5Hz']['wpli']
#         )
#     ).to_numpy())
# plt.show()
# plt.imshow(
#     lmd.suppress(
#         pd.DataFrame(
#             nx.to_numpy_matrix(
#                 subjects[0].graph['7.5-12.5Hz']['wpli']
#             )
#         ),
#         optimal=0
#     ).to_numpy())
# plt.show()
#
G = sparse_graph(subjects[0].graph['7.5-12.5Hz']['wpli'])
lh, rh = graph_to_hemispheres(G)
print(lh.number_of_edges(), lh.number_of_nodes())
# nx.draw(subjects[0].graph['7.5-12.5Hz']['wpli'])
# plt.show()
# nx.draw(G)
# plt.show()
# nx.draw(lh)
# plt.show()
# nx.draw(rh)
# plt.show()
# print('all: ', smallworldness(G))
import time
start = time.time()
# print(smallworldness(nx.complete_graph(5)))
# print(time.time() - start)
# print(smallworldness(nx.complete_graph(10)))
# print(time.time() - start)
# print(smallworldness(nx.complete_graph(75)))
# print(time.time() - start)
# print('lh: ', smallworldness(lh))
# print('rh: ', smallworldness(rh))

In [None]:
for subject in subjects:
    print(subject.name)
    print('\n\tresected nodes:')
    for node in subject.nodes:
        if node.type == 'resected':
            print(f'\t{node.label.name}')
    for freq in subject.connectomes:
        for method in subject.connectomes[freq]:
            print(f'\n\t{freq}: {method}')
            label_names = list(subject.connectomes[freq][method].index)
            mapping = {
                i: label_name
                for i, label_name in zip(
                    range(len(label_names)),
                    label_names
                )
            }
            G = sparse_graph(
                nx.convert_matrix.from_numpy_matrix(
                    subject.connectomes[freq][method].to_numpy()
                )
            )
            G = nx.relabel_nodes(G, mapping)
            lh, rh = graph_to_hemispheres(G)
            print(f'\n\themispheres division modularity: {hemispheres_division_modularity(G)}\n')
            print('\n\t s for lh', nx.algorithms.smetric.s_metric(lh, normalized=False)/100000)
            print('\n\t s for rh', nx.algorithms.smetric.s_metric(rh, normalized=False)/100000)

In [6]:
dataset1 = metric_for_hemispheres(subjects, nx.algorithms.cluster.transitivity)
dataset2 = metric_for_hemispheres(subjects, nx.algorithms.smetric.s_metric, normalized=False)
dataset3 = metric_for_hemispheres(subjects, nx.algorithms.global_efficiency)
# dataset = metric_for_hemispheres(subjects, nx.algorithms.smetric.s_metric, normalized=False)
# dataset = metric_for_hemispheres(subjects, nx.algorithms.global_efficiency)
# dataset = s_for_hemispheres(subjects)

P1H2: DONE, RUNTIME: 3.446002721786499
M2S2: DONE, RUNTIME: 3.3521647453308105
R1D2: DONE, RUNTIME: 3.8036186695098877
N3S2: DONE, RUNTIME: 3.0101189613342285
S1A2: DONE, RUNTIME: 2.8685755729675293
S1H1: DONE, RUNTIME: 4.034933805465698
K1V1: DONE, RUNTIME: 3.8956894874572754
L1P1: DONE, RUNTIME: 3.00840425491333
M1G2: DONE, RUNTIME: 3.700443744659424
G1V2: DONE, RUNTIME: 2.778379201889038
G1R1: DONE, RUNTIME: 2.794992685317993
M1N2: DONE, RUNTIME: 3.0254180431365967
S1B1: DONE, RUNTIME: 3.462425708770752
S1U3: DONE, RUNTIME: 2.9969425201416016
B1R1: DONE, RUNTIME: 2.899250030517578
S3R1: DONE, RUNTIME: 3.356846809387207
N2K2: DONE, RUNTIME: 5.198939800262451
K4L2: DONE, RUNTIME: 3.3211276531219482
B1C2: DONE, RUNTIME: 3.375744342803955
J1T2: DONE, RUNTIME: 3.3323800563812256
O1O2: DONE, RUNTIME: 3.3152332305908203
L2M1: DONE, RUNTIME: 3.8451991081237793
P1H2: DONE, RUNTIME: 2.136042833328247
M2S2: DONE, RUNTIME: 2.086578130722046
R1D2: DONE, RUNTIME: 2.2747559547424316
N3S2: DONE, RU

In [None]:
# print(dataset[['transitivity_for_wpli_4-8Hz', 'transitivity_for_envelope_4-8Hz', 'resected']])

In [None]:
datasets = [dataset1, dataset2, dataset3]
names = ['transitivity', 's_metric', 'global_efficiency']
cross_hemispheres_informativeness_arr = list()
cross_subjects_informativeness_arr = list()

for dataset, name in zip(datasets, names):
    acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
    for i in range(1000):

        y = dataset['resected'].to_numpy()
        # x = dataset.drop(['resected'], axis=1).to_numpy()
        # x = dataset[['s_metric_for_wpli_4-8Hz', 's_metric_for_envelope_4-8Hz']].to_numpy()
        x = dataset[[f'{name}_for_wpli_4-8Hz', f'{name}_for_envelope_4-8Hz']].to_numpy()
        # x = dataset[['global_efficiency_for_wpli_4-8Hz', 'global_efficiency_for_envelope_4-8Hz']].to_numpy()

        scaler = StandardScaler()
        # scaler = MinMaxScaler()
        # scaler = MaxAbsScaler()
        # scaler = RobustScaler()
        x = scaler.fit_transform(x)

        samples = [[sample] for sample in dataset.index.tolist()]

        x = np.append(x, samples, axis=1)
        x_train, x_test, y_train, y_test = train_test_split(x, y)
        train_samples, test_samples = x_train[:, 2], x_test[:, 2]
        x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]
        # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
        # clf = AdaBoostClassifier(n_estimators=10)
        # clf = svm.SVC(kernel='sigmoid')
        clf = svm.SVC()
        # clf = svm.SVC(kernel='linear', class_weight={True: .8, False: 1})
        # clf = svm.SVC(kernel='sigmoid', class_weight={True: .8, False: 1})
        # clf = svm.SVC(class_weight={True: .8, False: 1})
        # clf = SGDClassifier()
        # clf = KNeighborsClassifier(n_neighbors=3)
        # clf = LogisticRegression(class_weight={True: .7, False: 1})
        # clf = RandomForestClassifier(max_depth=20)
        # clf = GaussianNB()
        # clf = LinearDiscriminantAnalysis()
        # clf = QuadraticDiscriminantAnalysis()
        # clf = KMeans(n_clusters=2, algorithm='full')
        # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)

        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)

        acc.append(accuracy_score(y_test, pred))
        tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
        spec.append(tn / (tn + fp))
        sens.append(tp / (tp + fn))

        if np.isnan(tp/(tp + fp)):
            pospred.append(0)
        else:
            pospred.append(tp/(tp + fp))

        if np.isnan((tn/(tn + fn))):
            negpred.append(0)
        else:
            negpred.append(tn/(tn + fn))

    print(f'acc: {np.array(acc).mean()}')
    print(f'sens: {np.array(pospred).mean()}')
    print(f'spec: {np.array(negpred).mean()}')

In [8]:
datasets = [dataset1, dataset2, dataset3]
names = ['transitivity', 's_metric', 'global_efficiency']
cross_hemispheres_informativeness_arr = list()
cross_subjects_informativeness_arr = list()

for dataset, name in zip(datasets, names):
    cross_hemispheres_informativeness = CrossInformativeness()
    cross_subjects_informativeness = CrossInformativeness()

    for _ in range(100):
        hemispheres_informatoveness = Informativeness()
        subjects_informativeness = SubjectsInformativeness()
        acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

        for i in range(100):

            y = dataset['resected'].to_numpy()
            # x = dataset.drop(['resected'], axis=1).to_numpy()
            # x = dataset[['s_metric_for_wpli_4-8Hz', 's_metric_for_envelope_4-8Hz']].to_numpy()
            x = dataset[[f'{name}_for_wpli_4-8Hz', f'{name}_for_envelope_4-8Hz']].to_numpy()
            # x = dataset[['global_efficiency_for_wpli_4-8Hz', 'global_efficiency_for_envelope_4-8Hz']].to_numpy()

            scaler = StandardScaler()
            # scaler = MinMaxScaler()
            # scaler = MaxAbsScaler()
            # scaler = RobustScaler()
            x = scaler.fit_transform(x)

            samples = [[sample] for sample in dataset.index.tolist()]

            x = np.append(x, samples, axis=1)
            x_train, x_test, y_train, y_test = train_test_split(x, y)
            train_samples, test_samples = x_train[:, 2], x_test[:, 2]
            x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]
            # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
            # clf = AdaBoostClassifier(n_estimators=10)
            clf = svm.SVC()
            # clf = svm.SVC(kernel='sigmoid')
            # clf = svm.SVC(kernel='linear', class_weight={True: .8, False: 1})
            # clf = svm.SVC(kernel='sigmoid', class_weight={True: .8, False: 1})
            # clf = svm.SVC(class_weight={True: .8, False: 1})
            # clf = SGDClassifier()
            # clf = KNeighborsClassifier(n_neighbors=3)
            # clf = LogisticRegression(class_weight={True: .8, False: 1})
            # clf = RandomForestClassifier(max_depth=20)
            # clf = GaussianNB()
            # clf = LinearDiscriminantAnalysis()
            # clf = QuadraticDiscriminantAnalysis()
            # clf = KMeans(n_clusters=2, algorithm='full')
            # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)

            clf.fit(x_train, y_train)
            pred = clf.predict(x_test)

            for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
                hemispheres_informatoveness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'
                subjects_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'

        cross_subjects_informativeness.informativeness = subjects_informativeness
        cross_hemispheres_informativeness.informativeness = hemispheres_informatoveness

    cross_hemispheres_informativeness_arr.append(cross_hemispheres_informativeness)
    cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [None]:
data = dict(
    sorted(
        cross_subjects_informativeness_arr[0].ppv().items(),
        key=lambda item: item[0]
    )
)

plt.bar(data.keys(), data.values())
plt.xticks(rotation=90)
plt.title('Subjects-level positive prediction value (for hemispheres)')
plt.show()

data = dict(
    sorted(
        cross_hemispheres_informativeness_arr[0].ppv().items(),
        key=lambda item: item[0]
    )
)

plt.bar(data.keys(), data.values())
plt.xticks(rotation=90)
plt.title('Hemispheres-level positive prediction value, relatively to subject')
plt.show()

In [10]:
cross_nodes_informativeness = CrossInformativeness()
cross_subjects_informativeness = CrossInformativeness()
cross_samples_informativeness = CrossInformativeness()

for _ in range(100):
    # features = ['0.5-4Hz_wpli', '4-7Hz_wpli', '7-14Hz_wpli', '14-30Hz_wpli', '30-70Hz_wpli']
    # features = ['14-30Hz_wpli', '4-7Hz_wpli']
    # features = ['4-7Hz_wpli', '0.5-4Hz_envelope', '4-7Hz_envelope', '7-14Hz_envelope', '14-30Hz_envelope', '30-70Hz_envelope']
    # features = ['4-7Hz_wpli', '4-7Hz_envelope']
    features = ['4-8Hz_wpli', '4-8Hz_envelope']
    # features = ['4-8Hz_wpli2_debiased', '4-8Hz_envelope']
    # features = ['4-8Hz_wpli']
    # features = ['4-7Hz_envelope']
    # features = ['4-7Hz_wpli', '4-7Hz_psd']
    acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
    stat = stat1

    stat.datasets['true']['4-8Hz_envelope'] = stat2.datasets['true']['4-8Hz_envelope']
    stat.datasets['false_mirror']['4-8Hz_envelope'] = stat2.datasets['false_mirror']['4-8Hz_envelope']

    samples_informativeness = Informativeness()
    nodes_informativeness = NodesInformativeness()
    subject_informativeness = SubjectsInformativeness()

    for i in range(100):
        # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
        # clf = AdaBoostClassifier(n_estimators=10)
        # clf = svm.SVC(class_weight={True: .9, False: 1})
        clf = svm.SVC(kernel='sigmoid')
        # clf = svm.SVC(class_weight={True: 1, False: .8}, probability=True)
        # clf = SGDClassifier()
        # clf = KNeighborsClassifier(n_neighbors=7, metric='chebyshev')
        # clf = LogisticRegression(class_weight={True: 1, False: .8})
        # clf = LogisticRegression()
        # clf = RandomForestClassifier(max_depth=20)
        # clf = GaussianNB()
        # clf = LinearDiscriminantAnalysis()
        # clf = QuadraticDiscriminantAnalysis()
        # clf = KMeans(n_clusters=2, algorithm='full')
        # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)
        true_data = stat.datasets['true'][features]
        # true_data = lmd.lead_std(stat.datasets['true'][features], take_std_from=stat1.datasets['false_res'][features], axis=1)
        # false_data = stat.random_samples()[features]
        # false_data = lmd.lead_std(stat.datasets['false_res'][features], take_std_from=stat1.datasets['true'][features], axis=1)
        # false_data = stat1.datasets['false_res'][features]
        # false_data = stat1.datasets['false'][features]
        # true_data = stat1.random_samples()[features]
        # false_data = stat.random_samples()[features]
        false_data = stat.datasets['false_mirror'][features]
        true_data = true_data.assign(resected=True)
        false_data = false_data.assign(resected=False)
        # samples = [[sample] for sample in true_data.index.tolist() + false_data.index.tolist()]
        dataset = pd.concat([true_data, false_data], axis=0)
        dataset = dataset.sample(frac = 1)


        scaler = StandardScaler()
        # scaler = MinMaxScaler()
        # scaler = MaxAbsScaler()
        # scaler = RobustScaler()



        y = dataset['resected'].to_numpy()
        dataset = dataset.drop(['resected'], axis=1)
        samples = [[sample] for sample in dataset.index.tolist()]
        # dataset = lmd.lead_std(dataset, take_std_from=stat1.datasets['false_res'][features], axis=1)
        # dataset = lmd.suppress(dataset, axis=1, optimal='max')
        # dataset = lmd.promote(dataset, axis=1, optimal='max')
        # dataset = lmd.clusterize(dataset, axis=1, n_clusters=3, optimal='symclose')
        # dataset = lmd.binarize(dataset, axis=1)
        x = scaler.fit_transform(dataset)
        x = np.append(x, samples, axis=1)
        # x = dataset
        x_train, x_test, y_train, y_test = train_test_split(x, y)
        train_samples, test_samples = x_train[:, 2], x_test[:, 2]
        x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)

        for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
            nodes_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'
            subject_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'
            samples_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'


        # prob = clf.predict_proba(x_test).tolist()
        # for p, x, y in zip(prob, pred, y_test):
        #     print(y, x, p)

        # df = pd.DataFrame(np.array([np.array(y_test), pred]).T, columns=['actually', 'prediction'])
        # print(df)

        acc.append(accuracy_score(y_test, pred))
        tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
        spec.append(tn / (tn + fp))
        sens.append(tp / (tp + fn))

        if np.isnan(tp/(tp + fp)):
            pospred.append(0)
        else:
            pospred.append(tp/(tp + fp))

        if np.isnan((tn/(tn + fn))):
            negpred.append(0)
        else:
            negpred.append(tn/(tn + fn))

    cross_nodes_informativeness.informativeness = nodes_informativeness
    cross_subjects_informativeness.informativeness = subject_informativeness
    cross_samples_informativeness.informativeness = samples_informativeness

cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [12]:
names = ['transitivity', 's_metric', 'global_efficiency', 'eigencentrality']

print(lmd.dict_to_str(
    dict(
        sorted(
            cross_subjects_informativeness_arr[0].acc().items(),
            key=lambda item: item[0]
        )
    )
))

print(lmd.dict_to_str(
    dict(
        sorted(
            cross_subjects_informativeness_arr[1].acc().items(),
            key=lambda item: item[0]
        )
    )
))

print(lmd.dict_to_str(
    dict(
        sorted(
            cross_subjects_informativeness_arr[2].acc().items(),
            key=lambda item: item[0]
        )
    )
))

print(lmd.dict_to_str(
    dict(
        sorted(
            cross_subjects_informativeness_arr[3].acc().items(),
            key=lambda item: item[0]
        )
    )
))

{
  B1C2: 0.12394462546580498,
  B1R1: 0.9327164241599497,
  G1R1: 0.9517712420605139,
  G1V2: 0.4745371591154801,
  J1T2: 0.3495300939812037,
  K1V1: 0.0793175168329397,
  K4L2: 0.4907501321409694,
  L1P1: 0.7792342653458461,
  L2M1: 0.41611677664467106,
  M1G2: 0.6255241600169855,
  M1N2: 0.9225707172979296,
  M2S2: 0.7351594273629413,
  N2K2: 0.39590453168847767,
  N3S2: 0.02140227720229432,
  O1O2: 0.921661978703252,
  P1H2: 0.4975893933306549,
  R1D2: 0.9454475944006587,
  S1A2: 0.25445323719025936,
  S1B1: 0.4881904146803073,
  S1H1: 0.4031844850527271,
  S1U3: 0.03963650425367363,
  S3R1: 0.3317720530835285,
}

{
  B1C2: 0.26068770077349357,
  B1R1: 0.9507098229550127,
  G1R1: 0.9610440202571094,
  G1V2: 0.5000956205775483,
  J1T2: 0.7401606598221573,
  K1V1: 0.4898949905319332,
  K4L2: 0.4782608695652174,
  L1P1: 0.9586038387367104,
  L2M1: 0.6181251391672233,
  M1G2: 0.9040546112986922,
  M1N2: 0.919175503694371,
  M2S2: 0.8467868338557993,
  N2K2: 0.5014884804556045,
  N3S2: 

In [None]:
s1 = pd.Series(
    dict(
        sorted(
          cross_nodes_informativeness.acc().items(),
            key=lambda item: item[0]
        )
    )
)
s2 = pd.Series(
    dict(
        sorted(
          cross_nodes_informativeness.ppv().items(),
            key=lambda item: item[0]
        )
    )
)
s3 = pd.Series(
    dict(
        sorted(
          cross_nodes_informativeness.npv().items(),
            key=lambda item: item[0]
        )
    )
)

df = pd.DataFrame([s1, s2, s3], index=['acc', 'ppv', 'npv'])
df = df.T

df.to_csv('/home/user/Documents/Nodes_informativeness.csv')

In [None]:
s1 = pd.Series(
    dict(
        sorted(
          cross_samples_informativeness.acc().items(),
            key=lambda item: item[0]
        )
    )
)
s2 = pd.Series(
    dict(
        sorted(
          cross_samples_informativeness.ppv().items(),
            key=lambda item: item[0]
        )
    )
)
s3 = pd.Series(
    dict(
        sorted(
          cross_samples_informativeness.npv().items(),
            key=lambda item: item[0]
        )
    )
)

df = pd.DataFrame([s1, s2, s3], index=['acc', 'ppv', 'npv'])
df = df.T

df.to_csv('/home/user/Documents/Samples_informativeness.csv')

In [None]:
data = dict(
    sorted(
        cross_subjects_informativeness.ppv().items(),
        key=lambda item: item[0]
    )
)

plt.bar(data.keys(), data.values())
plt.xticks(rotation=90)
plt.title('Subjects-level positive prediction value (for regions)')
plt.show()

data = dict(
    sorted(
        cross_samples_informativeness.ppv().items(),
        key=lambda item: item[0]
    )
)

plt.bar(data.keys(), data.values())
plt.xticks(rotation=90)
plt.title('Nodes-level positive prediction value, relatively to subject')
plt.show()

In [None]:
info_correct = {
    key: cross_nodes_informativeness.mean()[1]['correct']['false'][key]
    for key in cross_nodes_informativeness.mean()[1]['correct']['false']
    if key in cross_nodes_informativeness.mean()[1]['wrong']['false']
}

info_wrong = {
    key: cross_nodes_informativeness.mean()[1]['wrong']['false'][key]
        # clf = LogisticRegression(class_weight={True: 1, False: .8})
        # clf = LogisticRegression()
        # clf = RandomFor
    for key in cross_nodes_informativeness.mean()[1]['wrong']['false']
    if key in cross_nodes_informativeness.mean()[1]['correct']['false']
}

info_correct = dict(
    sorted(
        info_correct.items(),
        key= lambda item: item[0],
        reverse=False
    )
)

info_wrong = dict(
    sorted(
        info_wrong.items(),
        key= lambda item: item[0],
        reverse=False
    )
)

tp = np.array(
    list(
        info_correct.values()
    )
)

tn = np.array(
    list(
        info_wrong.values()
    )
)

plt.bar(info_correct.keys(), tp/(tn+tp))
plt.xticks(rotation=90)
plt.title('Nodes-level false prediction accuracy, relatively to subject')
plt.show()

# print(
#     dict_to_str(info_wrong)
# )

In [None]:
# features = ['0.5-4Hz_wpli', '4-7Hz_wpli', '7-14Hz_wpli', '14-30Hz_wpli', '30-70Hz_wpli']
# features = ['14-30Hz_wpli', '4-7Hz_wpli']
# features = ['4-7Hz_wpli', '4-7Hz_envelope']
# features = ['4-7Hz_wpli', '4-7Hz_envelope']
features = ['wpli', 'envelope']
# features = ['4-7Hz_wpli']
# features = ['envelope']
# features = ['4-7Hz_wpli', '4-7Hz_psd']
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
stat = stat1
for i in range(1000):
    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10, random_state=0)
    # clf = svm.SVC(kernel='linear')
    clf = svm.SVC(class_weight={True: .9, False: 1}, probability=True)
    # clf = KNeighborsClassifier(n_neighbors=3)
    # clf = LogisticRegression()
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)

    true_data = stat.datasets['true'][features]
    # false_data = stat1.random_samples()[features]
    # false_data = stat1.datasets['false_res'][features]
    # true_data = stat1.random_samples()[features]
    # false_data = stat.random_samples()[features]
    # false_data = lmd.lead_std(stat.datasets['false_res'][features], take_std_from=stat.datasets['true'][features], axis=1)
    false_data = stat.datasets['false_mirror'][features]
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()
    # scaler = MinMaxScaler()
    # scaler = MaxAbsScaler()
    # scaler = RobustScaler()



    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)


    # dataset = lmd.lead_std(dataset, take_std_from=stat1.datasets['false_res'][features], axis=1)
    dataset = lmd.suppress(dataset, axis=1, optimal='max')
    # dataset = lmd.promote(dataset, axis=1, optimal='min')
    # dataset = lmd.clusterize(dataset, axis=1, n_clusters=3, optimal='max')
    # dataset = lmd.binarize(dataset, axis=1)


    x = scaler.fit_transform(dataset)
    # x = dataset
    scores = cross_val_score(clf, x, y, cv=10)
    acc.append(scores.mean())

print('Accuracy: ', sum(acc)/len(acc), min(acc), max(acc))

In [None]:
# from nodestimation.mlearning.features import prepare_connectivity, prepare_data
# con = pkl.load(open(subjects[0].data['con'], 'rb'))

subjects_dir, subjects_ = find_subject_dir()
labels = mne.read_labels_from_annot('B1C2', parc='aparc', subjects_dir=subjects_dir)
label_names = [label.name for label in labels]
lh_labels = [name for name in label_names if name.endswith('lh')]
rh_labels = [name for name in label_names if name.endswith('rh')]

label_ypos_lh = list()

for name in lh_labels:
    idx = label_names.index(name)
    ypos = np.mean(labels[idx].pos[:, 1])
    label_ypos_lh.append(ypos)

try:
    idx = label_names.index('Brain-Stem')

except ValueError:
    pass

else:
    ypos = np.mean(labels[idx].pos[:, 1])
    lh_labels.append('Brain-Stem')
    label_ypos_lh.append(ypos)


lh_labels = [label for (yp, label) in sorted(zip(label_ypos_lh, lh_labels))]

rh_labels = [label[:-2] + 'rh' for label in lh_labels
             if label != 'Brain-Stem' and label[:-2] + 'rh' in rh_labels]


node_colors = [label.color for label in labels]

node_order = lh_labels[::-1] + rh_labels

node_angles = mne.viz.circular_layout(label_names, node_order, start_pos=90,
                              group_boundaries=[0, len(label_names) // 2])

In [None]:
# methods = [
#     'coh',
#     'imcoh',
#     'plv',
#     'ciplv',
#     'ppc',
#     'pli'
# ]
#
# freq_bands = [
#     '0.5-4Hz',
#     '4-7Hz',
#     '7-14Hz',
#     '14-30Hz'
# ]
#
# for method in methods:
#     for freq_band in freq_bands:
#         fig = plt.figure(num=None, figsize=(25, 25), facecolor='black')
#         mne.viz.plot_connectivity_circle(con[freq_band][method]['con'][:, :, 0], label_names, n_lines=300,
#                                          node_angles=node_angles, node_colors=node_colors,
#                                          title='All-to-All Connectivity Epilepsy Condition ({} for {})'
#                                          .format(method, freq_band), padding=8, fontsize_title=35, fontsize_colorbar=25,
#                                          fontsize_names=20, fig=fig
#                                          )

In [None]:
for subject in [subjects[0]]:
    fig, ax = plt.subplots(figsize=(15,15))
    display = nplt.plot_glass_brain(None, display_mode='lyrz', figure=fig, axes=ax)
    spared = [node.center_coordinates for node in subject.nodes if node.type == 'spared']
    resected = [node.center_coordinates for node in subject.nodes if node.type == 'resected']
    # resection = read['resec-mni'](subject.data['resec-mni'])
    # display.add_markers(resection, marker_color="violet", marker_size=1)
    display.add_markers(np.array(spared), marker_color="yellow", marker_size=100)
    display.add_markers(np.array(resected), marker_color="red", marker_size=250)

In [None]:
for subject in subjects[0:1]:
    fig, ax = plt.subplots(figsize=(15,15))
    display = nplt.plot_glass_brain(None, display_mode='lyrz', figure=fig, axes=ax)
    spared = [node.center_coordinates for node in subject.nodes if node.type == 'spared']
    resected = [node.center_coordinates for node in subject.nodes if node.type == 'resected']
    # resection = read['resec-mni'](subject.data['resec-mni'])
    # display.add_markers(resection, marker_color="violet", marker_size=1)
    display.add_markers(np.array(spared), marker_color="yellow", marker_size=100)
    display.add_markers(np.array(resected), marker_color="red", marker_size=250)
    plt.show()

    # fig, ax = plt.subplots(figsize=(10,4))
    nodes = np.array([node.center_coordinates for node in subject.nodes])
    print(subject.datasets.keys())
    nplt.plot_markers(subject.datasets['between']['4-8Hz_envelope']*subject.datasets['eigen']['4-8Hz_wpli'],
                      nodes, node_size=30, node_cmap='YlOrBr')
    plt.show()

In [None]:
for subject in subjects[0:1]:

    nodes = np.array([node.center_coordinates for node in subject.nodes])
    nodes_lh = np.array([node.center_coordinates for node in subject.nodes if 'lh' in node.label.name])
    nodes_rh = np.array([node.center_coordinates for node in subject.nodes if 'rh' in node.label.name])
    m = subject.connectomes['4-8Hz']['wpli'].mean().mean()


    g = lmd.suppress(
            subject.connectomes['4-8Hz']['wpli'],
            trigger=m*2,
            optimal=0
        )
    labels = subject.connectomes['4-8Hz']['wpli'].index.to_list()
    G = nx.from_numpy_array(
            subject.connectomes['4-8Hz']['wpli'].to_numpy()
        )
    mapping = {node: label for node, label in zip(G, labels)}
    G = nx.relabel_nodes(G, mapping)
    lh, rh = graph_to_hemispheres(
        G
    )
    fig, ax = plt.subplots(figsize=(15,15))
    display = nplt.plot_glass_brain(None, display_mode='lyrz', figure=fig, axes=ax)
    display.add_graph(
        g,
        nodes,
    )
    # display.add_graph(
    #     nx.to_numpy_matrix(lh),
    #     nodes_lh
    # )
