In [None]:
import itertools
import operator
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft

In [None]:
subjects = pipeline(
    methods=['wpli', 'envelope', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased'],
    freq_bands=(4, 8),
    centrality_metrics=['eigen', 'between', 'degree', 'info']
    )

In [None]:
stat = SubjectsStatistic(subjects, 'resected', centrality_metric='eigen')
print('stat done')

dataset_ = metric_for_hemispheres(subjects, nx.algorithms.global_efficiency)
print('graph done')

In [None]:
datasets = [dataset_]
names = ['global_efficiency']
cross_hemispheres_informativeness_arr = list()
cross_subjects_informativeness_arr = list()

for dataset, name in zip(datasets, names):
    cross_hemispheres_informativeness = CrossInformativeness()
    cross_subjects_informativeness = CrossInformativeness()

    for _ in range(100):
        hemispheres_informatoveness = Informativeness()
        subjects_informativeness = SubjectsInformativeness()
        acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

        for i in range(100):

            y = dataset['resected'].to_numpy()
            x = dataset[[f'{name}_for_wpli_4-8Hz', f'{name}_for_envelope_4-8Hz']].to_numpy()

            scaler = StandardScaler()
            x = scaler.fit_transform(x)

            samples = [[sample] for sample in dataset.index.tolist()]

            x = np.append(x, samples, axis=1)
            x_train, x_test, y_train, y_test = train_test_split(x, y)
            train_samples, test_samples = x_train[:, 2], x_test[:, 2]
            x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

            clf = svm.SVC()
            clf.fit(x_train, y_train)
            pred = clf.predict(x_test)

            for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
                hemispheres_informatoveness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'
                subjects_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'

        cross_subjects_informativeness.informativeness = subjects_informativeness
        cross_hemispheres_informativeness.informativeness = hemispheres_informatoveness

    cross_hemispheres_informativeness_arr.append(cross_hemispheres_informativeness)
    cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [None]:
cross_nodes_informativeness = CrossInformativeness()
cross_subjects_informativeness = CrossInformativeness()
cross_samples_informativeness = CrossInformativeness()

for _ in range(100):
    features = ['4-8Hz_wpli', '4-8Hz_envelope']
    acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

    samples_informativeness = Informativeness()
    nodes_informativeness = NodesInformativeness()
    subject_informativeness = SubjectsInformativeness()

    for _ in range(100):
        clf = svm.SVC()
        true_data = stat.datasets['true'][features]
        false_data = stat.datasets['false_mirror'][features]
        true_data = true_data.assign(resected=True)
        false_data = false_data.assign(resected=False)
        dataset = pd.concat([true_data, false_data], axis=0)
        dataset = dataset.sample(frac = 1)


        scaler = StandardScaler()

        y = dataset['resected'].to_numpy()
        dataset = dataset.drop(['resected'], axis=1)
        samples = [[sample] for sample in dataset.index.tolist()]

        x = scaler.fit_transform(dataset)
        x = np.append(x, samples, axis=1)
        x_train, x_test, y_train, y_test = train_test_split(x, y)
        train_samples, test_samples = x_train[:, 2], x_test[:, 2]
        x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)

        for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
            nodes_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'
            subject_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'
            samples_informativeness.informativeness = sample, actual, 'correct' if predicted == actual else 'wrong'

    cross_nodes_informativeness.informativeness = nodes_informativeness
    cross_subjects_informativeness.informativeness = subject_informativeness
    cross_samples_informativeness.informativeness = samples_informativeness

cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [None]:
print(lmd.dict_to_str(
    dict(
        sorted(
            cross_subjects_informativeness_arr[0].acc().items(),
            key=lambda item: item[0]
        )
    )
))

print(lmd.dict_to_str(
    dict(
        sorted(
            cross_subjects_informativeness_arr[1].acc().items(),
            key=lambda item: item[0]
        )
    )
))

In [8]:
# all, global efficiency

dataset = dataset_.copy()

In [25]:
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

for _ in range(100):

    y = dataset['resected'].to_numpy()
    x = dataset[[
        'global_efficiency_for_wpli_4-8Hz',
        'global_efficiency_for_envelope_4-8Hz'
    ]].to_numpy()
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    samples = [[sample] for sample in dataset.index.tolist()]

    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf = svm.SVC()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('all, global efficiency')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())

all, global efficiency
acc:  0.6381818181818182
spec:  0.7075277777777778
sens:  0.6130357142857142


In [28]:
# all, eigencentrality

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_subjects_informativeness_arr[1].acc()

for _ in range(100):
    clf = svm.SVC()
    true_data = stat.datasets['true'][features].copy()
    false_data = stat.datasets['false_mirror'][features].copy()
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)

    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)
    samples = [[sample] for sample in dataset.index.tolist()]

    x = scaler.fit_transform(dataset)
    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('all, eigencentrality')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())

all, eigencentrality
acc:  0.49774999999999997
spec:  0.36949743933229656
sens:  0.6374408583432721


In [44]:
# high, global efficiency

dataset = dataset_.copy()

acc = cross_subjects_informativeness_arr[0].acc()

for sample, i in zip(dataset.index, range(len(dataset.index))):
    subject = sample[:4]
    if acc[subject] < 0.60 and i%2:
        dataset = dataset.drop(index=f'{subject}_lh')
        dataset = dataset.drop(index=f'{subject}_rh')

In [46]:
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

for _ in range(10):

    y = dataset['resected'].to_numpy()
    x = dataset[[
        'global_efficiency_for_wpli_4-8Hz',
        'global_efficiency_for_envelope_4-8Hz'
    ]].to_numpy()
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    samples = [[sample] for sample in dataset.index.tolist()]

    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf = svm.SVC()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('high, global efficiency')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())

high, global efficiency
acc:  0.9166666666666667
spec:  0.925
sens:  0.9216666666666666


In [48]:
# high, eigencentrality

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_subjects_informativeness_arr[1].acc()

for _ in range(100):
    clf = svm.SVC()
    true_data = stat.datasets['true'][features].copy()
    false_data = stat.datasets['false_mirror'][features].copy()
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    for sample in true_data.index:
        subject = sample[:4]
        if accur[subject] < 0.60:
            true_data = true_data.drop(index=sample)
    for sample in false_data.index:
        subject = sample[:4]
        if accur[subject] < 0.60:
            false_data = false_data.drop(index=sample)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)
    samples = [[sample] for sample in dataset.index.tolist()]

    x = scaler.fit_transform(dataset)
    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('high, eigencentrality')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())

  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):


high, eigencentrality
acc:  0.6842857142857143
spec:  0.6861666666666667
sens:  0.7498333333333334


In [49]:
ENGEL1 = [
    'B1C2',
    'B1R1',
    'G1R1',
    'G1V2',
    'J1T2',
    'K1V1',
    'L1P1',
    'M1G2',
    'M1N2',
    'O1O2',
    'R1D2',
    'S1A2',
    'S1B1',
    'S1H1',
    'S1U3'
]
ENGEL2 = [
    'L2M1',
    'M2S2',
    'N2K2',
    'P1H2'
]
ENGEL34 = [
    'N3S2',
    'S3R1',
    'K4L2'
]

In [50]:
# Engel 1 only, global efficiency

dataset = dataset_.copy()

acc = cross_subjects_informativeness_arr[0].acc()

for sample in dataset.index:
    if not any([subject in sample for subject in ENGEL1]):
        dataset = dataset.drop(index=sample)

In [51]:
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

for _ in range(100):

    y = dataset['resected'].to_numpy()
    x = dataset[[
        'global_efficiency_for_wpli_4-8Hz',
        'global_efficiency_for_envelope_4-8Hz'
    ]].to_numpy()
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    samples = [[sample] for sample in dataset.index.tolist()]

    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf = svm.SVC()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('Engel 1 only, global efficiency')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())


Engel 1 only, global efficiency
acc:  0.685
spec:  0.7351428571428572
sens:  0.673047619047619


  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):


In [52]:
# Engel 1 and 2, global efficiency

dataset = dataset_.copy()

acc = cross_subjects_informativeness_arr[0].acc()

for sample in dataset.index:
    if not any([subject in sample for subject in ENGEL1]) and not any([subject in sample for subject in ENGEL2]):
        dataset = dataset.drop(index=sample)

In [53]:

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

for _ in range(100):

    y = dataset['resected'].to_numpy()
    x = dataset[[
        'global_efficiency_for_wpli_4-8Hz',
        'global_efficiency_for_envelope_4-8Hz'
    ]].to_numpy()
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    samples = [[sample] for sample in dataset.index.tolist()]

    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf = svm.SVC()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('Engel 1 and 2, global efficiency')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())

Engel 1 and 2, global efficiency
acc:  0.7480000000000001
spec:  0.8272738095238096
sens:  0.7204999999999998


In [54]:
# Engel 1 only, eigencentrality

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
features = ['4-8Hz_wpli', '4-8Hz_envelope']

for _ in range(100):
    clf = svm.SVC()
    true_data = stat.datasets['true'][features].copy()
    false_data = stat.datasets['false_mirror'][features].copy()
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    for sample in true_data.index:
        if not any([subject in sample for subject in ENGEL1]):
            true_data = true_data.drop(index=sample)
    for sample in false_data.index:
        if not any([subject in sample for subject in ENGEL1]):
            false_data = false_data.drop(index=sample)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)
    samples = [[sample] for sample in dataset.index.tolist()]

    x = scaler.fit_transform(dataset)
    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('Engel 1 only, eigencentrality')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())


  if np.isnan(tp/(tp + fp)):


Engel 1 only, eigencentrality
acc:  0.4276785714285713
spec:  0.40640457317594725
sens:  0.4969895361022988


In [55]:
# Engel 1 and 2, eigencentrality

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
features = ['4-8Hz_wpli', '4-8Hz_envelope']

for _ in range(100):
    clf = svm.SVC()
    true_data = stat.datasets['true'][features].copy()
    false_data = stat.datasets['false_mirror'][features].copy()
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    for sample in true_data.index:
        if not any([subject in sample for subject in ENGEL1]) and\
            not any([subject in sample for subject in ENGEL2]):
            true_data = true_data.drop(index=sample)
    for sample in false_data.index:
        if not any([subject in sample for subject in ENGEL1]) and\
                not any([subject in sample for subject in ENGEL2]):
            false_data = false_data.drop(index=sample)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)
    samples = [[sample] for sample in dataset.index.tolist()]

    x = scaler.fit_transform(dataset)
    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('Engel 1 and 2, eigencentrality')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())


Engel 1 and 2, eigencentrality
acc:  0.48808219178082196
spec:  0.3915372690835462
sens:  0.6109360917599053


In [91]:
# h2a, global efficiency

dataset_high = dataset_.copy()
dataset_all = dataset_.copy()

acc = cross_subjects_informativeness_arr[0].acc()

for sample, i in zip(dataset_high.index, range(len(dataset.index))):
    subject = sample[:4]
    if acc[subject] < 0.60 and i%2:
        dataset_high = dataset_high.drop(index=f'{subject}_lh')
        dataset_high = dataset_high.drop(index=f'{subject}_rh')


acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

for _ in range(10):

    y_train = dataset_high['resected'].to_numpy()
    x_train = dataset_high[[
        'global_efficiency_for_wpli_4-8Hz',
        'global_efficiency_for_envelope_4-8Hz'
    ]].to_numpy()
    y_test = dataset_all['resected'].to_numpy()
    x_test = dataset_all[[
        'global_efficiency_for_wpli_4-8Hz',
        'global_efficiency_for_envelope_4-8Hz'
    ]].to_numpy()
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.fit_transform(x_test)

    clf = svm.SVC()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('h2a, global efficiency')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())


h2a, global efficiency
acc:  0.75
spec:  0.8636363636363636
sens:  0.6363636363636365


In [66]:
# h2a, eigencentrality

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_subjects_informativeness_arr[1].acc()

for _ in range(1000):
    clf = svm.SVC()
    true_data_all = stat.datasets['true'][features].copy()
    false_data_all = stat.datasets['false_mirror'][features].copy()
    true_data_all = true_data_all.assign(resected=True)
    false_data_all = false_data_all.assign(resected=False)
    true_data_high = true_data_all.copy()
    false_data_high = false_data_all.copy()
    for sample in true_data_high.index:
        subject = sample[:4]
        if accur[subject] < 0.60:
            true_data_high = true_data_high.drop(index=sample)
    for sample in false_data_high.index:
        subject = sample[:4]
        if accur[subject] < 0.60:
            false_data_high = false_data_high.drop(index=sample)
    dataset_all = pd.concat([true_data_all, false_data_all], axis=0)
    dataset_high = pd.concat([true_data_high, false_data_high], axis=0)
    dataset_all = dataset_all.sample(frac = 1)
    dataset_high = dataset_high.sample(frac=1)

    scaler = StandardScaler()

    y_train = dataset_high['resected'].to_numpy()
    dataset_high = dataset_high.drop(['resected'], axis=1)
    x_train = scaler.fit_transform(dataset_high)

    y_test = dataset_all['resected'].to_numpy()
    dataset_all = dataset_all.drop(['resected'], axis=1)
    x_test = scaler.fit_transform(dataset_all)

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('h2a, eigencentrality')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())


h2a, eigencentrality
acc:  0.546875
spec:  0.4562499999999999
sens:  0.6375


In [82]:
# high (nodes), eigencentrality

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_samples_informativeness.acc()

for _ in range(100):
    clf = svm.SVC()
    true_data = stat.datasets['true'][features].copy()
    false_data = stat.datasets['false_mirror'][features].copy()
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    for sample in true_data.index:
        if accur[sample] < 0.60:
            true_data = true_data.drop(index=sample)
    for sample in false_data.index:
        if accur[sample] < 0.60:
            false_data = false_data.drop(index=sample)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)
    samples = [[sample] for sample in dataset.index.tolist()]

    x = scaler.fit_transform(dataset)
    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('high (nodes), eigencentrality')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())

high (nodes), eigencentrality
acc:  0.9929729729729729
spec:  0.995375457875458
sens:  0.992388280810361


In [86]:
# h2a, eigencentrality

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_samples_informativeness.acc()

for _ in range(100):
    clf = svm.SVC()
    true_data_all = stat.datasets['true'][features].copy()
    false_data_all = stat.datasets['false_mirror'][features].copy()
    true_data_all = true_data_all.assign(resected=True)
    false_data_all = false_data_all.assign(resected=False)
    true_data_high = true_data_all.copy()
    false_data_high = false_data_all.copy()
    for sample in true_data_high.index:
        if accur[sample] < 0.60:
            true_data_high = true_data_high.drop(index=sample)
    for sample in false_data_high.index:
        if accur[sample] < 0.60:
            false_data_high = false_data_high.drop(index=sample)
    dataset_all = pd.concat([true_data_all, false_data_all], axis=0)
    dataset_high = pd.concat([true_data_high, false_data_high], axis=0)
    dataset_all = dataset_all.sample(frac = 1)
    dataset_high = dataset_high.sample(frac=1)

    scaler = StandardScaler()

    y_train = dataset_high['resected'].to_numpy()
    dataset_high = dataset_high.drop(['resected'], axis=1)
    x_train = scaler.fit_transform(dataset_high)

    y_test = dataset_all['resected'].to_numpy()
    dataset_all = dataset_all.drop(['resected'], axis=1)
    x_test = scaler.fit_transform(dataset_all)

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('h2a, eigencentrality')
print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())

h2a, eigencentrality
acc:  0.5593750000000002
spec:  0.3999999999999999
sens:  0.71875
