In [4]:
import itertools
import operator
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
from nodestimation.learning.connectoming import *
from nodestimation.learning.modification import normalize_df
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft

In [5]:
SUBJECTS = pipeline(
    methods=['wpli', 'envelope', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased'],
    freq_bands=(4, 8),
    centrality_metrics=['eigen', 'between', 'degree', 'info']
    )

ENGEL1 = [
    'B1C2',
    'B1R1',
    'G1R1',
    'G1V2',
    'J1T2',
    'K1V1',
    'L1P1',
    'M1G2',
    'M1N2',
    'O1O2',
    'R1D2',
    'S1A2',
    'S1B1',
    'S1H1',
    'S1U3'
]
ENGEL2 = [
    'L2M1',
    'M2S2',
    'N2K2',
    'P1H2'
]
ENGEL34 = [
    'N3S2',
    'S3R1',
    'K4L2'
]
REJECTED = [
    'S1U3',
    'P1H2'
]

INCLUDED = [
    'B1R1',
    'G1R1',
    'G1V2',
    'L1P1',
    'M1G2',
    'M1N2',
    'O1O2',
    'R1D2',
]

subjects = [
        subject
        for subject in SUBJECTS
        if subject.name in INCLUDED
    ]
STAT = SubjectsStatistic(
    subjects,
    'resected',
    centrality_metric='eigen'
)
GRAPHS = [
    metric_for_hemispheres(subjects, nx.algorithms.cluster.transitivity),
    metric_for_hemispheres(subjects, nx.algorithms.smetric.s_metric, normalized=False),
    metric_for_hemispheres(subjects, nx.algorithms.global_efficiency),
]

All computation has been already done, loading of the existing file with the solution...


In [6]:
features = ['4-8Hz_wpli', '4-8Hz_envelope']

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
stat = STAT

for i in range(1000):
    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10)
    clf = svm.SVC(kernel='sigmoid')
    # clf = svm.SVC()
    # clf = svm.SVC(kernel='linear')
    # clf = svm.SVC(kernel='poly')
    # clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=7, metric='chebyshev')
    # clf = LogisticRegression(class_weight={True: 1, False: .8})
    # clf = LogisticRegression()
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = QuadraticDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)
    true_data = stat.datasets['true'][features]
    # false_data = lmd.lead_std(stat.datasets['false_res'][features], take_std_from=stat.datasets['true'][features], axis=1)
    false_data = stat.datasets['false_mirror'][features]
    # false_data = stat.random_samples()[features]
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)

    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)
    x = scaler.fit_transform(dataset)
    x_train, x_test, y_train, y_test = train_test_split(x, y)

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())


  spec.append(tn / (tn + fp))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):


acc:  0.7063333333333334
spec:  nan
sens:  0.6677543650793651


In [64]:
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
for dataset, name in zip(GRAPHS, ['transitivity', 's_metric', 'global_efficiency']):
    for _ in range(1000):

        y = dataset['resected'].to_numpy()
        x = dataset[[
            f'{name}_for_wpli_4-8Hz',
            f'{name}_for_envelope_4-8Hz'
        ]].to_numpy()
        scaler = StandardScaler()
        x = scaler.fit_transform(x)

        samples = [[sample] for sample in dataset.index.tolist()]

        x = np.append(x, samples, axis=1)
        x_train, x_test, y_train, y_test = train_test_split(x, y)
        train_samples, test_samples = x_train[:, 2], x_test[:, 2]
        x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

        # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
        # clf = AdaBoostClassifier(n_estimators=10)
        clf = svm.SVC(kernel='sigmoid')
        # clf = svm.SVC()
        # clf = svm.SVC(kernel='linear')
        # clf = svm.SVC(kernel='poly')
        # clf = SGDClassifier()
        # clf = KNeighborsClassifier(n_neighbors=7, metric='chebyshev')
        # clf = LogisticRegression(class_weight={True: 1, False: .8})
        # clf = LogisticRegression()
        # clf = RandomForestClassifier(max_depth=20)
        # clf = GaussianNB()
        # clf = LinearDiscriminantAnalysis()
        # clf = QuadraticDiscriminantAnalysis()
        # clf = KMeans(n_clusters=2, algorithm='full')
        # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)

        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)

        acc.append(accuracy_score(y_test, pred))
        tn, fp, fn, tp = confusion_matrix(y_test, pred, labels=[0,1]).ravel()
        spec.append(tn / (tn + fp))
        sens.append(tp / (tp + fn))

        if np.isnan(tp/(tp + fp)):
            continue
        else:
            pospred.append(tp/(tp + fp))

        if np.isnan((tn/(tn + fn))):
            continue
        else:
            negpred.append(tn/(tn + fn))
    print(name)
    spec = [s for s in spec if not np.isnan(s)]
    sens = [s for s in sens if not np.isnan(s)]
    print('acc: ', np.array(acc).mean())
    print('spec: ', np.array(spec).mean())
    print('sens: ', np.array(sens).mean())


  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  spec.append(tn / (tn + fp))
  sens.append(tp / (t

transitivity
acc:  0.93175
spec:  0.9967184801381692
sens:  0.8718545329196827


  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  spec.append(tn / (tn + fp))
  spec.append(tn / (tn + fp))
  spec.append(tn / (tn + fp))
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  spec.append(tn / (tn + fp))
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  sens.append(tp / (tp + fn))
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp

s_metric
acc:  0.934625
spec:  0.9970578054690203
sens:  0.8762730882099085


  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  spec.append(tn / (tn + fp))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan((tn/(tn + fn))):
  spec.append(tn / (tn + fp))
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  spec.append(tn / (tn + fp))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp + fn))
  if np.isnan(tp/(tp + fp)):
  sens.append(tp / (tp +

global_efficiency
acc:  0.9365833333333333
spec:  0.9975744975744976
sens:  0.8777758600851456


  if np.isnan(tp/(tp + fp)):
