In [1]:
import itertools
import re
from abc import *
from operator import itemgetter
from typing import *
import numpy as np
import scipy as sp
import networkx as nx
import mne
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft


# subjects = pipeline(
#     methods=['wpli', 'envelope'],
#     freq_bands=(7.5, 12),
#     centrality_metrics=['eigen', 'close', 'between', 'degree', ],# 'katz', 'info', 'harmonic']
#     subjects_specificity={
#         'M2S2': {
#             'freq_bands': (7.5, 12.5)
#         },
#         'R1D2': {
#             'freq_bands': (7.5, 11)
#         },
#         'S1A2': {
#             'freq_bands': (5, 10)
#         },
#         'S1H1': {
#             'freq_bands': (8, 13)
#         },
#         'K1V1': {
#             'freq_bands': (7.5, 11)
#         },
#         'L1P1': {
#             'freq_bands': (5, 10)
#         },
#         'M1G2': {
#             'freq_bands': (7, 11)
#         },
#         'G1V2': {
#             'freq_bands': (7, 11)
#         },
#         'G1R1': {
#             'freq_bands': (12.5, 16.5)
#         },
#         'M1N2': {
#             'freq_bands': (10, 15)
#         },
#         'B1R1': {
#             'freq_bands': (6, 11)
#         },
#         'B1C2': {
#             'freq_bands': (7.5, 12.5)
#         },
#         'J1T2': {
#             'freq_bands': (11, 15)
#         },
#         'O1O2': {
#             'freq_bands': (5.5, 9.5)
#         },
#     }
# )

subjects = pipeline(
    methods=['wpli', 'envelope'],
    freq_bands=(4, 8),
    centrality_metrics=['eigen', 'close', 'between', 'degree']
    )

for subject in subjects:
    for dataset in subject.dataset:
        print(subject.dataset[dataset].columns)
        # columns = subject.dataset[dataset].columns.to_list()
        # rule = dict()
        # for column in columns:
        #     if 'wpli' in column:
        #         rule.update({column: 'wpli'})
        #     if 'envelope' in column:
        #         rule.update({column: 'envelope'})
        # subject.dataset[dataset] = subject.dataset[dataset].rename(columns=rule, copy=True)

All computation has been already done, loading of the existing file with the solution...
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz_envelope', 'resected'], dtype='object')
Index(['4-8Hz_wpli', '4-8Hz

In [2]:
def sparse_graph(g: nx.Graph) -> nx.Graph:
    con = nx.to_numpy_matrix(g)
    out = nx.from_numpy_matrix(
        lmd.suppress(
            pd.DataFrame(
                con
            ),
            trigger=con.mean().mean(),
            optimal=0
        ).to_numpy()
    )
    mapping = {node: label_name for node, label_name in zip(out, g)}
    out = nx.relabel_nodes(out, mapping)
    return out

def graph_to_connectome(g: nx.Graph) -> pd.DataFrame:
    return pd.DataFrame(
        nx.to_numpy_matrix(g),
        index= g.nodes,
        columns = g.nodes
    )

def labels_for_hemispheres(g: nx.Graph) -> Tuple[List[str], List[str]]:
    labels_rh, labels_lh = list(), list()
    for node in g.nodes:
        if 'lh' in node:
            labels_lh.append(node)
        elif 'rh' in node:
            labels_rh.append(node)
        else:
            raise ValueError(f'Wrong node name: {node}')
    return labels_lh, labels_rh

def hemispheres_division_modularity(g: nx.Graph) -> float:
    labels_lh, labels_rh = labels_for_hemispheres(g)
    return nx.algorithms.community.quality.modularity(g, [labels_lh, labels_rh])

def hemispheres_division_performance(g: nx.Graph) -> float:
    labels_lh, labels_rh = labels_for_hemispheres(g)
    return nx.algorithms.community.quality.performance(g, [labels_lh, labels_rh])

def graph_to_hemispheres(g: nx.Graph) -> Tuple[nx.Graph, nx.Graph]:
    labels_lh, labels_rh = labels_for_hemispheres(g)
    return g.subgraph(labels_lh), g.subgraph(labels_rh)

def smallworldness(g: nx.Graph) -> Tuple[float, float]:
    return nx.algorithms.smallworld.sigma(g), nx.algorithms.smallworld.omega(g)

def s_for_hemispheres(subjects: List[Subject]) -> pd.DataFrame:
    dataset = pd.DataFrame()

    for subject in subjects:
        resected_hemisphere = None
        lupd, rupd = dict(), dict()

        for node in subject.nodes:
            if node.type == 'resected' and 'rh' in node.label.name:
                lupd.update({'resected': False})
                rupd.update({'resected': True})
                break

            elif node.type == 'resected' and 'lh' in node.label.name:
                lupd.update({'resected': True})
                rupd.update({'resected': False})
                break

        for freq in subject.graph:
            for method in subject.graph[freq]:
                G = sparse_graph(subject.graph[freq][method])
                lh, rh = graph_to_hemispheres(G)
                lupd.update({f's_for_{method}': nx.algorithms.smetric.s_metric(lh, normalized=False)/100000})
                rupd.update({f's_for_{method}': nx.algorithms.smetric.s_metric(rh, normalized=False)/100000})

        dataset = append_series(dataset, pd.Series(lupd), index=f'{subject.name}_lh')
        dataset = append_series(dataset, pd.Series(rupd), index=f'{subject.name}_rh')

    return dataset

import time

def metric_for_hemispheres(subjects: List[Subject], metric: Callable, **kwargs) -> pd.DataFrame:
    dataset = pd.DataFrame()

    for subject in subjects:
        start = time.time()
        resected_hemisphere = None
        lupd, rupd = dict(), dict()

        for node in subject.nodes:
            if node.type == 'resected' and 'rh' in node.label.name:
                lupd.update({'resected': False})
                rupd.update({'resected': True})
                break

            elif node.type == 'resected' and 'lh' in node.label.name:
                lupd.update({'resected': True})
                rupd.update({'resected': False})
                break

        for freq in subject.graph:
            for method in subject.graph[freq]:
                G = sparse_graph(subject.graph[freq][method])
                lh, rh = graph_to_hemispheres(G)
                lupd.update({f'{metric.__name__}_for_{method}': metric(lh, **kwargs)})
                rupd.update({f'{metric.__name__}_for_{method}': metric(rh, **kwargs)})

        dataset = append_series(dataset, pd.Series(lupd), index=f'{subject.name}_lh')
        dataset = append_series(dataset, pd.Series(rupd), index=f'{subject.name}_rh')
        print(f'{subject.name}: DONE, RUNTIME: {time.time() - start}')

    return dataset


In [222]:
# plt.imshow(
#     pd.DataFrame(
#         nx.to_numpy_matrix(
#             subjects[0].graph['7.5-12.5Hz']['wpli']
#         )
#     ).to_numpy())
# plt.show()
# plt.imshow(
#     lmd.suppress(
#         pd.DataFrame(
#             nx.to_numpy_matrix(
#                 subjects[0].graph['7.5-12.5Hz']['wpli']
#             )
#         ),
#         optimal=0
#     ).to_numpy())
# plt.show()
#
G = sparse_graph(subjects[0].graph['7.5-12.5Hz']['wpli'])
lh, rh = graph_to_hemispheres(G)
print(lh.number_of_edges(), lh.number_of_nodes())
# nx.draw(subjects[0].graph['7.5-12.5Hz']['wpli'])
# plt.show()
# nx.draw(G)
# plt.show()
# nx.draw(lh)
# plt.show()
# nx.draw(rh)
# plt.show()
# print('all: ', smallworldness(G))
import time
start = time.time()
# print(smallworldness(nx.complete_graph(5)))
# print(time.time() - start)
# print(smallworldness(nx.complete_graph(10)))
# print(time.time() - start)
# print(smallworldness(nx.complete_graph(75)))
# print(time.time() - start)
# print('lh: ', smallworldness(lh))
# print('rh: ', smallworldness(rh))

1249 75


In [19]:
for subject in subjects:
    print(subject.name)
    print('\n\tresected nodes:')
    for node in subject.nodes:
        if node.type == 'resected':
            print(f'\t{node.label.name}')
    for freq in subject.graph:
        for method in subject.graph[freq]:
            print(f'\n\t{freq}: {method}')
            G = sparse_graph(subject.graph[freq][method])
            lh, rh = graph_to_hemispheres(G)
            print(f'\n\themispheres division modularity: {hemispheres_division_modularity(G)}\n')
            print('\n\t s for lh', nx.algorithms.smetric.s_metric(lh, normalized=False)/100000)
            print('\n\t s for rh', nx.algorithms.smetric.s_metric(rh, normalized=False)/100000)

M2S2

	resected nodes:
	G_front_inf-Opercular-rh

	4-8Hz: wpli

	hemispheres division modularity: 0.002019055256530261


	 s for lh 15.23055

	 s for rh 15.11915

	4-8Hz: envelope

	hemispheres division modularity: -0.04155161417569045


	 s for lh 17.60433

	 s for rh 24.7102
R1D2

	resected nodes:
	G_front_sup-rh

	4-8Hz: wpli

	hemispheres division modularity: -0.021796476843180923


	 s for lh 10.645

	 s for rh 19.82521

	4-8Hz: envelope

	hemispheres division modularity: -0.002090163005340484


	 s for lh 20.93979

	 s for rh 28.13241
S1A2

	resected nodes:
	G_front_middle-lh

	4-8Hz: wpli

	hemispheres division modularity: -0.0033630500750618797


	 s for lh 14.95923

	 s for rh 13.54331

	4-8Hz: envelope

	hemispheres division modularity: -0.040138507497266246


	 s for lh 20.13583

	 s for rh 21.26211
S1H1

	resected nodes:
	G_orbital-rh
	S_circular_insula_ant-rh

	4-8Hz: wpli

	hemispheres division modularity: -0.03472405817904048


	 s for lh 12.63732

	 s for rh 11.20747

	

In [3]:
# dataset = metric_for_hemispheres(subjects, nx.algorithms.cluster.transitivity)
dataset = metric_for_hemispheres(subjects, nx.algorithms.global_efficiency)
# dataset = s_for_hemispheres(subjects)
print(dataset)

M2S2: DONE, RUNTIME: 1.264206886291504
R1D2: DONE, RUNTIME: 1.1880214214324951
S1A2: DONE, RUNTIME: 1.1472206115722656
S1H1: DONE, RUNTIME: 1.2721238136291504
K1V1: DONE, RUNTIME: 1.3501670360565186
L1P1: DONE, RUNTIME: 1.1227223873138428
M1G2: DONE, RUNTIME: 0.9356822967529297
G1V2: DONE, RUNTIME: 0.8481898307800293
G1R1: DONE, RUNTIME: 0.8676187992095947
M1N2: DONE, RUNTIME: 0.897014856338501
S1B1: DONE, RUNTIME: 0.9945433139801025
B1R1: DONE, RUNTIME: 0.9574844837188721
B1C2: DONE, RUNTIME: 0.9432084560394287
J1T2: DONE, RUNTIME: 0.8630290031433105
O1O2: DONE, RUNTIME: 0.8947618007659912
L2M1: DONE, RUNTIME: 0.9753520488739014
         resected  global_efficiency_for_wpli  global_efficiency_for_envelope
M2S2_lh     False                    0.721562                        0.730270
M2S2_rh      True                    0.723063                        0.760360
R1D2_lh     False                    0.690270                        0.737658
R1D2_rh      True                    0.732192     

In [5]:
y = dataset['resected'].to_numpy()
x = dataset.drop(['resected'], axis=1).to_numpy()

scaler = StandardScaler()
# scaler = MinMaxScaler()
# scaler = MaxAbsScaler()
# scaler = RobustScaler()
x = scaler.fit_transform(x)

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

for i in range(100):

    x_train, x_test, y_train, y_test = train_test_split(x, y)

    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10)
    # clf = svm.SVC()
    # clf = svm.SVC(kernel='linear', class_weight={True: .8, False: 1})
    # clf = svm.SVC(kernel='sigmoid', class_weight={True: .8, False: 1})
    # clf = svm.SVC(class_weight={True: .8, False: 1})
    clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=3)
    # clf = LogisticRegression(class_weight={True: .8, False: 1})
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = QuadraticDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

max_acc = np.max(np.array(acc))
min_acc = np.min(np.array(acc))
moda_acc, cacc = sp.stats.mode(np.array(acc))
acc = sum(acc)/len(acc)
max_spec = np.max(np.array(spec))
min_spec = np.min(np.array(spec))
moda_spec, cspec = sp.stats.mode(np.array(spec))
spec = sum(spec)/len(spec)
max_sens = np.max(np.array(sens))
min_sens = np.min(np.array(sens))
moda_sens, csens = sp.stats.mode(np.array(sens))
sens = sum(sens)/len(sens)
max_pospred = np.max(np.array(pospred))
min_pospred = np.min(np.array(pospred))
moda_pospred, cpospred = sp.stats.mode(np.array(pospred))
pospred = sum(pospred)/len(pospred)
max_negpred = np.max(np.array(negpred))
min_negpred = np.min(np.array(negpred))
moda_negpred, cnegspred = sp.stats.mode(np.array(negpred))
negpred = sum(negpred)/len(negpred)
print('Accuracy: ', acc, min_acc, max_acc, moda_acc, cacc)
print('Specificity', spec, min_spec, max_spec, moda_spec, cspec)
print('Sensitivity: ', sens, min_sens, max_sens, moda_sens, csens)
print('Positive Predictive Value: ', pospred, min_pospred, max_pospred, moda_pospred, cpospred)
print('Negative Predictive Value: ', negpred, min_negpred, max_negpred, moda_negpred, cnegspred)

  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):


Accuracy:  0.73875 0.375 1.0 [0.875] [28]
Specificity 0.7245000000000001 0.0 1.0 [1.] [36]
Sensitivity:  0.7764285714285712 0.0 1.0 [1.] [45]
Positive Predictive Value:  0.7507261904761907 0.0 1.0 [1.] [36]
Negative Predictive Value:  0.7782857142857142 0.0 1.0 [1.] [43]


In [179]:
for subject in subjects:
    print(subject.name)
    print('\n\tresected nodes:')
    for node in subject.nodes:
        if node.type == 'resected':
            print(f'\t{node.label.name}')
    for freq in subject.graph:
        for method in subject.graph[freq]:
            print(f'\n\t{freq}: {method}')
            G = sparse_graph(subject.graph[freq][method])
            c = graph_to_connectome(G)
            lh, rh = graph_to_hemispheres(G)
            print(f'\n\themispheres division modularity: {hemispheres_division_modularity(G)}\n')
            comp = nx.algorithms.community.centrality.girvan_newman(G)
            for c in next(comp):
                print(f'\t{c}')

M2S2

	resected nodes:
	G_front_inf-Opercular-rh

	7.5-12.5Hz: wpli

	hemispheres division modularity: -0.035408499433554674

	{'G&S_cingul-Mid-Ant-rh', 'S_circular_insula_inf-lh', 'S_interm_prim-Jensen-rh', 'G_temporal_middle-lh', 'S_front_inf-rh', 'G_precentral-lh', 'G&S_frontomargin-lh', 'S_front_sup-lh', 'S_temporal_inf-rh', 'S_front_middle-rh', 'G_oc-temp_med-Lingual-rh', 'G_postcentral-lh', 'S_pericallosal-rh', 'G_pariet_inf-Angular-rh', 'Lat_Fis-ant-Horizont-lh', 'G_postcentral-rh', 'G_oc-temp_med-Parahip-rh', 'G&S_cingul-Mid-Post-lh', 'G&S_cingul-Mid-Post-rh', 'S_subparietal-lh', 'G_insular_short-lh', 'G_subcallosal-lh', 'G_front_inf-Opercular-rh', 'G_subcallosal-rh', 'S_calcarine-rh', 'G_occipital_middle-lh', 'S_cingul-Marginalis-lh', 'S_interm_prim-Jensen-lh', 'S_oc_middle&Lunatus-lh', 'G_precentral-rh', 'G_cuneus-rh', 'S_orbital_lateral-lh', 'Lat_Fis-ant-Vertical-rh', 'S_precentral-sup-part-lh', 'S_orbital-H_Shaped-rh', 'S_oc_sup&transversal-rh', 'G_pariet_inf-Angular-lh', '

KeyboardInterrupt: 

In [117]:
def current_flow(G):
    c = nx.edge_current_flow_betweenness_centrality(G)
    return max(c, key=c.get)

def heaviest(G):
    u, v, w = max(G.edges(data="weight"), key=itemgetter(2))
    del w
    return u, v

for subject in subjects:
    print(subject.name)
    print('\n\tresected nodes:')
    for node in subject.nodes:
        if node.type == 'resected':
            print(f'\t{node.label.name}')
    for freq in subject.graph:
        for method in subject.graph[freq]:
            print(f'\n\t{freq}: {method}')
            G = sparse_graph(subject.graph[freq][method])
            c = graph_to_connectome(G)
            lh, rh = graph_to_hemispheres(G)
            print(f'\n\themispheres division modularity: {hemispheres_division_modularity(G)}\n')
            comp = nx.algorithms.community.centrality.girvan_newman(
                lh,
                heaviest
            )
            for c in next(comp):
                print(f'\t{c}')
            comp = nx.algorithms.community.centrality.girvan_newman(
                rh,
                heaviest
            )
            for c in next(comp):
                print(f'\t{c}')

In [None]:
stat1 = SubjectsStatistic(subjects, 'resected', centrality_metric='eigen')
stat2 = SubjectsStatistic(subjects, 'resected', centrality_metric='close')
# stat3 = SubjectsStatistic(subjects, 'resected', centrality_metric='between')

In [None]:
subjects_brains = dict()
c = 'eigen'
for subject in subjects:
    subjects_brains.update({subject: dict()})
    lh, rh = pd.DataFrame(), pd.DataFrame()
    resected = None
    for i in range(len(subject.dataset[c].index)):
        if 'lh' in subject.dataset[c].iloc[i].name:
            if subject.dataset[c].iloc[i]['resected'] and resected is None:
                resected = 'left'
            lh = append_series(lh, subject.dataset[c].iloc[i][['4-7Hz_envelope', '4-7Hz_wpli']], index=subject.dataset[c].iloc[i].name)
        else:
            rh = append_series(rh, subject.dataset[c].iloc[i][['4-7Hz_envelope', '4-7Hz_wpli']], index=subject.dataset[c].iloc[i].name)
            if subject.dataset[c].iloc[i]['resected'] and resected is None:
                resected = 'right'
    subjects_brains[subject].update({'lh': lh, 'rh': rh, 'resected': resected})

In [None]:
y = list()
for brain in subjects_brains:
    if subjects_brains[brain]['resected'] == 'left':
        y.append(True)
        y.append(False)
    else:
        y.append(False)
        y.append(True)
y = np.array(y)
x = list()
for brain in subjects_brains:
    x.append(subjects_brains[brain]['lh'].to_numpy().T)
    x.append(subjects_brains[brain]['rh'].to_numpy().T)

x = np.array(x)
scaler = StandardScaler()

for i in range(x.shape[0]):
    x[i] = scaler.fit_transform(x[i].T).T
nsamples, nx, ny = x.shape
x = x.reshape((nsamples, nx*ny))
print(x[0, :].shape)

x_train, x_test, y_train, y_test = train_test_split(x, y)

In [None]:
# clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
# clf = AdaBoostClassifier(n_estimators=10)
# clf = svm.SVC(kernel='linear')
clf = svm.SVC(gamma=0.001)
# clf = svm.SVC(class_weight={True: 1, False: .8}, probability=True)
# clf = SGDClassifier()
# clf = KNeighborsClassifier(n_neighbors=3)
# clf = LogisticRegression(class_weight={True: 1, False: .8})
# clf = RandomForestClassifier(max_depth=20)
# clf = GaussianNB()
# clf = LinearDiscriminantAnalysis()
# clf = KMeans(n_clusters=2, algorithm='full')
# clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(accuracy_score(y_test, y_pred))

In [None]:
#     s2 = subject.dataset['4-7Hz_imcoh']
#     print(s1.corr(s2))

In [None]:
print(stat1.datasets['true'].shape)
print(stat1.datasets['false'].shape)

In [20]:
test11 = stat1.test(state='resampled')
test12 = stat1.test(state='reflected')
test21 = stat2.test(state='resampled')
test22 = stat2.test(state='reflected')
test11_samples, test12_samples, test21_samples, test22_samples = list(), list(), list(), list()
for feature in test11.result:
    test11_samples.append(test11.result[feature][1])
    test12_samples.append(test12.result[feature][1])
    test21_samples.append(test21.result[feature][1])
    test22_samples.append(test22.result[feature][1])

test_samples = np.array([
    np.array(test11_samples),
    np.array(test12_samples),
    np.array(test21_samples),
    np.array(test22_samples)
])

df = pd.DataFrame(test_samples, columns=list(test11.result.keys()), index=[
    'resampled, eigen', 'reflected, eigen', 'resampled, close', 'reflected, close'
]).T
print(df)

          resampled, eigen  reflected, eigen  resampled, close  \
wpli              0.065466          0.069655          0.078691   
envelope          0.769480          0.099622          0.567217   

          reflected, close  
wpli              0.834195  
envelope          0.124772  


In [21]:
test11 = stat1.test(state='resampled', test='mannwhitneyu')
test12 = stat1.test(state='reflected', test='mannwhitneyu')
test21 = stat2.test(state='resampled', test='mannwhitneyu')
test22 = stat2.test(state='reflected', test='mannwhitneyu')
test11_samples, test12_samples, test21_samples, test22_samples = list(), list(), list(), list()
for feature in test11.result:
    test11_samples.append(test11.result[feature][1])
    test12_samples.append(test12.result[feature][1])
    test21_samples.append(test21.result[feature][1])
    test22_samples.append(test22.result[feature][1])

test_samples = np.array([
    np.array(test11_samples),
    np.array(test12_samples),
    np.array(test21_samples),
    np.array(test22_samples)
])

df = pd.DataFrame(test_samples, columns=list(test11.result.keys()), index=[
    'resampled, eigen', 'reflected, eigen', 'resampled, close', 'reflected, close'
]).T
print(df)

          resampled, eigen  reflected, eigen  resampled, close  \
wpli              0.053815          0.068242          0.151665   
envelope          0.452273          0.117178          0.271065   

          reflected, close  
wpli              0.367013  
envelope          0.223778  


In [None]:
# features = ['4-7Hz_wpli', '7-14Hz_wpli', '14-30Hz_wpli', '30-70Hz_wpli']
features = ['4-7Hz_wpli', '4-7Hz_envelope']
# features = ['4-7Hz_wpli']
# features = ['envelope']
# features = ['4-7Hz_wpli', '4-7Hz_psd']
true_data = stat1.datasets['true'][features]
false_data = stat1.datasets['false_res'][features]
true_data = true_data.assign(resected=True)
false_data = false_data.assign(resected=False)
dataset = pd.concat([true_data, false_data], axis=0)
dataset = dataset.sample(frac = 1)
print(dataset)

# 61

In [None]:
features = ['4-7Hz_wpli', '4-7Hz_envelope']
true_data = stat1.datasets['true'][features]
true_data_lead = lmd.lead_std(stat1.datasets['true'][features], take_std_from=stat1.datasets['false_res'][features], axis=1)
# true_data_lead = lmd.lead_mean(true_data_lead, take_mean_from=stat1.datasets['false_res'][features], axis=1)
false_data = stat1.datasets['false'][features]
false_res_data = stat1.datasets['false_res'][features]
false_rand_data = stat1.random_samples()[features]
means = pd.concat([false_data.mean(), false_res_data.mean(), false_rand_data.mean(), true_data.mean(), true_data_lead.mean()], axis=1)
stds = pd.concat([false_data.std(), false_res_data.std(), false_rand_data.std(), true_data.std(), true_data_lead.std()], axis=1)

means = pd.DataFrame(means.to_numpy(), index=features, columns=['false', 'false_res', 'false_rand', 'true', 'true_lead'])
stds = pd.DataFrame(stds.to_numpy(), index=features, columns=['false', 'false_res', 'false_rand', 'true', 'true_lead'])

print(means)
print(stds)

In [129]:
# features = ['0.5-4Hz_wpli', '4-7Hz_wpli', '7-14Hz_wpli', '14-30Hz_wpli', '30-70Hz_wpli']
# features = ['14-30Hz_wpli', '4-7Hz_wpli']
# features = ['4-7Hz_wpli', '0.5-4Hz_envelope', '4-7Hz_envelope', '7-14Hz_envelope', '14-30Hz_envelope', '30-70Hz_envelope']
# features = ['4-7Hz_wpli', '4-7Hz_envelope']
features = ['wpli', 'envelope']
# features = ['4-7Hz_envelope']
# features = ['4-7Hz_wpli', '4-7Hz_psd']
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
stat = stat1
for i in range(1000):
    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10)
    clf = svm.SVC(class_weight={True: .9, False: 1})
    # clf = svm.SVC(class_weight={True: 1, False: .8}, probability=True)
    # clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=7, metric='chebyshev')
    # clf = LogisticRegression(class_weight={True: 1, False: .8})
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)
    true_data = stat.datasets['true'][features]
    # true_data = lmd.lead_std(stat.datasets['true'][features], take_std_from=stat1.datasets['false_res'][features], axis=1)
    # false_data = stat.random_samples()[features]
    # false_data = lmd.lead_std(stat.datasets['false_res'][features], take_std_from=stat1.datasets['true'][features], axis=1)
    # false_data = stat1.datasets['false_res'][features]
    # false_data = stat1.datasets['false'][features]
    # true_data = stat1.random_samples()[features]
    # false_data = stat.random_samples()[features]
    false_data = stat.datasets['false_mirror'][features]
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()
    # scaler = MinMaxScaler()
    # scaler = MaxAbsScaler()
    # scaler = RobustScaler()



    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)


    # dataset = lmd.lead_std(dataset, take_std_from=stat1.datasets['false_res'][features], axis=1)
    dataset = lmd.suppress(dataset, axis=1, optimal='max')
    # dataset = lmd.promote(dataset, axis=1, optimal='mean')
    # dataset = lmd.clusterize(dataset, axis=1, n_clusters=3, optimal='symclose')
    # dataset = lmd.binarize(dataset, axis=1)
    x = scaler.fit_transform(dataset)
    # x = dataset
    x_train, x_test, y_train, y_test = train_test_split(x, y)

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    # prob = clf.predict_proba(x_test).tolist()
    # for p, x, y in zip(prob, pred, y_test):
    #     print(y, x, p)

    # df = pd.DataFrame(np.array([np.array(y_test), pred]).T, columns=['actually', 'prediction'])
    # print(df)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

max_acc = np.max(np.array(acc))
min_acc = np.min(np.array(acc))
moda_acc, cacc = sp.stats.mode(np.array(acc))
acc = sum(acc)/len(acc)
max_spec = np.max(np.array(spec))
min_spec = np.min(np.array(spec))
moda_spec, cspec = sp.stats.mode(np.array(spec))
spec = sum(spec)/len(spec)
max_sens = np.max(np.array(sens))
min_sens = np.min(np.array(sens))
moda_sens, csens = sp.stats.mode(np.array(sens))
sens = sum(sens)/len(sens)
max_pospred = np.max(np.array(pospred))
min_pospred = np.min(np.array(pospred))
moda_pospred, cpospred = sp.stats.mode(np.array(pospred))
pospred = sum(pospred)/len(pospred)
max_negpred = np.max(np.array(negpred))
min_negpred = np.min(np.array(negpred))
moda_negpred, cnegspred = sp.stats.mode(np.array(negpred))
negpred = sum(negpred)/len(negpred)
print('Accuracy: ', acc, min_acc, max_acc, moda_acc, cacc)
print('Specificity', spec, min_spec, max_spec, moda_spec, cspec)
print('Sensitivity: ', sens, min_sens, max_sens, moda_sens, csens)
print('Positive Predictive Value: ', pospred, min_pospred, max_pospred, moda_pospred, cpospred)
print('Negative Predictive Value: ', negpred, min_negpred, max_negpred, moda_negpred, cnegspred)


  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):


Accuracy:  0.6264000000000011 0.25 0.9 [0.6] [233]
Specificity 0.8956440712065717 0.125 1.0 [1.] [340]
Sensitivity:  0.3644910270285273 0.0 1.0 [0.33333333] [91]
Positive Predictive Value:  0.7819563767931403 0.0 1.0 [1.] [337]
Negative Predictive Value:  0.5850666613829929 0.23076923076923078 1.0 [0.5] [83]


In [123]:
# features = ['0.5-4Hz_wpli', '4-7Hz_wpli', '7-14Hz_wpli', '14-30Hz_wpli', '30-70Hz_wpli']
# features = ['14-30Hz_wpli', '4-7Hz_wpli']
# features = ['4-7Hz_wpli', '0.5-4Hz_envelope']
# features = ['4-7Hz_wpli']
# features = ['envelope']
# features = ['4-7Hz_wpli', '4-7Hz_psd']
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
stat = stat1
for i in range(1):
    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10, random_state=0)
    # clf = svm.SVC(kernel='linear')
    clf = svm.SVC(class_weight={True: .9, False: 1}, probability=True)
    # clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=3)
    # clf = LogisticRegression()
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)
    true_data = stat.datasets['true'][features]
    # true_data = lmd.lead_std(stat.datasets['true'][features], take_std_from=stat1.datasets['false_res'][features], axis=1)
    false_data = stat1.random_samples()[features]
    # false_data = lmd.lead_std(stat.datasets['false_res'][features], take_std_from=stat1.datasets['true'][features], axis=1)
    # false_data = stat1.datasets['false_res'][features]
    # false_data = stat1.datasets['false'][features]
    # true_data = stat1.random_samples()[features]
    # false_data = stat.random_samples()[features]
    # true_data = stat1.datasets['false_mirror'][features]
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()
    # scaler = MinMaxScaler()
    # scaler = MaxAbsScaler()
    # scaler = RobustScaler()



    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)


    # dataset = lmd.lead_std(dataset, take_std_from=stat1.datasets['false_res'][features], axis=1)
    dataset = lmd.suppress(dataset, axis=1, optimal='max')
    # dataset = lmd.promote(dataset, axis=1, optimal='min')
    # dataset = lmd.clusterize(dataset, axis=1, n_clusters=3, optimal='max')
    # dataset = lmd.binarize(dataset, axis=1)

    x = scaler.fit_transform(dataset)
    # x = dataset
    x_train, x_test, y_train, y_test = train_test_split(x, y)

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    prob = clf.predict_proba(x_test).tolist()
    for p, x, y in zip(prob, pred, y_test):
        print(y, x, p)

    # df = pd.DataFrame(np.array([np.array(y_test), pred]).T, columns=['actually', 'prediction'])
    # print(df)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

max_acc = np.max(np.array(acc))
min_acc = np.min(np.array(acc))
moda_acc, cacc = sp.stats.mode(np.array(acc))
acc = sum(acc)/len(acc)
max_spec = np.max(np.array(spec))
min_spec = np.min(np.array(spec))
moda_spec, cspec = sp.stats.mode(np.array(spec))
spec = sum(spec)/len(spec)
max_sens = np.max(np.array(sens))
min_sens = np.min(np.array(sens))
moda_sens, csens = sp.stats.mode(np.array(sens))
sens = sum(sens)/len(sens)
max_pospred = np.max(np.array(pospred))
min_pospred = np.min(np.array(pospred))
moda_pospred, cpospred = sp.stats.mode(np.array(pospred))
pospred = sum(pospred)/len(pospred)
max_negpred = np.max(np.array(negpred))
min_negpred = np.min(np.array(negpred))
moda_negpred, cnegspred = sp.stats.mode(np.array(negpred))
negpred = sum(negpred)/len(negpred)
print('Accuracy: ', acc, min_acc, max_acc, moda_acc, cacc)
print('Specificity', spec, min_spec, max_spec, moda_spec, cspec)
print('Sensitivity: ', sens, min_sens, max_sens, moda_sens, csens)
print('Positive Predictive Value: ', pospred, min_pospred, max_pospred, moda_pospred, cpospred)
print('Negative Predictive Value: ', negpred, min_negpred, max_negpred, moda_negpred, cnegspred)


True False [0.5607220940473112, 0.43927790595268884]
False False [0.4407745752873499, 0.5592254247126499]
False True [0.2196200078325757, 0.7803799921674244]
True False [0.40373810243809294, 0.596261897561907]
False False [0.586672234894149, 0.413327765105851]
False False [0.44203751137848574, 0.5579624886215142]
False False [0.586672234894149, 0.413327765105851]
True False [0.5535351222273772, 0.44646487777262295]
False False [0.586672234894149, 0.413327765105851]
True False [0.47928226616888486, 0.5207177338311152]
False False [0.586672234894149, 0.413327765105851]
True True [0.27711460311389474, 0.7228853968861053]
False True [0.2583733107196455, 0.7416266892803546]
False True [0.36486861568763956, 0.6351313843123605]
False False [0.586672234894149, 0.413327765105851]
True False [0.4710348003184959, 0.5289651996815041]
False True [0.2417079657337907, 0.7582920342662094]
True True [0.24049264690145836, 0.7595073530985416]
True False [0.5667423569451508, 0.4332576430548493]
False True

In [126]:
# features = ['0.5-4Hz_wpli', '4-7Hz_wpli', '7-14Hz_wpli', '14-30Hz_wpli', '30-70Hz_wpli']
# features = ['14-30Hz_wpli', '4-7Hz_wpli']
# features = ['4-7Hz_wpli', '4-7Hz_envelope']
# features = ['4-7Hz_wpli', '4-7Hz_envelope']
features = ['wpli', 'envelope']
# features = ['4-7Hz_wpli']
# features = ['envelope']
# features = ['4-7Hz_wpli', '4-7Hz_psd']
acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()
stat = stat1
for i in range(1000):
    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10, random_state=0)
    # clf = svm.SVC(kernel='linear')
    clf = svm.SVC(class_weight={True: .9, False: 1}, probability=True)
    # clf = KNeighborsClassifier(n_neighbors=3)
    # clf = LogisticRegression()
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(10, 10), max_iter=1450)

    true_data = stat.datasets['true'][features]
    # false_data = stat1.random_samples()[features]
    # false_data = stat1.datasets['false_res'][features]
    # true_data = stat1.random_samples()[features]
    # false_data = stat.random_samples()[features]
    # false_data = lmd.lead_std(stat.datasets['false_res'][features], take_std_from=stat.datasets['true'][features], axis=1)
    false_data = stat.datasets['false_mirror'][features]
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()
    # scaler = MinMaxScaler()
    # scaler = MaxAbsScaler()
    # scaler = RobustScaler()



    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)


    # dataset = lmd.lead_std(dataset, take_std_from=stat1.datasets['false_res'][features], axis=1)
    dataset = lmd.suppress(dataset, axis=1, optimal='max')
    # dataset = lmd.promote(dataset, axis=1, optimal='min')
    # dataset = lmd.clusterize(dataset, axis=1, n_clusters=3, optimal='max')
    # dataset = lmd.binarize(dataset, axis=1)


    x = scaler.fit_transform(dataset)
    # x = dataset
    scores = cross_val_score(clf, x, y, cv=10)
    acc.append(scores.mean())

print('Accuracy: ', sum(acc)/len(acc), min(acc), max(acc))

Accuracy:  0.6406214285714303 0.5982142857142858 0.6642857142857144


In [None]:
# from nodestimation.mlearning.features import prepare_connectivity, prepare_data
# con = pkl.load(open(subjects[0].data['con'], 'rb'))

subjects_dir, subjects_ = find_subject_dir()
labels = mne.read_labels_from_annot('B1C2', parc='aparc', subjects_dir=subjects_dir)
label_names = [label.name for label in labels]
lh_labels = [name for name in label_names if name.endswith('lh')]
rh_labels = [name for name in label_names if name.endswith('rh')]

label_ypos_lh = list()

for name in lh_labels:
    idx = label_names.index(name)
    ypos = np.mean(labels[idx].pos[:, 1])
    label_ypos_lh.append(ypos)

try:
    idx = label_names.index('Brain-Stem')

except ValueError:
    pass

else:
    ypos = np.mean(labels[idx].pos[:, 1])
    lh_labels.append('Brain-Stem')
    label_ypos_lh.append(ypos)


lh_labels = [label for (yp, label) in sorted(zip(label_ypos_lh, lh_labels))]

rh_labels = [label[:-2] + 'rh' for label in lh_labels
             if label != 'Brain-Stem' and label[:-2] + 'rh' in rh_labels]


node_colors = [label.color for label in labels]

node_order = lh_labels[::-1] + rh_labels

node_angles = mne.viz.circular_layout(label_names, node_order, start_pos=90,
                              group_boundaries=[0, len(label_names) // 2])

In [None]:
# methods = [
#     'coh',
#     'imcoh',
#     'plv',
#     'ciplv',
#     'ppc',
#     'pli'
# ]
#
# freq_bands = [
#     '0.5-4Hz',
#     '4-7Hz',
#     '7-14Hz',
#     '14-30Hz'
# ]
#
# for method in methods:
#     for freq_band in freq_bands:
#         fig = plt.figure(num=None, figsize=(25, 25), facecolor='black')
#         mne.viz.plot_connectivity_circle(con[freq_band][method]['con'][:, :, 0], label_names, n_lines=300,
#                                          node_angles=node_angles, node_colors=node_colors,
#                                          title='All-to-All Connectivity Epilepsy Condition ({} for {})'
#                                          .format(method, freq_band), padding=8, fontsize_title=35, fontsize_colorbar=25,
#                                          fontsize_names=20, fig=fig
#                                          )

In [None]:
for subject in subjects:
    fig, ax = plt.subplots(figsize=(15,15))
    display = nplt.plot_glass_brain(None, display_mode='lyrz', figure=fig, axes=ax)
    spared = [node.nilearn_coordinates for node in subject.nodes if node.type == 'spared']
    resected = [node.nilearn_coordinates for node in subject.nodes if node.type == 'resected']
    resection = read['resec-mni'](subject.data['resec-mni'])
    display.add_markers(resection, marker_color="violet", marker_size=1)
    display.add_markers(np.array(spared), marker_color="yellow", marker_size=100)
    display.add_markers(np.array(resected), marker_color="red", marker_size=250)



