In [1]:
import itertools
import operator
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
from nodestimation.learning.connectoming import *
from nodestimation.learning.modification import normalize_df
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft

  warn("Fetchers from the nilearn.datasets module will be "


In [2]:
SUBJECTS = pipeline(
    methods=['wpli', 'envelope', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased'],
    freq_bands=(4, 8),
    centrality_metrics=['eigen', 'between', 'degree', 'info']
    )

ENGEL1 = [
    'B1C2',
    'B1R1',
    'G1R1',
    'G1V2',
    'J1T2',
    'K1V1',
    'L1P1',
    'M1G2',
    'M1N2',
    'O1O2',
    'R1D2',
    'S1A2',
    'S1B1',
    'S1H1',
    'S1U3'
]
ENGEL2 = [
    'L2M1',
    'M2S2',
    'N2K2',
    'P1H2'
]
ENGEL34 = [
    'N3S2',
    'S3R1',
    'K4L2'
]
REJECTED = [
    'S1U3',
    'P1H2'
]

All computation has been already done, loading of the existing file with the solution...


In [3]:
CONNECTOMES_KINDS = [
    'initial',
    'binary',
    'suppressed',
    'inverse',
    'inverse_binary',
    'inverse_suppressed',
    'spatial',
    'initial&spatial',
    'inverse&spatial',
    'bin&spatial',
    'supp&spatial',
    'inverse-bin&spatial',
    'inverse-supp&spatial'

]


CONNECTOMES_KIND = 'inverse-bin&spatial'

In [11]:
print(f'-------------------------------------------------------\n'
      f'{CONNECTOMES_KIND.capitalize()} connectomes computation\n'
      f'-------------------------------------------------------\n')

subjects = SUBJECTS.copy()

DATASETS = {
    subject.name: {
        freq: {
            method: make_connectome(subject, freq, method, CONNECTOMES_KIND, threshold=1)
           for method in subject.connectomes[freq]
        }
        for freq in subject.connectomes
    }
    for subject in subjects
}
print('Connectomes are prepared')

-------------------------------------------------------
Inverse-bin&spatial connectomes computation
-------------------------------------------------------

Connectomes are prepared


In [14]:
datasets = DATASETS.copy()

ignored_methods = [
#     'wpli',
#     'coh',
#     'imcoh',
#     'plv',
    'ciplv',
#     'ppc',
    'pli',
    'pli2_unbiased',
#     'wpli2_debiased'
]

features = ['imcoh', 'envelope']

close = dict()
between = dict()
eigen = dict()
degree = dict()
info = dict()
harmony = dict()
# katz = dict()

total_ignored_subjects = 0

for subject_name in datasets:
    is_corrupted = False
    close.update({subject_name: dict()})
    between.update({subject_name: dict()})
    eigen.update({subject_name: dict()})
    degree.update({subject_name: dict()})
    info.update({subject_name: dict()})
    harmony.update({subject_name: dict()})
    # katz.update({subject_name: dict()})
    for freq in datasets[subject_name]:
        if is_corrupted:
            break
        for method in datasets[subject_name][freq]:

            if method in ignored_methods:
                continue

            print(subject_name, freq, method)
            label_names = datasets[subject_name][freq][method].columns

            if nx.is_connected(
                    nx.convert_matrix.from_numpy_array(
                        datasets[subject_name][freq][method].to_numpy()
                    )
            ):
                arr  = datasets[subject_name][freq][method].to_numpy()
            else:
                try:
                    corrupted_subject = [subject for subject in subjects if subject.name == subject_name][0]
                    subjects.remove(corrupted_subject)
                except IndexError:
                    pass
                total_ignored_subjects += 1
                print(f'Graph is not connected: {subject_name}, {freq}, {method}.\n'
                      f'This subject will be ignored.\n'
                      f'Total of ignored subjects: {total_ignored_subjects}\n'
                      f'Total of spared subjects: {len(subjects)}\n')
                is_corrupted = True
                break
                # raise ValueError(f'Graph is not connected: {subject_name}, {freq}, {method}')
            # if matrix has negative values, shift it to make minimal value at least 0
            if arr.min().min() < 0:
                print(f'\tShifting connectome for {method} at {freq}')
                for i in range(arr.shape[0]):
                    for j in range(arr.shape[1]):
                        if i != j: arr[i, j] -= arr.min().min()
            G = nx.convert_matrix.from_numpy_array(arr)
            mapping = {node: label_name for node, label_name in zip(G, label_names)}
            G = nx.relabel_nodes(G, mapping)
            if CONNECTOMES_KIND == 'initial' or CONNECTOMES_KIND == 'inverse' or CONNECTOMES_KIND == 'spatial':
                G_sup = nx.convert_matrix.from_numpy_array(
                        lmd.suppress(
                            datasets[subject_name][freq][method],
                            trigger= arr.mean().mean(),
                            optimal=0
                        ).to_numpy()
                    )
                G_sup = nx.relabel_nodes(G_sup, mapping)
            else:
                G_sup = G
            for place, data in zip(
                [
                    close[subject_name],
                    between[subject_name],
                    eigen[subject_name],
                    info[subject_name],
                    degree[subject_name],
                    harmony[subject_name],
                    # katz[subject]
                ],[
                        nx.closeness_centrality(G, distance='weight'),
                        nx.betweenness_centrality(G_sup, weight='weight'),
                        nx.eigenvector_centrality_numpy(G, weight='weight'),
                        nx.information_centrality(G, weight='weight'),
                        dict(G.degree(weight='weight')),
                        nx.harmonic_centrality(G, distance='weight'),
                        # nx.katz_centrality(G, weight='weight', max_iter=100)
                    ]
            ):
                place.update({
                    freq + '_' + method: pd.Series(data)
                })

print('All is done')

P1H2 4-8Hz wpli
P1H2 4-8Hz envelope
P1H2 4-8Hz coh
P1H2 4-8Hz imcoh
P1H2 4-8Hz plv
P1H2 4-8Hz ppc
P1H2 4-8Hz wpli2_debiased
M2S2 4-8Hz wpli
M2S2 4-8Hz envelope
M2S2 4-8Hz coh
M2S2 4-8Hz imcoh
M2S2 4-8Hz plv
M2S2 4-8Hz ppc
M2S2 4-8Hz wpli2_debiased
R1D2 4-8Hz wpli
R1D2 4-8Hz envelope
R1D2 4-8Hz coh
R1D2 4-8Hz imcoh
R1D2 4-8Hz plv
R1D2 4-8Hz ppc
R1D2 4-8Hz wpli2_debiased
N3S2 4-8Hz wpli
N3S2 4-8Hz envelope
N3S2 4-8Hz coh
N3S2 4-8Hz imcoh
N3S2 4-8Hz plv
N3S2 4-8Hz ppc
N3S2 4-8Hz wpli2_debiased
Graph is not connected: N3S2, 4-8Hz, wpli2_debiased.
This subject will be ignored.
Total of ignored subjects: 1
Total of spared subjects: 15

S1A2 4-8Hz wpli
S1A2 4-8Hz envelope
S1A2 4-8Hz coh
S1A2 4-8Hz imcoh
S1A2 4-8Hz plv
S1A2 4-8Hz ppc
S1A2 4-8Hz wpli2_debiased
S1H1 4-8Hz wpli
S1H1 4-8Hz envelope
S1H1 4-8Hz coh
S1H1 4-8Hz imcoh
S1H1 4-8Hz plv
S1H1 4-8Hz ppc
S1H1 4-8Hz wpli2_debiased
K1V1 4-8Hz wpli
K1V1 4-8Hz envelope
K1V1 4-8Hz coh
K1V1 4-8Hz imcoh
K1V1 4-8Hz plv
K1V1 4-8Hz ppc
K1V1 4-8Hz wpli2

In [16]:
datasets_centrality = dict()

for subject in subjects:
    datasets_centrality.update({subject.name: dict()})
    datasets_centrality[subject.name].update({
        'close': pd.DataFrame(close[subject.name]),
        'between': pd.DataFrame(between[subject.name]),
        'eigen': pd.DataFrame(eigen[subject.name]),
        'info': pd.DataFrame(info[subject.name]),
        'degree': pd.DataFrame(degree[subject.name]),
        'harmony': pd.DataFrame(harmony[subject.name]),
        # 'katz': pd.DataFrame(katz[subject.name])
    })
    true = subject.datasets['eigen']['resected']
    datasets_centrality[subject.name]['close'] = datasets_centrality[subject.name]['close'].assign(resected=true)
    datasets_centrality[subject.name]['between'] = datasets_centrality[subject.name]['between'].assign(resected=true)
    datasets_centrality[subject.name]['eigen'] = datasets_centrality[subject.name]['eigen'].assign(resected=true)
    datasets_centrality[subject.name]['degree'] = datasets_centrality[subject.name]['degree'].assign(resected=true)
    datasets_centrality[subject.name]['info'] = datasets_centrality[subject.name]['info'].assign(resected=true)
    datasets_centrality[subject.name]['harmony'] = datasets_centrality[subject.name]['harmony'].assign(resected=true)
    # datasets_centrality[subject.name]['katz'] = datasets_centrality[subject.name]['katz'].assign(resected=true)

for subject in subjects:
    subject.datasets = datasets_centrality[subject.name]

print(f'Connectomes successfully updated. Now each subject has new {CONNECTOMES_KIND} connectomes')

stat1 = SubjectsStatistic(subjects, 'resected', centrality_metric='eigen')
print('Eigencentrality statistics are collected')
stat2 = SubjectsStatistic(subjects, 'resected', centrality_metric='between')
print('Betweenness centrality statistics are collected')
stat3 = SubjectsStatistic(subjects, 'resected', centrality_metric='close')
print('Closeness centrality statistics are collected')
stat4 = SubjectsStatistic(subjects, 'resected', centrality_metric='degree')
print('Degree centrality statistics are collected')
stat5 = SubjectsStatistic(subjects, 'resected', centrality_metric='info')
print('Info centrality statistics are collected')
stat6 = SubjectsStatistic(subjects, 'resected', centrality_metric='harmony')
print('Harmony centrality statistics are collected')
# stat7 = SubjectsStatistic(subjects, 'resected', centrality_metric='katz')
# print('Katz centrality statistics are collected')
print(f'All statistics for {CONNECTOMES_KIND} connectomes are collected')

Connectomes successfully updated. Now each subject has new inverse-bin&spatial connectomes
Eigencentrality statistics are collected
Betweenness centrality statistics are collected
Closeness centrality statistics are collected
Degree centrality statistics are collected
Info centrality statistics are collected
Harmony centrality statistics are collected
All statistics for inverse-bin&spatial connectomes are collected


In [18]:
stat = stat2
features = ['wpli', 'envelope']
freqfeat = ['4-8Hz_'+feat for feat in features]

acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

for _ in range(1000):
    clf = svm.SVC()
    true_data = stat.datasets['true'][freqfeat]
    false_data = stat.datasets['false_mirror'][freqfeat]
    true_data = true_data.assign(resected=True)
    false_data = false_data.assign(resected=False)
    # for sample in true_data.index:
    #     subject = sample[:4]
    #     if not subject in ENGEL1 and\
    #         not subject in ENGEL2 or\
    #         subject in REJECTED:
    #         true_data = true_data.drop(index=sample)
    # for sample in false_data.index:
    #     if not any([subject in sample for subject in ENGEL1]) and\
    #             not any([subject in sample for subject in ENGEL2]) or\
    #             any([subject in sample for subject in REJECTED]):
    #         false_data = false_data.drop(index=sample)
    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)


    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)
    samples = [[sample] for sample in dataset.index.tolist()]

    x = scaler.fit_transform(dataset)
    x = np.append(x, samples, axis=1)
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    train_samples, test_samples = x_train[:, 2], x_test[:, 2]
    x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)

    acc.append(accuracy_score(y_test, pred))
    tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
    spec.append(tn / (tn + fp))
    sens.append(tp / (tp + fn))

    if np.isnan(tp/(tp + fp)):
        pospred.append(0)
    else:
        pospred.append(tp/(tp + fp))

    if np.isnan((tn/(tn + fn))):
        negpred.append(0)
    else:
        negpred.append(tn/(tn + fn))

print('acc: ', np.array(acc).mean())
print('spec: ', np.array(spec).mean())
print('sens: ', np.array(sens).mean())


  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan(tp/(tp + fp)):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))):
  if np.isnan((tn/(tn + fn))

acc:  0.4244807692307692
spec:  0.510300656294642
sens:  0.3913099989044293
