In [2]:

import itertools
import operator
import pickle
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split

from nodestimation.learning.connectoming import make_connectome
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft

ENGEL1 = [
    'B1C2',
    'B1R1',
    'G1R1',
    'G1V2',
    'J1T2',
    'K1V1',
    'L1P1',
    'M1G2',
    'M1N2',
    'O1O2',
    'R1D2',
    'S1A2',
    'S1B1',
    'S1H1',
    'S1U3'
]
ENGEL2 = [
    'L2M1',
    'M2S2',
    'N2K2',
    'P1H2'
]
ENGEL3 = [
    'N3S2',
    'S3R1'
]
ENGEL4 = [
    'K4L2'
]
REJECTED = [
    'S1U3',
    'P1H2'
]

AGE = {
    'B1C2': 28.0,
    'B1R1': 16.0,
    'G1R1': 23.0,
    'G1V2': 5.0,
    'J1T2': 8.0,
    'K1V1': 10.0,
    'K4L2': 14.0,
    'L1P1': 16.0,
    'L2M1': 20.0,
    'M1G2': 8.0,
    'M1N2': 7.0,
    'M2S2': 18.0,
    'N2K2': 30.0,
    'N3S2': 10.0,
    'O1O2': 18.0,
    'R1D2': 6.5,
    'P1H2': 7.0,
    'S1A2': 12.0,
    'S1B1': 17.0,
    'S1H1': 28.0,
    'S3R1': 19.0,
    'S1U3': 15.0,
}

SEX = {
    'B1C2': 'f',
    'B1R1': 'm',
    'G1R1': 'f',
    'G1V2': 'm',
    'J1T2': 'f',
    'K1V1': 'f',
    'K4L2': 'f',
    'L1P1': 'f',
    'L2M1': 'f',
    'M1G2': 'm',
    'M1N2': 'm',
    'M2S2': 'm',
    'N2K2': 'm',
    'N3S2': 'm',
    'O1O2': 'f',
    'R1D2': 'f',
    'P1H2': 'm',
    'S1A2': 'm',
    'S1B1': 'm',
    'S1H1': 'm',
    'S3R1': 'm',
    'S1U3': 'f',
}

SUBJECTS = pipeline(
    methods=['wpli', 'envelope', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased'],
    freq_bands=[(4, 8), (8, 14), (6, 8), (8, 10)],
)

CONNECTOMES_KIND = 'initial'

stats = pickle.load(open(
    '/home/user/Documents/NodesEstimation/theta&late_theta&early_alpha&alpha/all_stats_all_subjects_theta&l-theta&e-alpha&alpha.pkl',
    'rb'
))

graphs = pickle.load(open(
    '/home/user/Documents/NodesEstimation/theta&late_theta&early_alpha&alpha/all_graphs_all_subjects_theta&l-theta&e-alpha&alpha.pkl',
    'rb'
))

All computation has been already done, loading of the existing file with the solution...


In [4]:
stat1 = stats[0]
stat2 = stats[-1]

cross_subjects_informativeness = CrossInformativeness()
for freq in ['4-8', '6-8', '8-14', '8-10']:
    for _ in range(100):
        features = [f'{freq}Hz_wpli', f'{freq}Hz_envelope']

        full_true_data_1 = stat1.datasets['true'][f'{freq}Hz_wpli']
        full_true_data_2 = stat2.datasets['true'][f'{freq}Hz_envelope']
        full_false_data_1 = stat1.datasets['false_mirror'][f'{freq}Hz_wpli']
        full_false_data_2 = stat2.datasets['false_mirror'][f'{freq}Hz_envelope']

        full_true_data = pd.concat([full_true_data_1, full_true_data_2], axis=1)

        full_false_data = pd.concat([full_false_data_1, full_false_data_2], axis=1)

        acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

        subjects_informativeness = SubjectsInformativeness()

        for _ in range(100):

            # Engel1&2 - train, the rest - test
            scaler = StandardScaler()
            clf = svm.SVC(kernel='sigmoid')
            engel_1_2_true = full_true_data.loc[(elem[:4] in [*ENGEL1, *ENGEL2] and not elem[:4] in REJECTED for elem in full_true_data.index)].assign(resected=True)
            engel_1_2_false = full_false_data.loc[(elem[:4] in [*ENGEL1, *ENGEL2] and not elem[:4] in REJECTED for elem in full_false_data.index)].assign(resected=False)
            engel_3_4_true = full_true_data\
                .loc[(elem[:4] not in [*ENGEL1, *ENGEL2, *REJECTED] for elem in full_true_data.index)].assign(resected=True)
            engel_3_4_false = full_false_data\
                .loc[(elem[:4] not in [*ENGEL1, *ENGEL2, *REJECTED] for elem in full_false_data.index)].assign(resected=False)
            engel_1_2 = pd.concat([engel_1_2_true, engel_1_2_false], axis=0)
            engel_1_2_train, engel_1_2_test = np.array_split(engel_1_2.sample(frac=1), 2)
            engel_3_4 = pd.concat([engel_3_4_true, engel_3_4_false], axis=0)

            y_train = engel_1_2_train['resected'].to_numpy()
            x_train = scaler.fit_transform(engel_1_2_train.drop(['resected'], axis=1))

            engel_test = pd.concat([engel_3_4, engel_1_2_test], axis=0).sample(frac=1)
            # engel_test = engel_1_2_test
            test_samples = engel_test.index.to_list()
            y_test = engel_test['resected'].to_numpy()
            x_test = scaler.fit_transform(engel_test.drop(['resected'], axis=1))

            clf.fit(x_train, y_train)
            pred = clf.predict(x_test)

            for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
                subjects_informativeness.informativeness = sample, actual, 'correct'\
                    if predicted == actual else 'wrong'
            acc.append(accuracy_score(y_test, pred))

            # if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
            #     spec.append(tn / (tn + fp))
            #     sens.append(tp / (tp + fn))
            #     negpred.append(tn/(tn + fn))
            #     pospred.append(tp/(tp + fp))
        # print('acc: ', np.array(acc).mean())
        cross_subjects_informativeness.informativeness = subjects_informativeness

    df = pd.DataFrame([
        pd.Series(
            dict(
                sorted(
                    cross_subjects_informativeness.acc().items(),
                    key=lambda item: item[0]
                )
            )
        ),
        pd.Series(
            dict(
                sorted(
                    cross_subjects_informativeness.tnr().items(),
                    key=lambda item: item[0]
                )
            )
        ),
        pd.Series(
            dict(
                sorted(
                    cross_subjects_informativeness.tpr().items(),
                    key=lambda item: item[0]
                )
            )
        )
    ])\
        .T\
        .rename(
        columns={
            0: 'acc',
            1: 'spec',
            2: 'sens'
        }
    )

    print(df)
    df.to_csv(f'/home/user/Documents/NodesEstimation/cross-classification/cross_regions_analysis_all_subjects_{freq}Hz_wpli&envelope_eigen&perloc.csv')

           acc      spec      sens
B1C2  0.505550  0.505232  0.505718
B1R1  0.651287  0.674246  0.633086
G1R1  0.801248  0.866715  0.755210
G1V2  0.806765  0.794193  0.820612
J1T2  0.452564  0.206851  0.473933
K1V1  0.546331  0.574861  0.534447
K4L2  0.544725  0.529280  0.594656
L1P1  0.498526  0.497133  0.500745
L2M1  0.550873  0.612770  0.533573
M1G2  0.697005  0.744488  0.665769
M1N2  0.520435  0.542897  0.514203
M2S2  0.587583  0.551792  0.746193
N2K2  0.461877  0.389976  0.475951
N3S2  0.400425  0.394159  0.405990
O1O2  0.497051  0.485903  0.499327
R1D2  0.613329  0.574413  0.778680
S1A2  0.066070  0.083800  0.046220
S1B1  0.507413  0.510222  0.504959
S1H1  0.843321  0.868674  0.820616
S3R1  0.683367  0.679121  0.687818
           acc      spec      sens
B1C2  0.466701  0.450994  0.475497
B1R1  0.600155  0.631176  0.579669
G1R1  0.755465  0.828257  0.708213
G1V2  0.701444  0.671962  0.744303
J1T2  0.471020  0.300912  0.485594
K1V1  0.556360  0.579444  0.544350
K4L2  0.546925  0.53

In [9]:
for freq in ['4-8', '6-8', '8-14', '8-10']:
    cross_hemispheres_informativeness = CrossInformativeness()
    cross_subjects_informativeness = CrossInformativeness()
    wpli = graphs[0][f'transitivity_for_wpli_{freq}Hz']
    wpli = wpli\
        .loc[(elem[:4] not in REJECTED for elem in wpli.index)]
    envelope = graphs[2][f'global_efficiency_for_envelope_{freq}Hz']
    envelope = envelope\
        .loc[(elem[:4] not in REJECTED for elem in envelope.index)]
    assert all(graphs[0]['resected'] == graphs[2]['resected'])
    labels = graphs[0]['resected']
    labels = labels.loc[(elem[:4] not in REJECTED for elem in labels.index)]
    dataset = pd.concat([wpli, envelope, labels], axis=1)

    for _ in range(100):
        hemispheres_informatoveness = Informativeness()
        subjects_informativeness = SubjectsInformativeness()
        acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

        for i in range(100):

            y = dataset['resected'].to_numpy()
            x = dataset[[f'transitivity_for_wpli_{freq}Hz', f'global_efficiency_for_envelope_{freq}Hz']].to_numpy()

            scaler = StandardScaler()
            x = scaler.fit_transform(x)

            samples = [[sample] for sample in dataset.index.tolist()]

            x = np.append(x, samples, axis=1)
            engel_1_2_set_x, engel_1_2_set_y = list(), list()
            test_set_x, test_set_y = list(), list()
            for sample_x, sample_y in zip(x, y):
                if any([subject in sample_x[-1] for subject in [*ENGEL1, *ENGEL2]]):
                    engel_1_2_set_x.append(sample_x)
                    engel_1_2_set_y.append(sample_y)
                else:
                    test_set_x.append(sample_x)
                    test_set_y.append(sample_y)

            x_train, x_test_add, y_train, y_test_add = train_test_split(
                engel_1_2_set_x,
                engel_1_2_set_y,
                train_size=0.5
            )
            x_test = test_set_x + x_test_add
            y_test = test_set_y + y_test_add
            # x_test = x_test_add
            # y_test = y_test_add
            train_samples = np.array([sample[2] for sample in x_train])
            test_samples = np.array([sample[2] for sample in x_test])
            x_train = np.array([sample[0:2] for sample in x_train])
            x_test = np.array([sample[0:2] for sample in x_test])

            clf = svm.SVC()
            clf.fit(x_train, y_train)
            pred = clf.predict(x_test)
            acc.append(accuracy_score(y_test, pred))
            # if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
            #     spec.append(tn / (tn + fp))
            #     sens.append(tp / (tp + fn))
            #     negpred.append(tn/(tn + fn))
            #     pospred.append(tp/(tp + fp))

            for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
                hemispheres_informatoveness.informativeness = sample, actual, 'correct' \
                if predicted == actual else 'wrong'
                subjects_informativeness.informativeness = sample, actual, 'correct' \
                if predicted == actual else 'wrong'

        # print('acc: ', np.array(acc).mean())
        cross_subjects_informativeness.informativeness = subjects_informativeness
        cross_hemispheres_informativeness.informativeness = hemispheres_informatoveness
    df = pd.DataFrame([
        pd.Series(
            dict(
                sorted(
                    cross_subjects_informativeness.acc().items(),
                    key=lambda item: item[0]
                )
            )
        ),
        pd.Series(
            dict(
                sorted(
                    cross_subjects_informativeness.tnr().items(),
                    key=lambda item: item[0]
                )
            )
        ),
        pd.Series(
            dict(
                sorted(
                    cross_subjects_informativeness.tpr().items(),
                    key=lambda item: item[0]
                )
            )
        )
    ])\
        .T\
        .rename(
        columns={
            0: 'acc',
            1: 'spec',
            2: 'sens'
        }
    )

    print(df)

    df.to_csv(f'/home/user/Documents/NodesEstimation/cross-classification/cross_hemispheres_analysis_all_subjects_{freq}Hz_wpli&envelope_eigen&perloc.csv')


           acc      spec      sens
B1C2  0.387025  0.425865  0.263017
B1R1  0.733151  0.662258  0.914053
G1R1  0.968247  0.968237  0.968257
G1V2  0.032632  0.044150  0.020309
J1T2  0.634095  0.614744  0.661395
K1V1  0.770778  0.926459  0.698992
K4L2  0.347910  0.406705  0.085797
L1P1  0.915840  0.874596  0.965916
L2M1  0.899772  0.924839  0.877616
M1G2  0.493375  0.449341  0.494973
M1N2  0.925948  0.891770  0.969039
M2S2  0.741942  0.725000  0.762147
N2K2  0.544962  0.582637  0.530601
N3S2  0.230500  0.121063  0.290891
O1O2  0.790465  0.715065  0.953474
R1D2  0.950579  0.929520  0.973749
S1A2  0.444878  0.466018  0.390575
S1B1  0.720060  0.685280  0.770177
S1H1  0.735613  0.661458  0.948447
S3R1  0.776460  0.696240  0.971020
           acc      spec      sens
B1C2  0.477262  0.481114  0.471339
B1R1  0.645281  0.617536  0.689015
G1R1  0.780328  0.916704  0.711799
G1V2  0.227209  0.280136  0.141056
J1T2  0.425996  0.448551  0.371400
K1V1  0.725987  0.886589  0.658102
K4L2  0.234278  0.30