In [1]:
import itertools
import operator
import pickle
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split

from nodestimation.learning.connectoming import make_connectome
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft

ENGEL1 = [
    'B1C2',
    'B1R1',
    'G1R1',
    'G1V2',
    'J1T2',
    'K1V1',
    'L1P1',
    'M1G2',
    'M1N2',
    'O1O2',
    'R1D2',
    'S1A2',
    'S1B1',
    'S1H1',
    'S1U3'
]
ENGEL2 = [
    'L2M1',
    'M2S2',
    'N2K2',
    'P1H2'
]
ENGEL3 = [
    'N3S2',
    'S3R1'
]
ENGEL4 = [
    'K4L2'
]
REJECTED = [
    'S1U3',
    'P1H2'
]

AGE = {
    'B1C2': 28.0,
    'B1R1': 16.0,
    'G1R1': 23.0,
    'G1V2': 5.0,
    'J1T2': 8.0,
    'K1V1': 10.0,
    'K4L2': 14.0,
    'L1P1': 16.0,
    'L2M1': 20.0,
    'M1G2': 8.0,
    'M1N2': 7.0,
    'M2S2': 18.0,
    'N2K2': 30.0,
    'N3S2': 10.0,
    'O1O2': 18.0,
    'R1D2': 6.5,
    'P1H2': 7.0,
    'S1A2': 12.0,
    'S1B1': 17.0,
    'S1H1': 28.0,
    'S3R1': 19.0,
    'S1U3': 15.0,
}

SEX = {
    'B1C2': 'f',
    'B1R1': 'm',
    'G1R1': 'f',
    'G1V2': 'm',
    'J1T2': 'f',
    'K1V1': 'f',
    'K4L2': 'f',
    'L1P1': 'f',
    'L2M1': 'f',
    'M1G2': 'm',
    'M1N2': 'm',
    'M2S2': 'm',
    'N2K2': 'm',
    'N3S2': 'm',
    'O1O2': 'f',
    'R1D2': 'f',
    'P1H2': 'm',
    'S1A2': 'm',
    'S1B1': 'm',
    'S1H1': 'm',
    'S3R1': 'm',
    'S1U3': 'f',
}

SUBJECTS = pipeline(
    methods=['wpli', 'envelope', 'coh', 'imcoh', 'plv', 'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased'],
    freq_bands=(4, 8),
    centrality_metrics=['eigen', 'between', 'degree', 'info']
    )

CONNECTOMES_KIND = 'initial'

  warn("Fetchers from the nilearn.datasets module will be "


All computation has been already done, loading of the existing file with the solution...


In [2]:
subjects = SUBJECTS.copy()

DATASETS = {
    subject.name: {
        freq: {
            method: make_connectome(subject, freq, method, CONNECTOMES_KIND, threshold=1)
           for method in subject.connectomes[freq]
        }
        for freq in subject.connectomes
    }
    for subject in subjects
}
print('Connectomes are prepared')

GRAPHS = [
    metric_for_hemispheres(subjects, nx.algorithms.cluster.transitivity),
    metric_for_hemispheres(subjects, nx.algorithms.smetric.s_metric, normalized=False),
    metric_for_hemispheres(subjects, nx.algorithms.global_efficiency),
]
print('Graphs are prepared')

Graphs are prepared


In [6]:
path = f'/home/user//Documents/{CONNECTOMES_KIND}_stats.pkl'
stats = pickle.load(open(path, 'rb'))

In [84]:
# Engel 1 and 2, global efficiency

datasets = GRAPHS.copy()

for i in range(len(datasets)):
    for sample in datasets[i].index:
        subject = sample[:4]
        if subject in REJECTED:
            datasets[i] = datasets[i].drop(index=sample)
        # if subject not in ENGEL1 and subject not in ENGEL2 or subject in REJECTED:
        #     datasets[i] = datasets[i].drop(index=sample)

In [90]:
# hemispheres

names = ['transitivity', 's_metric', 'global_efficiency']
cross_hemispheres_informativeness_arr = list()
cross_subjects_informativeness_arr = list()

confusions = list()
# tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
#     confusions.append((tn, fp, fn, tp))

for dataset, name in zip(datasets, names):
    cross_hemispheres_informativeness = CrossInformativeness()
    cross_subjects_informativeness = CrossInformativeness()

    for _ in range(100):
        hemispheres_informatoveness = Informativeness()
        subjects_informativeness = SubjectsInformativeness()
        acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

        for i in range(100):

            y = dataset['resected'].to_numpy()
            x = dataset[[f'{name}_for_wpli_4-8Hz', f'{name}_for_envelope_4-8Hz']].to_numpy()

            scaler = StandardScaler()
            x = scaler.fit_transform(x)

            samples = [[sample] for sample in dataset.index.tolist()]

            x = np.append(x, samples, axis=1)
            engel_1_2_set_x, engel_1_2_set_y = list(), list()
            test_set_x, test_set_y = list(), list()
            for sample_x, sample_y in zip(x, y):
                if any([subject in sample_x[-1] for subject in [*ENGEL1, *ENGEL2]]):
                    engel_1_2_set_x.append(sample_x)
                    engel_1_2_set_y.append(sample_y)
                else:
                    test_set_x.append(sample_x)
                    test_set_y.append(sample_y)

            x_train, x_test_add, y_train, y_test_add = train_test_split(
                engel_1_2_set_x,
                engel_1_2_set_y,
                train_size=0.5
            )
            x_test = test_set_x + x_test_add
            y_test = test_set_y + y_test_add
            # train_samples, test_samples = x_train[:][2], x_test[:][2]
            train_samples = np.array([sample[2] for sample in x_train])
            test_samples = np.array([sample[2] for sample in x_test])
            x_train = np.array([sample[0:2] for sample in x_train])
            x_test = np.array([sample[0:2] for sample in x_test])

            clf = svm.SVC()
            clf.fit(x_train, y_train)
            pred = clf.predict(x_test)

            # if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
            #     spec.append(tn / (tn + fp))
            #     sens.append(tp / (tp + fn))
            #     negpred.append(tn/(tn + fn))
            #     pospred.append(tp/(tp + fp))

            for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
                hemispheres_informatoveness.informativeness = sample, actual, 'correct' \
                if predicted == actual else 'wrong'
                subjects_informativeness.informativeness = sample, actual, 'correct' \
                if predicted == actual else 'wrong'

        cross_subjects_informativeness.informativeness = subjects_informativeness
        cross_hemispheres_informativeness.informativeness = hemispheres_informatoveness

    cross_hemispheres_informativeness_arr.append(cross_hemispheres_informativeness)
    cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [91]:
series = list()
index = [
    'Transitivity',
    'S metric',
    'Global Efficiency',
    'Engel'
]

eng = dict()
for subject_name in ENGEL1 + ENGEL2 + ENGEL3 + ENGEL4:
    if subject_name in REJECTED:
        continue
    else:
        if subject_name in ENGEL1:
            eng.update({
                subject_name: 1
            })
        if subject_name in ENGEL2:
            eng.update({
                subject_name: 2
            })
        if subject_name in ENGEL3:
            eng.update({
                subject_name: 3
            })
        if subject_name in ENGEL4:
            eng.update({
                subject_name: 4
            })

for info in cross_subjects_informativeness_arr:
    series.append(
        pd.Series(
                dict(
                sorted(
                    info.acc().items(),
                    key=lambda item: item[0]
                )
            )
        )
    )

series.append(
    pd.Series(
                dict(
                sorted(
                    eng.items(),
                    key=lambda item: item[0]
                )
            )
        )
)

df = pd.DataFrame(
    series,
    index=index
)
df = df.T
print(df)
df.to_csv('~/Documents/Subjects_informativeness-hemispheres-level.csv')

      Transitivity  S metric  Global Efficiency  Engel
B1C2      0.273291  0.355502           0.390818    1.0
B1R1      0.875153  0.877188           0.958836    1.0
G1R1      0.962443  0.968370           0.978824    1.0
G1V2      0.479753  0.490202           0.460029    1.0
J1T2      0.288805  0.584870           0.662457    1.0
K1V1      0.214108  0.527049           0.525609    1.0
K4L2      0.566000  0.558700           0.490300    4.0
L1P1      0.853683  0.950030           0.976848    1.0
L2M1      0.736296  0.887759           0.641966    2.0
M1G2      0.631346  0.727425           0.763141    1.0
M1N2      0.864098  0.832760           0.928898    1.0
M2S2      0.647733  0.695212           0.779264    2.0
N2K2      0.486773  0.523013           0.864838    2.0
N3S2      0.051224  0.040018           0.051564    3.0
O1O2      0.849875  0.918361           0.970726    1.0
R1D2      0.944319  0.860090           0.955945    1.0
S1A2      0.324196  0.392767           0.460512    1.0
S1B1      

In [None]:
# regions

for stat in stats:
    print(stat)
    start = time.time()
    cross_subjects_informativeness = CrossInformativeness()

    for _ in range(100):
        features = ['4-8Hz_wpli', '4-8Hz_envelope']
        acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

        subjects_informativeness = SubjectsInformativeness()

        for _ in range(100):
            clf = svm.SVC()
            true_data = stat.datasets['true'][features]
            false_data = stat.datasets['false_mirror'][features]
            true_data = true_data.assign(resected=True)
            false_data = false_data.assign(resected=False)
            for sample in true_data.index:
                subject = sample[:4]
                # if subject in REJECTED:
                #     true_data = true_data.drop(index=sample)
                if subject not in ENGEL1 and subject not in ENGEL2 or subject in REJECTED:
                    true_data = true_data.drop(index=sample)
            for sample in false_data.index:
                subject = sample[:4]
                # if subject in REJECTED:
                #     false_data = false_data.drop(index=sample)
                if subject not in ENGEL1 and subject not in ENGEL2 or subject in REJECTED:
                    false_data = false_data.drop(index=sample)
            dataset = pd.concat([true_data, false_data], axis=0)
            dataset = dataset.sample(frac = 1)


            scaler = StandardScaler()

            y = dataset['resected'].to_numpy()
            dataset = dataset.drop(['resected'], axis=1)
            samples = [[sample] for sample in dataset.index.tolist()]

            x = scaler.fit_transform(dataset)
            x = np.append(x, samples, axis=1)
            x_train, x_test, y_train, y_test = train_test_split(x, y)
            train_samples, test_samples = x_train[:, 2], x_test[:, 2]
            x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

            clf.fit(x_train, y_train)
            pred = clf.predict(x_test)

            for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
                subjects_informativeness.informativeness = sample, actual, 'correct'\
                    if predicted == actual else 'wrong'
            # acc.append(accuracy_score(y_test, pred))

            # if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
            #     spec.append(tn / (tn + fp))
            #     sens.append(tp / (tp + fn))
            #     negpred.append(tn/(tn + fn))
            #     pospred.append(tp/(tp + fp))
        cross_subjects_informativeness.informativeness = subjects_informativeness

    print(f'RUNTIME: {time.time() - start}')
    cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [None]:
series = list()
index = [
    'Transitivity',
    'S metric',
    'Global Efficiency',
    'Eigencentrality',
    'Betweenness',
    'Closeness',
    'Degree',
    'Info',
    'Harmony',
    'Katz',
    'Percolation',
    'Engel'
]

eng = dict()
for subject_name in ENGEL1 + ENGEL2:
    if subject_name in REJECTED:
        continue
    else:
        eng.update({
            subject_name: 1 if subject_name in ENGEL1 else 2
        })

for info in cross_subjects_informativeness_arr:
    series.append(
        pd.Series(
                dict(
                sorted(
                    info.acc().items(),
                    key=lambda item: item[0]
                )
            )
        )
    )

series.append(
    pd.Series(
                dict(
                sorted(
                    eng.items(),
                    key=lambda item: item[0]
                )
            )
        )
)

df = pd.DataFrame(
    series,
    index=index
)
df = df.T
print(df)
df.to_csv('~/Documents/Subjects_informativeness-Engel1&2.csv')

In [None]:
# hemispheres

names = ['transitivity', 's_metric', 'global_efficiency']
n_iter = 1000
confusions = list()
#     confusions.append((tn, fp, fn, tp))

for dataset, name in zip(datasets, names):

    tn_c, tp_c, fn_c, fp_c = 0, 0, 0, 0
    for i in range(n_iter):

        y = dataset['resected'].to_numpy()
        x = dataset[[f'{name}_for_wpli_4-8Hz', f'{name}_for_envelope_4-8Hz']].to_numpy()

        scaler = StandardScaler()
        x = scaler.fit_transform(x)

        samples = [[sample] for sample in dataset.index.tolist()]

        x = np.append(x, samples, axis=1)
        x_train, x_test, y_train, y_test = train_test_split(x, y)
        train_samples, test_samples = x_train[:, 2], x_test[:, 2]
        x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

        clf = svm.SVC()
        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)
        tn, fp, fn, tp = confusion_matrix(y_test, pred, labels=[0,1]).ravel()
        tn_c += tn
        fp_c += fp
        fn_c += fn
        tp_c += tp

    confusions.append((tn_c/n_iter, fp_c/n_iter, fn_c/n_iter, tp_c/n_iter))

In [None]:
# regions

n_iter = 1000

for stat in stats:

    features = ['4-8Hz_wpli', '4-8Hz_envelope']
    tn_c, tp_c, fn_c, fp_c = 0, 0, 0, 0

    for _ in range(n_iter):
        clf = svm.SVC()
        true_data = stat.datasets['true'][features]
        false_data = stat.datasets['false_mirror'][features]
        true_data = true_data.assign(resected=True)
        false_data = false_data.assign(resected=False)
        for sample in true_data.index:
            subject = sample[:4]
            if subject in REJECTED:
                true_data = true_data.drop(index=sample)
        for sample in false_data.index:
            subject = sample[:4]
            if subject in REJECTED:
                false_data = false_data.drop(index=sample)
        dataset = pd.concat([true_data, false_data], axis=0)
        dataset = dataset.sample(frac = 1)


        scaler = StandardScaler()

        y = dataset['resected'].to_numpy()
        dataset = dataset.drop(['resected'], axis=1)
        samples = [[sample] for sample in dataset.index.tolist()]

        x = scaler.fit_transform(dataset)
        x = np.append(x, samples, axis=1)
        x_train, x_test, y_train, y_test = train_test_split(x, y)
        train_samples, test_samples = x_train[:, 2], x_test[:, 2]
        x_train, x_test = x_train[:, 0:2], x_test[:, 0:2]

        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)

        tn, fp, fn, tp = confusion_matrix(y_test, pred, labels=[0,1]).ravel()
        tn_c += tn
        fp_c += fp
        fn_c += fn
        tp_c += tp

    confusions.append((tn_c/n_iter, fp_c/n_iter, fn_c/n_iter, tp_c/n_iter))

In [None]:
index = [
    'Transitivity',
    'S metric',
    'Global Efficiency',
    'Eigencentrality',
    'Betweenness',
    'Closeness',
    'Degree',
    'Info',
    'Harmony',
    'Katz',
    'Percolation'
]

columns = [
    'accuracy',
    'specificity',
    'sensitivity',
    'Matthews',
    'f1',
    'Fowlkes–Mallows'
]

series = list()

for tn, fp, fn, tp in confusions:

    series.append(
        [
            (tp + tn) / (tp + tn + fp + fn),
            tn / (tn + fp),
            tp / (tp + fn),
            (tp * tn - fp * fn) /
            np.sqrt(
                (tp + fp) *
                (tp + fn) *
                (tn + fp) *
                (tn + fn)
            ),
            2 * tp / (2 * tp + fp + fn),
            np.sqrt(tp / (tp + fn) * tp / (tp + fp))
        ]
    )

df = pd.DataFrame(
    series,
    columns=columns,
    index=index
)
df = df.T
# print(df)
df.to_csv('~/Documents/Metrics_informativeness.csv')


In [None]:
for name, info in zip(names, cross_subjects_informativeness_arr):

    acc = dict(
        sorted(
            info.acc().items(),
            key=lambda item: item[0]
        )
    )

    plt.bar(
        acc.keys(),
        acc.values()
    )
    plt.xticks(rotation=90)
    # plt.xticks(range(len(info.acc().keys())))
    plt.title(f'Subjects-level prediction accuracy, {name}')
    plt.show()


In [None]:
series = list()
names = ['Transitivity, acc, %', 'S metric, acc, %', 'Global Efficiency, acc, %', 'Eigencentrality, acc, %']
for info in cross_subjects_informativeness_arr:
    series.append(
        pd.Series(
            dict(
                sorted(
                    info.acc().items(),
                    key=lambda item: item[0]
                )
            )
        )
    )


df = pd.DataFrame(series, index=names)

df = df.T
print(df)
df.to_csv('/home/user/Documents/Engel1&2_Subject_informativeness.csv')

In [7]:
# wpli + transitivity & envelope + global efficiency
subjects = [subject for subject in SUBJECTS if subject.name not in REJECTED]
GRAPHS = [
    metric_for_hemispheres(subjects, nx.algorithms.cluster.transitivity),
    metric_for_hemispheres(subjects, nx.algorithms.global_efficiency),
]

In [39]:

# tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
#     confusions.append((tn, fp, fn, tp))

cross_hemispheres_informativeness = CrossInformativeness()
cross_subjects_informativeness = CrossInformativeness()
wpli = GRAPHS[0]['transitivity_for_wpli_4-8Hz']
envelope = GRAPHS[1]['global_efficiency_for_envelope_4-8Hz']
assert all(GRAPHS[0]['resected'] == GRAPHS[1]['resected'])
labels = GRAPHS[0]['resected']
dataset = pd.concat([wpli, envelope, labels], axis=1)

for _ in range(100):
    hemispheres_informatoveness = Informativeness()
    subjects_informativeness = SubjectsInformativeness()
    acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

    for i in range(100):

        y = dataset['resected'].to_numpy()
        x = dataset[['transitivity_for_wpli_4-8Hz', 'global_efficiency_for_envelope_4-8Hz']].to_numpy()

        scaler = StandardScaler()
        x = scaler.fit_transform(x)

        samples = [[sample] for sample in dataset.index.tolist()]

        x = np.append(x, samples, axis=1)
        engel_1_2_set_x, engel_1_2_set_y = list(), list()
        test_set_x, test_set_y = list(), list()
        for sample_x, sample_y in zip(x, y):
            if any([subject in sample_x[-1] for subject in [*ENGEL1, *ENGEL2]]):
                engel_1_2_set_x.append(sample_x)
                engel_1_2_set_y.append(sample_y)
            else:
                test_set_x.append(sample_x)
                test_set_y.append(sample_y)

        x_train, x_test_add, y_train, y_test_add = train_test_split(
            engel_1_2_set_x,
            engel_1_2_set_y,
            train_size=0.5
        )
        # x_test = test_set_x + x_test_add
        # y_test = test_set_y + y_test_add
        x_test = x_test_add
        y_test = y_test_add
        # train_samples, test_samples = x_train[:][2], x_test[:][2]
        train_samples = np.array([sample[2] for sample in x_train])
        test_samples = np.array([sample[2] for sample in x_test])
        x_train = np.array([sample[0:2] for sample in x_train])
        x_test = np.array([sample[0:2] for sample in x_test])

        clf = svm.SVC()
        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)
        acc.append(accuracy_score(y_test, pred))
        # if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
        #     spec.append(tn / (tn + fp))
        #     sens.append(tp / (tp + fn))
        #     negpred.append(tn/(tn + fn))
        #     pospred.append(tp/(tp + fp))

        for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
            hemispheres_informatoveness.informativeness = sample, actual, 'correct' \
            if predicted == actual else 'wrong'
            subjects_informativeness.informativeness = sample, actual, 'correct' \
            if predicted == actual else 'wrong'

    # print('acc: ', np.array(acc).mean())
    cross_subjects_informativeness.informativeness = subjects_informativeness
    cross_hemispheres_informativeness.informativeness = hemispheres_informatoveness


In [40]:
df = pd.DataFrame([
    pd.Series(
        dict(
            sorted(
                cross_subjects_informativeness.acc().items(),
                key=lambda item: item[0]
            )
        )
    ),
    pd.Series(
        dict(
            sorted(
                cross_subjects_informativeness.tnr().items(),
                key=lambda item: item[0]
            )
        )
    ),
    pd.Series(
        dict(
            sorted(
                cross_subjects_informativeness.tpr().items(),
                key=lambda item: item[0]
            )
        )
    )
])\
    .T\
    .rename(
    columns={
        0: 'acc',
        1: 'spec',
        2: 'sens'
    }
)

print(df)

df.to_csv('/home/user/Documents/cross_hemispheres_analysis_Eng1&2.csv')

           acc      spec      sens
B1C2  0.593741  0.566960  0.661268
B1R1  0.943664  0.918529  0.971559
G1R1  0.971040  0.967070  0.975110
G1V2  0.491887  0.493411  0.458599
J1T2  0.472808  0.477791  0.465513
K1V1  0.479073  0.300293  0.488148
L1P1  0.968103  0.960690  0.975657
L2M1  0.646032  0.679241  0.623245
M1G2  0.733508  0.945288  0.660555
M1N2  0.928267  0.889090  0.975835
M2S2  0.683802  0.625087  0.833835
N2K2  0.593690  0.879329  0.555618
O1O2  0.942102  0.919358  0.968397
R1D2  0.957813  0.966650  0.949505
S1A2  0.355623  0.408578  0.163627
S1B1  0.713685  0.913161  0.643823
S1H1  0.466303  0.479497  0.435388


In [34]:
# regions wpli+eigen & envelope+percolation

path = f'/home/user//Documents/initial_stats.pkl'
stats = pickle.load(open(path, 'rb'))

In [37]:
stat1 = stats[0]
stat2 = stats[-1]

cross_subjects_informativeness = CrossInformativeness()

for _ in range(100):
    features = ['4-8Hz_wpli', '4-8Hz_envelope']

    full_true_data_1 = stat1.datasets['true']['4-8Hz_wpli']
    full_true_data_2 = stat2.datasets['true']['4-8Hz_envelope']
    full_false_data_1 = stat1.datasets['false_mirror']['4-8Hz_wpli']
    full_false_data_2 = stat2.datasets['false_mirror']['4-8Hz_envelope']

    full_true_data = pd.concat([full_true_data_1, full_true_data_2], axis=1)

    full_false_data = pd.concat([full_false_data_1, full_false_data_2], axis=1)

    acc, spec, sens, pospred, negpred = list(), list(), list(), list(), list()

    subjects_informativeness = SubjectsInformativeness()

    for _ in range(100):

        # Engel1&2 - train, the rest - test
        scaler = StandardScaler()
        clf = svm.SVC(kernel='sigmoid')
        engel_1_2_true = full_true_data.loc[(elem[:4] in [*ENGEL1, *ENGEL2] and not elem[:4] in REJECTED for elem in full_true_data.index)].assign(resected=True)
        engel_1_2_false = full_false_data.loc[(elem[:4] in [*ENGEL1, *ENGEL2] and not elem[:4] in REJECTED for elem in full_false_data.index)].assign(resected=False)
        engel_3_4_true = full_true_data\
            .loc[(elem[:4] not in [*ENGEL1, *ENGEL2, *REJECTED] for elem in full_true_data.index)].assign(resected=True)
        engel_3_4_false = full_false_data\
            .loc[(elem[:4] not in [*ENGEL1, *ENGEL2, *REJECTED] for elem in full_false_data.index)].assign(resected=False)
        engel_1_2 = pd.concat([engel_1_2_true, engel_1_2_false], axis=0)
        engel_1_2_train, engel_1_2_test = np.array_split(engel_1_2.sample(frac=1), 2)
        engel_3_4 = pd.concat([engel_3_4_true, engel_3_4_false], axis=0)

        y_train = engel_1_2_train['resected'].to_numpy()
        x_train = scaler.fit_transform(engel_1_2_train.drop(['resected'], axis=1))

        # engel_test = pd.concat([engel_3_4, engel_1_2_test], axis=0).sample(frac=1)
        engel_test = engel_1_2_test
        test_samples = engel_test.index.to_list()
        y_test = engel_test['resected'].to_numpy()
        x_test = scaler.fit_transform(engel_test.drop(['resected'], axis=1))

        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)

        for predicted, actual, sample, value in zip(pred, y_test, test_samples, x_test):
            subjects_informativeness.informativeness = sample, actual, 'correct'\
                if predicted == actual else 'wrong'
        acc.append(accuracy_score(y_test, pred))

        # if not any([tn + fp == 0, tp + fn == 0, tn + fn == 0, tp + fp == 0]):
        #     spec.append(tn / (tn + fp))
        #     sens.append(tp / (tp + fn))
        #     negpred.append(tn/(tn + fn))
        #     pospred.append(tp/(tp + fp))
    # print('acc: ', np.array(acc).mean())
    cross_subjects_informativeness.informativeness = subjects_informativeness

In [38]:

df = pd.DataFrame([
    pd.Series(
        dict(
            sorted(
                cross_subjects_informativeness.acc().items(),
                key=lambda item: item[0]
            )
        )
    ),
    pd.Series(
        dict(
            sorted(
                cross_subjects_informativeness.tnr().items(),
                key=lambda item: item[0]
            )
        )
    ),
    pd.Series(
        dict(
            sorted(
                cross_subjects_informativeness.tpr().items(),
                key=lambda item: item[0]
            )
        )
    )
])\
    .T\
    .rename(
    columns={
        0: 'acc',
        1: 'spec',
        2: 'sens'
    }
)

print(df)
df.to_csv('/home/user/Documents/cross_regions_analysis_Eng1&2.csv')


           acc      spec      sens
B1C2  0.506243  0.506624  0.506012
B1R1  0.734763  0.735956  0.733580
G1R1  0.826107  0.821641  0.830825
G1V2  0.683769  0.622493  0.867873
J1T2  0.491037  0.441282  0.493418
K1V1  0.594397  0.591699  0.597210
L1P1  0.599588  0.577812  0.640597
L2M1  0.592070  0.571948  0.626103
M1G2  0.646031  0.658354  0.635776
M1N2  0.652498  0.798278  0.602839
M2S2  0.518425  0.509299  0.637586
N2K2  0.396261  0.293487  0.431090
O1O2  0.584784  0.697479  0.552091
R1D2  0.511443  0.503658  0.617442
S1A2  0.120297  0.083647  0.151037
S1B1  0.536596  0.524226  0.575695
S1H1  0.864598  0.810340  0.942361
