In [1]:
import itertools
import operator
import re
from abc import *
from copy import deepcopy
from operator import itemgetter
from typing import *
import numpy as np
import scipy as sp
import networkx as nx
import mne
import time
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors._dist_metrics import DistanceMetric
from sklearn.utils import shuffle
from typing import List, Tuple
from sklearn.model_selection import train_test_split
from nodestimation.learning.estimation import collect_statistic, \
    compute_importance, collect_cross_statistic, make_selection_map, \
    select, separate_datasets, selected_statistic, choose_best, selected_data, make_feature_selection
from nodestimation.learning.informativeness import CrossInformativeness, Informativeness, SubjectsInformativeness, \
    NodesInformativeness
from nodestimation.learning.networking import sparse_graph, graph_to_hemispheres, hemispheres_division_modularity, \
    metric_for_hemispheres
from nodestimation.processing.features import prepare_features
from nodestimation.project import find_subject_dir, conditions_unique_code
from nodestimation.pipeline import pipeline
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import nibabel
import nilearn.plotting as nplt
from nodestimation.project.actions import read
import nodestimation as nd
from nodestimation.learning.modification import append_series, promote
import nodestimation.learning.modification as lmd
from nodestimation.project.subject import Subject
from sklearn.preprocessing import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectFromModel
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
import matplotlib as mpl
from nodestimation.learning.selection import SubjectsStatistic, Wilcoxon, Mannwhitneyu, Test
from scipy.stats import wilcoxon
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier
from scipy.fftpack import fftfreq, irfft, rfft
from scipy.fftpack import fftfreq, irfft, rfft

# ENGEL1 = [
#     'B1C2',
#     'B1R1',
#     'G1R1',
#     'G1V2',
#     'J1T2',
#     'K1V1',
#     'L1P1',
#     'M1G2',
#     'M1N2',
#     'O1O2',
#     'R1D2',
#     'S1A2',
#     'S1B1',
#     'S1H1',
#     'S1U3'
# ]
# ENGEL2 = [
#     'L2M1',
#     'M2S2',
#     'N2K2',
#     'P1H2'
# ]
# ENGEL34 = [
#     'N3S2',
#     'S3R1',
#     'K4L2'
# ]
#
# REJECTED = [
#     'S1U3',
#     'P1H2'
# ]

# Names of the subjects that are kept for the analysis; the loaded subjects
# are filtered against this list by name.
INCLUDED = ['B1R1', 'G1R1', 'G1V2', 'L1P1', 'M1G2', 'M1N2', 'O1O2', 'R1D2']

In [2]:
# Obtain every subject from the nodestimation pipeline for the 4-8 Hz band,
# with all connectivity methods and centrality metrics used in this notebook.
SUBJECTS = pipeline(
    methods=[
        'wpli', 'envelope', 'coh', 'imcoh', 'plv',
        'ciplv', 'ppc', 'pli', 'pli2_unbiased', 'wpli2_debiased',
    ],
    freq_bands=(4, 8),
    centrality_metrics=['eigen', 'between', 'degree', 'info'],
)

# Keep only the subjects whose names are listed in INCLUDED.
subjects = list(filter(lambda candidate: candidate.name in INCLUDED, SUBJECTS))

All computation has been already done, loading of the existing file with the solution...


In [3]:
# Build the cross-subject statistic for the 'resected' label using the
# eigencentrality datasets of the included subjects.
stat = SubjectsStatistic(
    subjects,
    'resected',
    centrality_metric='eigen',
)
print('stat done')

stat done


In [4]:
# One dataset per graph metric computed for the hemispheres of every subject.
# Only global efficiency is active; the other metrics are kept for reference.
DATASETS = list()
# DATASETS.append(metric_for_hemispheres(subjects, nx.algorithms.cluster.transitivity))
# DATASETS.append(metric_for_hemispheres(subjects, nx.algorithms.smetric.s_metric, normalized=False))
DATASETS.append(
    metric_for_hemispheres(subjects, nx.algorithms.global_efficiency)
)
print('graph done')

R1D2: DONE, RUNTIME: 6.836315631866455
L1P1: DONE, RUNTIME: 5.4242987632751465
M1G2: DONE, RUNTIME: 6.571587085723877
G1V2: DONE, RUNTIME: 5.862873554229736
G1R1: DONE, RUNTIME: 5.729758977890015
M1N2: DONE, RUNTIME: 6.250350713729858
B1R1: DONE, RUNTIME: 5.544912815093994
O1O2: DONE, RUNTIME: 6.459646701812744
graph done


In [5]:
# Repeated random-split SVM classification of the hemisphere-level graph
# metrics. For every dataset, 100 rounds of 100 random train/test splits are
# run; each test prediction is recorded as 'correct' or 'wrong' per sample,
# and every round is accumulated into the cross-informativeness collectors.
datasets = DATASETS.copy()
names = ['global_efficiency']
cross_hemispheres_informativeness_arr = list()
cross_subjects_informativeness_arr = list()

for dataset, name in zip(datasets, names):
    cross_hemispheres_informativeness = CrossInformativeness()
    cross_subjects_informativeness = CrossInformativeness()

    # Labels, scaled features and sample labels do not change between splits,
    # so they are prepared once instead of inside the innermost loop.
    # NOTE(review): the scaler is fitted on the whole dataset before splitting,
    # as in the original cell - confirm this is intended.
    y = dataset['resected'].to_numpy()
    scaler = StandardScaler()
    x = scaler.fit_transform(
        dataset[[f'{name}_for_wpli_4-8Hz', f'{name}_for_envelope_4-8Hz']].to_numpy()
    )
    samples = dataset.index.to_numpy()

    for _ in range(100):
        hemispheres_informativeness = Informativeness()
        subjects_informativeness = SubjectsInformativeness()

        for _ in range(100):
            # The sample labels are split as a third array alongside x and y.
            # Stacking them onto the feature matrix (np.append) would cast the
            # numeric features to strings whenever the labels are strings.
            x_train, x_test, y_train, y_test, train_samples, test_samples = \
                train_test_split(x, y, samples)

            clf = svm.SVC()
            clf.fit(x_train, y_train)
            pred = clf.predict(x_test)

            for predicted, actual, sample in zip(pred, y_test, test_samples):
                outcome = 'correct' if predicted == actual else 'wrong'
                hemispheres_informativeness.informativeness = sample, actual, outcome
                subjects_informativeness.informativeness = sample, actual, outcome

        cross_subjects_informativeness.informativeness = subjects_informativeness
        cross_hemispheres_informativeness.informativeness = hemispheres_informativeness

    cross_hemispheres_informativeness_arr.append(cross_hemispheres_informativeness)
    cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [6]:
# Repeated random-split SVM classification of node-level features
# (resected nodes vs. the 'false_mirror' samples). 100 rounds of 100 random
# train/test splits are run; each test prediction is recorded as 'correct' or
# 'wrong' per sample in the node, subject and sample collectors.
cross_nodes_informativeness = CrossInformativeness()
cross_subjects_informativeness = CrossInformativeness()
cross_samples_informativeness = CrossInformativeness()

features = ['4-8Hz_wpli', '4-8Hz_envelope']

# The labelled dataset, its scaling and the sample labels are identical for
# every split, so they are built once instead of 10,000 times.
# NOTE(review): assumes stat.datasets returns the same frames on every access.
true_data = stat.datasets['true'][features].assign(resected=True)
false_data = stat.datasets['false_mirror'][features].assign(resected=False)
dataset = pd.concat([true_data, false_data], axis=0)

y = dataset['resected'].to_numpy()
dataset = dataset.drop(['resected'], axis=1)
samples = dataset.index.to_numpy()

# NOTE(review): the scaler is fitted on the whole dataset before splitting,
# as in the original cell - confirm this is intended.
scaler = StandardScaler()
x = scaler.fit_transform(dataset)

for _ in range(100):
    samples_informativeness = Informativeness()
    nodes_informativeness = NodesInformativeness()
    subject_informativeness = SubjectsInformativeness()

    for _ in range(100):
        clf = svm.SVC()

        # train_test_split shuffles by default, so no separate pre-shuffle of
        # the frame is needed. The sample labels are split as a third array:
        # stacking them onto the feature matrix (np.append) would cast the
        # numeric features to strings.
        x_train, x_test, y_train, y_test, train_samples, test_samples = \
            train_test_split(x, y, samples)

        clf.fit(x_train, y_train)
        pred = clf.predict(x_test)

        for predicted, actual, sample in zip(pred, y_test, test_samples):
            outcome = 'correct' if predicted == actual else 'wrong'
            nodes_informativeness.informativeness = sample, actual, outcome
            subject_informativeness.informativeness = sample, actual, outcome
            samples_informativeness.informativeness = sample, actual, outcome

    cross_nodes_informativeness.informativeness = nodes_informativeness
    cross_subjects_informativeness.informativeness = subject_informativeness
    cross_samples_informativeness.informativeness = samples_informativeness

# NOTE: appends to the list created by the hemisphere cell; re-running this
# cell alone appends another entry.
cross_subjects_informativeness_arr.append(cross_subjects_informativeness)

In [None]:
# high (nodes), eigencentrality
#
# Two logistic regressions are trained on samples filtered by their
# per-sample accuracy (true samples: >= 0.60; false samples: >= `percentage`),
# each is evaluated on one held-out subject, and an SVM is then trained on the
# pair of predictions ("stacking").

results = list()

test_subject_name = 'K1V1'
features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_samples_informativeness.acc()

# The held-out subject is not listed in INCLUDED, so it cannot be found in
# `subjects`; it is looked up among all subjects returned by the pipeline.
test_subject = next(
    (candidate for candidate in SUBJECTS if candidate.name == test_subject_name),
    None
)
if test_subject is None:
    raise ValueError(f'Test subject {test_subject_name} is not among the pipeline subjects')

for percentage in [0.0, 0.6]:

    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10)
    # clf = svm.SVC(kernel='sigmoid')
    # clf = svm.SVC()
    # clf = svm.SVC(kernel='linear')
    # clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=3)
    clf = LogisticRegression(class_weight={True: 1, False: .7})
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = QuadraticDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(100, 100), max_iter=1450)

    true_data = stat.datasets['true'][features].assign(resected=True)
    false_data = stat.datasets['false_mirror'][features].assign(resected=False)

    # Drop poorly classified samples; samples whose label contains the test
    # subject's name are never dropped by this filter.
    true_data = true_data.drop(index=[
        sample for sample in true_data.index
        if accur[sample] < 0.60 and test_subject_name not in sample
    ])
    false_data = false_data.drop(index=[
        sample for sample in false_data.index
        if accur[sample] < percentage and test_subject_name not in sample
    ])

    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)

    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)

    x = scaler.fit_transform(dataset)

    clf.fit(x, y)

    dataset = test_subject.datasets['eigen'][['4-8Hz_wpli', '4-8Hz_envelope', 'resected']]

    # NOTE(review): the scaler is re-fitted on the test subject's data rather
    # than reusing the training statistics (scaler.transform). Kept as in the
    # original cell - confirm this per-subject scaling is intended.
    x_test = scaler.fit_transform(
        dataset[['4-8Hz_wpli', '4-8Hz_envelope']].to_numpy()
    )
    y_test = dataset['resected'].to_numpy()

    y_pred = clf.predict(x_test)

    # Explicit labels keep the matrix 2x2 even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[False, True]).ravel()

    print(f'Training on 60%-above true data and {percentage*100}%-above false data, testing on {test_subject}')
    print(f'acc: {(tn + tp)/(tn + tp + fn + fp)}')
    print(f'spec: {tn / (tn + fp)}')
    print(f'sens: {tp / (tp + fn)}\n')

    results.append(y_pred)

# results == [predictions at 0.0, predictions at 0.6, ground truth]; the ground
# truth is appended after the loop instead of relying on `percentage == 0.6`.
results.append(y_test)

# Stack both prediction vectors as a two-feature dataset.
x1 = np.array([1 if res == True else 0 for res in results[0]])
x2 = np.array([1 if res == True else 0 for res in results[1]])
x = np.array([x1, x2])
y = np.array(results[2])

x_train, x_test, y_train, y_test = train_test_split(x.T, y.T)

clf = svm.SVC()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[False, True]).ravel()

print('Training on previous results')
print('acc: ', (tn + tp)/(tn + tp + fn + fp))
print('spec: ', tn / (tn + fp))
print('sens: ', tp / (tp + fn))


# results = np.array(results)
# t1 = np.array([True if res2 and not res1 else False for res1, res2 in zip(results[0], results[1])])
# t2 = results[2]
#
# tn, fp, fn, tp = confusion_matrix(t2, t1).ravel()
#
# print('acc: ', (tn + tp)/(tn + tp + fn + fp))
# print('spec: ', tn / (tn + fp))
# print('sens: ', tp / (tp + fn))
#
# # for t11, t22 in zip(t1, t2):
# #     print(t11, t22)
#
# # for t11, t22, res in zip(results[0], results[1], results[2]):
# #     print(t22 and not t11, res)
#
# # for test, pred in zip(y_test, y_pred):
# #     print(test, pred)

In [70]:
# Leave-one-subject-out evaluation: for every subject a class-weighted linear
# SVM is trained on the accuracy-filtered samples of the other subjects and
# tested on that subject's own 'eigen' dataset.
accs = list()
specs = list()
senss = list()

features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_samples_informativeness.acc()

for subject in subjects:

    # if subject.name in ENGEL34 or subject.name in REJECTED:
    #     continue

    # The loop variable already is the held-out subject; no second lookup
    # (which shadowed `subject` in an inner loop) is needed.
    test_subject = subject

    true_data = stat.datasets['true'][features].assign(resected=True)
    false_data = stat.datasets['false_mirror'][features].assign(resected=False)

    # Drop poorly classified samples and every sample whose label contains
    # the held-out subject's name.
    true_data = true_data.drop(index=[
        sample for sample in true_data.index
        if accur[sample] < 0.80 or test_subject.name in sample
    ])
    false_data = false_data.drop(index=[
        sample for sample in false_data.index
        if accur[sample] < 0.20 or test_subject.name in sample
    ])

    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)
    # print(dataset)

    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)

    x = scaler.fit_transform(dataset)

    # Class counts, used to weight each class by the share of the other one.
    true = int(np.count_nonzero(y))
    false = len(y) - true

    # print(true, false, true/false)

    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10)
    # clf = svm.SVC(class_weight={True: 1, False: .5})
    # clf = svm.SVC(class_weight={True: false/(true+false), False: true/(true+false)})
    # clf = svm.SVC(kernel='sigmoid', class_weight={True: false/(true+false), False: true/(true+false)})
    # clf = svm.SVC(kernel='poly', class_weight={True: false/(true+false), False: true/(true+false)})
    clf = svm.SVC(kernel='linear', class_weight={True: false/(true+false), False: true/(true+false)})
    # clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=3)
    # clf = LogisticRegression(class_weight={True: 1, False: .7})
    # clf = LogisticRegression(class_weight={True: 1, False: 0.3})
    # clf = LogisticRegression(class_weight={True: false/(true+false), False: true/(true+false)}) #*
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis() #*
    # clf = QuadraticDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(100, 100), max_iter=1450)

    clf.fit(x, y)

    dataset = test_subject.datasets['eigen'][['4-8Hz_wpli', '4-8Hz_envelope', 'resected']]
    dataset = dataset.sample(frac = 1)
    # print(dataset)

    # NOTE(review): the scaler is re-fitted on the test subject's data rather
    # than reusing the training statistics (scaler.transform). Kept as in the
    # original cell - confirm this per-subject scaling is intended.
    x_test = scaler.fit_transform(
        dataset[['4-8Hz_wpli', '4-8Hz_envelope']].to_numpy()
    )
    y_test = dataset['resected'].to_numpy()

    y_pred = clf.predict(x_test)

    # Explicit labels keep the matrix 2x2 even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[False, True]).ravel()

    acc = (tn + tp)/(tn + tp + fn + fp)
    spec = tn / (tn + fp)
    sens = tp / (tp + fn)

    accs.append(acc)
    specs.append(spec)
    senss.append(sens)

    # Report the subjects for which all three metrics beat chance level.
    if acc > 0.5 and spec > 0.5 and sens > 0.5:
        print(test_subject)

print(f'mean acc: {np.array(accs).mean()}')
print(f'mean spec: {np.array(specs).mean()}')
print(f'mean sens: {np.array(senss).mean()}')


Subject L1P1 at ./Source/Subjects/L1P1 

Subject M1G2 at ./Source/Subjects/M1G2 

Subject G1R1 at ./Source/Subjects/G1R1 

Subject O1O2 at ./Source/Subjects/O1O2 

mean acc: 0.6458333333333333
mean spec: 0.6447422530556667
mean sens: 0.65625


In [94]:
# Leave-one-subject-out evaluation with a class-weighted SVM (default kernel).
# Training uses the accuracy-filtered samples of the other subjects; the test
# set combines the held-out subject's 'true' samples with the frame returned
# by stat.random_samples() as the negative class.
accs = list()
specs = list()
senss = list()

# Not used in this cell; kept because a later cell reads `percentage`
# without defining it.
percentage = 0.10

features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_samples_informativeness.acc()

for subject in subjects:

    # if subject.name in ENGEL34 or subject.name in REJECTED:
    #     continue

    # The loop variable already is the held-out subject; no second lookup
    # (which shadowed `subject` in an inner loop) is needed.
    test_subject = subject

    true_data = stat.datasets['true'][features].assign(resected=True)
    false_data = stat.datasets['false_mirror'][features].assign(resected=False)

    # Drop poorly classified samples and every sample whose label contains
    # the held-out subject's name.
    true_data = true_data.drop(index=[
        sample for sample in true_data.index
        if accur[sample] < 0.80 or test_subject.name in sample
    ])
    false_data = false_data.drop(index=[
        sample for sample in false_data.index
        if accur[sample] < 0.20 or test_subject.name in sample
    ])

    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)
    # print(dataset)

    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)

    x = scaler.fit_transform(dataset)

    # Class counts, used to weight each class by the share of the other one.
    true = int(np.count_nonzero(y))
    false = len(y) - true

    # print(true, false, true/false)

    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10)
    # clf = svm.SVC(class_weight={True: 1, False: .5})
    clf = svm.SVC(class_weight={True: false/(true+false), False: true/(true+false)})
    # clf = svm.SVC(kernel='sigmoid', class_weight={True: false/(true+false), False: true/(true+false)}) #**
    # clf = svm.SVC(kernel='poly', class_weight={True: false/(true+false), False: true/(true+false)})
    # clf = svm.SVC(kernel='precomputed')
    # clf = svm.SVC(kernel='linear', class_weight={True: false/(true+false), False: true/(true+false)}) #**
    # clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=3)
    # clf = LogisticRegression(class_weight={True: 1, False: .7}) #**
    # clf = LogisticRegression(class_weight={True: 1, False: 0.3})
    # clf = LogisticRegression(class_weight={True: false/(true+false), False: true/(true+false)}) #*
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = QuadraticDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(100, 100), max_iter=1450)

    clf.fit(x, y)

    # Test set: the held-out subject's 'true' samples plus the samples from
    # stat.random_samples() labelled as not resected.
    true_data = stat.datasets['true'][features].assign(resected=True)
    # false_data = stat.datasets['false_res'][features].copy()
    false_data = stat.random_samples()[features].assign(resected=False)
    true_data = true_data.drop(index=[
        sample for sample in true_data.index
        if test_subject.name not in sample
    ])
    # for sample in false_data.index:
    #     if test_subject.name not in sample:
    #         false_data = false_data.drop(index=sample)

    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)
    # print(dataset)

    # NOTE(review): the scaler is re-fitted on the test data rather than
    # reusing the training statistics (scaler.transform). Kept as in the
    # original cell - confirm this is intended.
    x_test = scaler.fit_transform(
        dataset[['4-8Hz_wpli', '4-8Hz_envelope']].to_numpy()
    )
    y_test = dataset['resected'].to_numpy()

    y_pred = clf.predict(x_test)

    # Explicit labels keep the matrix 2x2 even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[False, True]).ravel()

    acc = (tn + tp)/(tn + tp + fn + fp)
    spec = tn / (tn + fp)
    sens = tp / (tp + fn)

    accs.append(acc)
    specs.append(spec)
    senss.append(sens)

    # Report the subjects for which all three metrics beat chance level.
    if acc > 0.5 and spec > 0.5 and sens > 0.5:
        print(test_subject)

print(f'mean acc: {np.array(accs).mean()}')
print(f'mean spec: {np.array(specs).mean()}')
print(f'mean sens: {np.array(senss).mean()}')



Subject L1P1 at ./Source/Subjects/L1P1 

Subject M1G2 at ./Source/Subjects/M1G2 

Subject G1R1 at ./Source/Subjects/G1R1 

Subject O1O2 at ./Source/Subjects/O1O2 

mean acc: 0.6497607655502393
mean spec: 0.6458333333333333
mean sens: 0.65625


In [None]:
# Two differently class-weighted logistic regressions are trained on the
# accuracy-filtered true samples (>= 0.80) and all 'false_mirror' samples,
# each is evaluated on one held-out subject, and an SVM is then trained on the
# pair of predictions ("stacking").
results = list()

test_subject_name = 'K1V1'
features = ['4-8Hz_wpli', '4-8Hz_envelope']
accur = cross_samples_informativeness.acc()

# The held-out subject is not listed in INCLUDED, so it cannot be found in
# `subjects`; it is looked up among all subjects returned by the pipeline.
test_subject = next(
    (candidate for candidate in SUBJECTS if candidate.name == test_subject_name),
    None
)
if test_subject is None:
    raise ValueError(f'Test subject {test_subject_name} is not among the pipeline subjects')

classifiers = [
    LogisticRegression(class_weight={True: 1, False: .7}),
    LogisticRegression(class_weight={True: 1, False: .3}),
]

for i, clf in enumerate(classifiers):

    # clf = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0, max_depth=10)
    # clf = AdaBoostClassifier(n_estimators=10)
    # clf = svm.SVC(kernel='sigmoid')
    # clf = svm.SVC()
    # clf = svm.SVC(kernel='linear')
    # clf = SGDClassifier()
    # clf = KNeighborsClassifier(n_neighbors=3)
    # clf = RandomForestClassifier(max_depth=20)
    # clf = GaussianNB()
    # clf = LinearDiscriminantAnalysis()
    # clf = QuadraticDiscriminantAnalysis()
    # clf = KMeans(n_clusters=2, algorithm='full')
    # clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(100, 100), max_iter=1450)

    true_data = stat.datasets['true'][features].assign(resected=True)
    false_data = stat.datasets['false_mirror'][features].assign(resected=False)

    # Drop poorly classified true samples; samples whose label contains the
    # test subject's name are never dropped by this filter.
    true_data = true_data.drop(index=[
        sample for sample in true_data.index
        if accur[sample] < 0.80 and test_subject_name not in sample
    ])
    # for sample in false_data.index:
    #     if accur[sample] < percentage and test_subject not in sample:
    #         false_data = false_data.drop(index=sample)

    dataset = pd.concat([true_data, false_data], axis=0)
    dataset = dataset.sample(frac = 1)

    scaler = StandardScaler()

    y = dataset['resected'].to_numpy()
    dataset = dataset.drop(['resected'], axis=1)

    x = scaler.fit_transform(dataset)

    clf.fit(x, y)

    dataset = test_subject.datasets['eigen'][['4-8Hz_wpli', '4-8Hz_envelope', 'resected']]
    # dataset = dataset.sample(frac = 1)

    # NOTE(review): the scaler is re-fitted on the test subject's data rather
    # than reusing the training statistics (scaler.transform). Kept as in the
    # original cell - confirm this per-subject scaling is intended.
    x_test = scaler.fit_transform(
        dataset[['4-8Hz_wpli', '4-8Hz_envelope']].to_numpy()
    )
    y_test = dataset['resected'].to_numpy()

    y_pred = clf.predict(x_test)

    # Explicit labels keep the matrix 2x2 even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[False, True]).ravel()

    # The message states the filter applied in this cell; it no longer reads
    # `percentage`, which this cell never defines.
    print(f'Training on 80%-above true data and all false data, testing on {test_subject}')
    print(f'acc: {(tn + tp)/(tn + tp + fn + fp)}')
    print(f'spec: {tn / (tn + fp)}')
    print(f'sens: {tp / (tp + fn)}\n')

    # print('test  pred')
    # for test, pred in zip(y_test, y_pred):
    #     print(test, pred)
    results.append(y_pred)

# results == [predictions of classifier 0, predictions of classifier 1, ground truth]
results.append(y_test)

# Stack both prediction vectors as a two-feature dataset.
x1 = np.array([1 if res == True else 0 for res in results[0]])
x2 = np.array([1 if res == True else 0 for res in results[1]])
x = np.array([x1, x2])
y = np.array(results[2])

# scaler = StandardScaler()
#
# x = scaler.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(x.T, y.T)

clf = svm.SVC()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[False, True]).ravel()

print('Training on previous results')
print('acc: ', (tn + tp)/(tn + tp + fn + fp))
print('spec: ', tn / (tn + fp))
print('sens: ', tp / (tp + fn))


# results = np.array(results)
# t1 = np.array([True if res2 and not res1 else False for res1, res2 in zip(results[0], results[1])])
# t2 = results[2]
#
# tn, fp, fn, tp = confusion_matrix(t2, t1).ravel()
#
# print('acc: ', (tn + tp)/(tn + tp + fn + fp))
# print('spec: ', tn / (tn + fp))
# print('sens: ', tp / (tp + fn))
#
# # for t11, t22 in zip(t1, t2):
# #     print(t11, t22)
#
# Combine the two prediction vectors element-wise: a positive from the second
# classifier is taken first, otherwise a negative from the first classifier;
# when neither applies, no decision is recorded for that element.
score1 = list()
score2 = list()
for first_pred, second_pred, truth in zip(results[0], results[1], results[2]):
    if second_pred:
        score1.append(second_pred)
        print(second_pred, truth, first_pred, 't22')
        continue
    if first_pred:
        print('None', truth)
        continue
    score1.append(first_pred)
    print(first_pred, truth, second_pred, 't11')
#     if not t22:
#         score1.append(t22)
#         # print(t22, res)
#     elif t11:
#         score2.append(t11)
#         # print(t11, res)
#
# tn, fp, fn, tp = confusion_matrix(np.array(score1), results[2]).ravel()
#
# print('Training on previous results')
# print('acc: ', (tn + tp)/(tn + tp + fn + fp))
# print('spec: ', tn / (tn + fp))
# print('sens: ', tp / (tp + fn))
#
# tn, fp, fn, tp = confusion_matrix(score2, results[2]).ravel()
#
# print('Training on previous results')
# print('acc: ', (tn + tp)/(tn + tp + fn + fp))
# print('spec: ', tn / (tn + fp))
# print('sens: ', tp / (tp + fn))

#
# # for test, pred in zip(y_test, y_pred):
# #     print(test, pred)