In [3]:
import joblib
import mne
import numpy as np
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler
from lightgbm import LGBMClassifier
from sklearn.metrics import (accuracy_score, average_precision_score,
                             confusion_matrix, f1_score, precision_score,
                             recall_score, roc_auc_score)
from sklearn.model_selection import StratifiedKFold

from add_subject_utils import get_subj_data, map_nan_index
import pickle

In [9]:
mne.set_log_level("error")

Rest of add_new_subj notebook

In [None]:
meta_data = ['subj', 'epoch_id', 'chan_name', 'epoch']
feat_to_choose_depth = ['teager_kaiser_energy_1_std', 'teager_kaiser_energy_2_std', 'chan_ptp', 'ptp_amp', 'hjorth_mobility', 'hjorth_complexity', 'teager_kaiser_energy_3_std', 'chan_kurt', 'teager_kaiser_energy_5_mean', 'teager_kaiser_energy_0_std', 'kurtosis', 'teager_kaiser_energy_0_mean', 'teager_kaiser_energy_1_mean', 'teager_kaiser_energy_5_std', 'samp_entropy']
feat_to_choose2 = ['chan', 'bands_gf', 'bands_gamma', 'bands_fast', 'bands_beta/gamma', 'bands_theta/fast', 'ptp', 'skew', 'teager', 'bands_sf', 'bands_bf', 'bands_bg', 'rms', 'katz', 'kurt', 'slope', 'mobility', 'hurst', 'wavelet']
feat_to_choose3 = ['chan', 'bands_gf', 'bands_gamma', 'energy_freq_bands_fast', 'bands_beta/gamma', 'bands_theta/fast', 'ptp', 'skew', 'teager', 'bands_sf', 'bands_bf', 'rms', 'kurt', 'slope', 'mobility', 'eog1_wavelet_coef_energy_0']
remove_3 = ['gamma/beta', 'fast/theta', 'fast/gamma']
remove_4 = ['gamma/sigma', 'fast/alpha', 'coef_1', '2_mean']
# get only columns that contain the string in feat_to_choose
x_feat = subj_feat[subj_feat.columns[subj_feat.columns.str.contains('|'.join(feat_to_choose2))]]
# now remove the metadata and some unwanted features
clean_feat = x_feat[x_feat.columns[~x_feat.columns.str.contains('|'.join(meta_data+remove_3))]]
clean_feat

Unnamed: 0,eog1_energy_freq_bands_gamma,eog1_energy_freq_bands_fast,eog1_hjorth_mobility,eog1_hjorth_mobility_spect,eog1_hurst_exp,eog1_katz_fd,eog1_kurtosis,eog1_pow_freq_bands_gamma,eog1_pow_freq_bands_fast,eog1_pow_freq_bands_theta/fast,...,eog2_wavelet_coef_energy_1,eog2_wavelet_coef_energy_2,eog2_wavelet_coef_energy_3,eog2_wavelet_coef_energy_4,eog2_energy_freq_bands_bg,eog2_energy_freq_bands_bf,eog2_energy_freq_bands_sf,eog2_energy_freq_bands_gf,eog2_chan_ptp,eog2_chan_kurt
0,0.092207,0.000079,0.072972,1.724620,0.752238,1.242145,1.728290,0.017627,0.000323,1544.998765,...,0.001134,0.048922,0.629345,1.829672,0.694779,557.409740,493.295773,802.283626,10.359066,3.673379
1,0.072265,0.000273,0.111870,2.107009,0.618507,1.455004,3.690966,0.041697,0.001513,314.222095,...,0.000157,0.016922,0.146240,2.472885,2.287288,1169.969886,370.600312,511.509633,10.359066,3.673379
2,0.060310,0.000723,0.241754,1.348111,0.835812,1.202516,2.373995,0.023218,0.000578,829.726495,...,0.000729,0.039661,0.212950,0.856875,1.336773,109.094089,23.563429,81.610027,10.359066,3.673379
3,0.069587,0.000545,0.147197,1.220273,0.871311,1.220477,1.959333,0.008369,0.000225,2373.250111,...,0.001720,0.042194,0.204139,1.383887,0.794851,84.977251,66.848866,106.909595,10.359066,3.673379
4,0.094975,0.000335,0.119188,3.409393,0.819717,1.303538,1.565871,0.005502,0.000176,3013.886212,...,0.001212,0.031702,0.528629,0.481083,0.609592,168.319031,58.348508,276.117657,10.359066,3.673379
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1192,0.017838,0.000049,0.160558,1.170072,1.252324,1.255017,2.791343,0.004847,0.000779,674.446268,...,0.000161,0.004853,0.106114,1.726922,2.168267,1095.378631,387.699334,505.186288,10.359066,3.673379
1193,0.029345,0.000052,0.165910,2.281380,0.989556,1.081192,1.772199,0.006619,0.001344,390.683242,...,0.000052,0.007928,0.155167,0.547002,2.103573,3620.123816,543.724512,1720.940141,10.359066,3.673379
1194,0.045333,0.000124,0.027994,1.724609,0.775838,1.194464,1.403741,0.003874,0.000345,1539.859461,...,0.000088,0.004882,0.123664,1.340190,1.950196,1415.806745,470.665064,725.981701,10.359066,3.673379
1195,0.047034,0.000111,0.100966,1.415740,0.810222,1.170506,2.907787,0.014566,0.001176,434.453827,...,0.000136,0.006415,0.164723,1.440781,1.232480,1188.050163,618.843285,963.951115,10.359066,3.673379


In [None]:
# unbalanced model
metrics = {'accuracy': [], 'precision': [], 'sensitivity': [], 'specificity': [],'f1': [], 'ROCAUC': [], 'PRAUC': []}
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=8)
i = 1
x, y = clean_feat, y_depth
for train_index, test_index in kf.split(x, y):
    print(f'Fold {i}')
    i += 1
    z_model = LGBMClassifier()
    x_train_fold, x_test_fold = x.iloc[train_index], x.iloc[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    z_model.fit(x_train_fold, y_train_fold)
    y_pred = z_model.predict(x_test_fold)
    y_true = y_test_fold
    # save scores in dict
    metrics['accuracy'].append(accuracy_score(y_true, y_pred))
    metrics['precision'].append(precision_score(y_true, y_pred))
    metrics['sensitivity'].append(recall_score(y_true, y_pred))
    metrics['f1'].append(f1_score(y_true, y_pred))
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    metrics['specificity'].append(tn / (tn + fp))
    metrics['ROCAUC'].append(roc_auc_score(y_true, y_pred))
    metrics['PRAUC'].append(average_precision_score(y_true, y_pred))

# print results as df
results = pd.DataFrame(metrics)
# add mean row
results.loc['mean'] = results.mean()
print(sum(y)/len(y))
results

Fold 1
[LightGBM] [Info] Number of positive: 68, number of negative: 889
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001462 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23460
[LightGBM] [Info] Number of data points in the train set: 957, number of used features: 92
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.071055 -> initscore=-2.570590
[LightGBM] [Info] Start training from score -2.570590
Fold 2
[LightGBM] [Info] Number of positive: 68, number of negative: 889
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23460
[LightGBM] [Info] Number of data points in the train set: 957, number of used features: 92
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.071055 -> initscore=-2.570590
[LightGBM] [Info] Start training from score -2.570590
Fold 3
[Li

Unnamed: 0,accuracy,precision,sensitivity,specificity,f1,ROCAUC,PRAUC
0,0.925,0.0,0.0,0.995516,0.0,0.497758,0.070833
1,0.933333,0.666667,0.117647,0.995516,0.2,0.556581,0.140931
2,0.937238,1.0,0.117647,1.0,0.210526,0.558824,0.180409
3,0.945607,0.833333,0.294118,0.995495,0.434783,0.644807,0.295307
4,0.937238,0.625,0.294118,0.986486,0.4,0.640302,0.234033
mean,0.935683,0.625,0.164706,0.994603,0.249062,0.579654,0.184303


In [None]:
# undersample
rus = RandomUnderSampler(random_state=8)
x, y = rus.fit_resample(clean_feat, y_depth)
len(y)

170

In [None]:
metrics = {'accuracy': [], 'precision': [], 'sensitivity': [], 'specificity': [],'f1': [], 'ROCAUC': [], 'PRAUC': []}
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=8)
i = 1
for train_index, test_index in kf.split(x, y):
    print(f'Fold {i}')
    i += 1
    z_model = LGBMClassifier()
    x_train_fold, x_test_fold = x.iloc[train_index], x.iloc[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    z_model.fit(x_train_fold, y_train_fold)
    y_pred = z_model.predict(x_test_fold)
    y_true = y_test_fold
    # save scores in dict
    metrics['accuracy'].append(accuracy_score(y_true, y_pred))
    metrics['precision'].append(precision_score(y_true, y_pred))
    metrics['sensitivity'].append(recall_score(y_true, y_pred))
    metrics['f1'].append(f1_score(y_true, y_pred))
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    metrics['specificity'].append(tn / (tn + fp))
    metrics['ROCAUC'].append(roc_auc_score(y_true, y_pred))
    metrics['PRAUC'].append(average_precision_score(y_true, y_pred))

# print results as df
results = pd.DataFrame(metrics)
# add mean row
results.loc['mean'] = results.mean()
print(sum(y)/len(y))
results

In [None]:
# a new model includes all the data
z_model = LGBMClassifier().fit(x, y)
y_proba = z_model.predict_proba(clean_feat).T
y_scalp = [p > 0.8 for p in y_proba[1]]
print(sum(y_scalp), sum(y_depth))
index_map = map_nan_index(r'D:\Bonn\012fn1\P701_mtl_clean.edf')
scalp_indexes = np.where(np.array(y_scalp) == True)[0]
scalp_onsets = [index_map[int(x)] / 4 for x in scalp_indexes]
depth_indexes = np.where(y_depth == 1)[0]
depth_onsets = [index_map[int(x)] / 4 for x in depth_indexes]
both = [x for x in scalp_onsets if x in depth_onsets]
depth_without_both = [x for x in depth_onsets if x not in both]
scalp_without_both = [x for x in scalp_onsets if x not in both]
# all annot
annot = mne.Annotations(scalp_without_both, [0.25] * len(scalp_without_both), ['scalp'] * len(scalp_without_both)).append(depth_without_both, [0.25] * len(depth_without_both), ['depth'] * len(depth_without_both)).append(both, [0.25] * len(both), ['both'] * len(both))
raw.set_annotations(annot)

raw.plot(duration=30, scalings='auto')

In [None]:
v2_model = joblib.load('validation_models_v2\\lgbm_s13_f98_b_sym.pkl')