## Notebook to plot ACC Seizure Detection Plots

This notebook is focused solely on FBTC seizures.

A patient-specific approach will be tested

SET 2
- Training: BLIW_1, BLIW_2, YIVL_0, YIVL_1, AGGA_0, AGGA_1, AGGA_2, YWJN_5, YWJN_6 
- Testing: VNVW_1, WOSQ_2, AGGA_3, YIVL_2,  BLIW_3, YWJN_7

In [2]:
# import os
from datetime import datetime

# imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, recall_score, precision_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix


from fast_ml.feature_selection import get_constant_features

from preepiseizures.src import Patient

## 1. Open data

In [3]:
data = pd.read_parquet('./data/features/anomaly_all_patients_acc_features.parquet')

print(data.shape)

# remove columns with nans
data = data.dropna(axis=1)
print(data.shape)
data['patient'] = data['patient_seizure'].apply(lambda x: x.split('_')[0])
data['patient'].unique()

training_set_keys = ['BLIW_0', 'BLIW_1', 'BLIW_2', 'YIVL_0', 'YIVL_1', 'AGGA_0', 'AGGA_1', 'AGGA_2', 'YWJN_4', 'YWJN_5', 'YWJN_6'] 
testing_set_keys = ['VNVW_0', 'VNVW_1', 'WOSQ_2', 'AGGA_3', 'YIVL_2',  'BLIW_3', 'YWJN_7']
all_training_set = data[data['patient_seizure'].isin(training_set_keys)].copy()
all_testing_set = data[data['patient_seizure'].isin(testing_set_keys)].copy()

features_cols = all_training_set.drop(columns=['patient_seizure', 'patient', 'timestamp', 'y']).columns

(226737, 1559)
(226737, 1557)


### 2.2 Select patient data

In [4]:
patient = 'YIVL'
patient_set = data[data['patient'] == patient].copy()
# testing_set = data[data['patient'] == patient].copy()
patient_ = Patient.patient_class(patient)
patient_.get_seizure_annotations()
print(patient_set.shape)

(56846, 1558)


In [5]:
patient_.seizure_table

Unnamed: 0,Crises,Data,Hora Clínica,Localização,lado,Focal / Generalisada,Fim,Sono/ Vigília,Tipo,Timestamp
0,Crise1,2022-08-10,01:09:27,FC e PO,L,FBTC,01:11:20,Sono,Hiperventilação e taquicardia e TC,2022-08-10 01:09:27
1,Crise2,2022-08-10,10:37:26,FC e PO,L,FBTC,10:39:19,Sono?,Hiperventilação e taquicardia e TC,2022-08-10 10:37:26
2,Crise3,2022-08-11,01:36:30,FC e PO,L,FBTC,01:38:23,Sono,Hiperventilação e taquicardia e TC,2022-08-11 01:36:30


In [6]:

seiz_surrounding_index = []

for seizure in patient_.seizure_table.index:

    onset = patient_.seizure_table.loc[seizure]['Timestamp']
    # preictal period is 10 minutes before seizure
    start = onset - pd.Timedelta(minutes=10)
    try:
        end_time = datetime.combine(onset.date(), patient_.seizure_table.loc[seizure, 'Fim'])
    except:
        end_time = onset + pd.Timedelta(minutes=2)
    seiz_surrounding_index += [patient_set.loc[patient_set['timestamp'].between(start, onset - pd.Timedelta(seconds=30))].index]
    posictal_0 = end_time
    posictal_1 = onset + pd.Timedelta(minutes=10)
    seiz_surrounding_index += [patient_set.loc[patient_set['timestamp'].between(posictal_0, posictal_1)].index]
    if patient_.seizure_table.loc[seizure, 'Focal / Generalisada'] == 'FBTC':
        patient_set.loc[patient_set['timestamp'].between(onset, posictal_0), 'y'] = 1
    
seiz_surrounding_index = np.hstack(seiz_surrounding_index)      

patient_set.drop(seiz_surrounding_index, inplace=True)
print(patient_set.shape)

(56213, 1558)


In [7]:
posictal_0

datetime.datetime(2022, 8, 11, 1, 38, 23)

## Train test split

In [8]:
fig = px.scatter(patient_set, x='timestamp', y='y', title=f'Patient {patient} - Accelerometer Data')
fig.show()

In [9]:
# training will be the first 75% of the data whereas the testing will be the remaining 25%
# YIVL perc_cut = 0.76484
# AGGA perc_cut = 0.17
# BLIW perc_cut = 0.88
perc_cut = 0.88

training_set = patient_set.iloc[:int(patient_set.shape[0]*perc_cut)]
testing_set = patient_set.iloc[int(patient_set.shape[0]*perc_cut):]
print(training_set.shape)
print(testing_set.shape)
training_set.drop_duplicates(inplace=True)
fig = px.scatter(training_set, y='timestamp', color='y', title=f'Patient {patient} - Accelerometer Data')
fig.show()
fig = px.scatter(testing_set, y='timestamp', color='y', title=f'Patient {patient} - Accelerometer Data')
fig.show()

(49467, 1558)
(6746, 1558)




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:

constant_features = get_constant_features(training_set[features_cols])
# print(constant_features)
exclude_cols = constant_features.Var
training_set.drop(columns=exclude_cols, inplace=True)
print('Number of features excluded: ', len(exclude_cols))
features_cols = training_set.drop(columns=['patient_seizure', 'patient', 'timestamp', 'y']).columns

corr_matrix = training_set[features_cols].corr().abs()

# Select upper triangle of correlation matrix
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

# Find features with correlation greater than 0.95
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
print(f'Dropping {len(to_drop)} features from {corr_matrix.shape[1]}.')

# Drop features 
#if to_drop[0] in training_set.columns:
#    training_set.drop(to_drop, axis=1, inplace=True)
new_training_set = training_set.drop(columns=to_drop)
new_testing_set = testing_set.drop(columns=to_drop)
features_cols = new_training_set.drop(columns=['patient_seizure', 'timestamp', 'y', 'patient']).columns

print('New shape: ', new_training_set.shape)
print(len(features_cols))

scaler = MinMaxScaler()
scaler.fit(new_training_set[features_cols])
new_training_set[features_cols] = scaler.transform(new_training_set[features_cols])
new_testing_set[features_cols] = scaler.transform(new_testing_set[features_cols])




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Number of features excluded:  58
Dropping 995 features from 1496.
New shape:  (20069, 505)
501


## Features Selection

In [11]:

X_train_ = new_training_set[features_cols].copy()
y_train_ = new_training_set['y'].copy()
X_test_ = new_testing_set[features_cols].copy()
y_test_ = new_testing_set['y'].copy()


percentile = 2
selector_anova = SelectPercentile(score_func=f_classif, percentile=percentile)
X_train_ANOVA = selector_anova.fit_transform(X_train_, y_train_)
X_test_ANOVA = selector_anova.transform(X_test_)


feature_index = selector_anova.get_support(indices=True)
feature_names = X_train_.columns[feature_index]

# Convert selected features to DataFrame
features_anova_train1 = pd.DataFrame(X_train_ANOVA, columns=feature_names, index=X_train_.index)
features_anova_test1 = pd.DataFrame(X_test_ANOVA, columns=feature_names, index=X_test_.index)

print('Number of features selected: ', len(feature_names))

training_set_keys = new_training_set['patient_seizure'].unique()

validation_set_keys = []
training_set_keys_subset = training_set_keys[:-1]





Number of features selected:  10


In [13]:
fig = px.scatter(new_training_set, y='timestamp', color='y', title=f'Patient {patient} - Accelerometer Data')
fig.show()

In [15]:
idx_cut = new_training_set.iloc[int(new_training_set.shape[0]*0.85)].name
print(idx_cut)
features_anova_train1.sort_index(inplace=True)
X_train = features_anova_train1.loc[:idx_cut]
X_val = features_anova_train1.loc[idx_cut:]
X_test = features_anova_test1.copy()

# y_train = y_train_.loc[:int(new_training_set.shape[0]*0.75)]
y_train = new_training_set.loc[X_train.index, 'y'].copy()
y_val = new_training_set.loc[X_val.index, 'y'].copy()
y_test = y_test_.copy()


fig = px.scatter(y_train, x=new_training_set.loc[X_train.index, 'timestamp'], y='y', title=f'Patient {patient} - Accelerometer Data')
fig.show()
fig = px.scatter(y_val, x=new_training_set.loc[X_val.index, 'timestamp'], y='y', title=f'Patient {patient} - Accelerometer Data')
fig.show()

220590


In [16]:
test_time = new_testing_set['timestamp'].copy()
val_time = new_training_set.loc[X_val.index, 'timestamp'].copy()

seizure_dict = {}
seizure_dict['test'] = patient_.seizure_table.loc[patient_.seizure_table['Timestamp'].between(test_time.iloc[0], test_time.iloc[-1])]
seizure_dict['val'] = patient_.seizure_table.loc[patient_.seizure_table['Timestamp'].between(val_time.iloc[0], val_time.iloc[-1])]
print(seizure_dict)

{'test':     Crises       Data Hora Clínica Localização lado Focal / Generalisada  \
7  Crise 8 2020-01-10     08:57:00    temporal    E                 FBTC   

  Unnamed: 6  Unnamed: 7 Sono/ Vigília                          Tipo  \
7        NaN         NaN       Vígilia  Generalização Tónico-clónica   

  Intervalo (10,10) onset no Bitalino           Timestamp  
7                                 NaN 2020-01-10 08:57:00  , 'val':     Crises       Data Hora Clínica Localização lado Focal / Generalisada  \
6  Crise 7 2020-01-10     00:59:30    temporal    E                 FBTC   

  Unnamed: 6  Unnamed: 7 Sono/ Vigília                          Tipo  \
6        NaN         NaN       Vígilia  Generalização Tónico-clónica   

  Intervalo (10,10) onset no Bitalino           Timestamp  
6                            19:12:00 2020-01-10 00:59:30  }


In [17]:
def decision_layer(df, life):
    """
    Decision layer to detect a seizure
    :param df: dataframe with y_pred column, timestamp column and index
    :return: index of seizure
    """
    consecutive_preds = 0
    seizure_counter_dict = {}
    # all index of N consecutive 1s
    # add a median filter to smooth the prediction
    i = 0
    alarm = False
    
    while i < len(df):
        # in an alarm is raised
        if (consecutive_preds >= life and not alarm):
            # raise an alarm
            seizure_counter_dict[df['time'].iloc[i]] = i
            alarm = True
            # wait for 
            # print('Seizure detected at index: ', i)
        else:
            if df['y_pred'].iloc[i] == 1:
                consecutive_preds += 1
            if df['y_pred'].iloc[i] == 0:
                consecutive_preds = 0
                alarm = False
        i += 1
    # after a consec_index wait for 2 minutes before the next prediction
    return seizure_counter_dict

def eval_alarms(preds, seizure):
    """
    Evaluate the alarms
    :param preds: list of predictions
    :param seizure: seizure dataframe
    """
    #print('Number of alarms: ', len(preds))
    #print('Number of seizures: ', len(seizure))

    if (seizure is None) or (len(preds) == 0):
        # every alarm is a false alarm
        return 0, len(preds)

    true_alarm, false_alarm = 0, 0
    if type(preds) == dict:
        preds = list(preds.keys())
    
    onset = seizure['Timestamp'].iloc[0]
    
    seizure_range_start = onset - pd.Timedelta(seconds=30)
    seizure_range_end = onset + pd.Timedelta(seconds=180)


    for alarm in preds:
        if (seizure_range_start <= alarm) and (alarm <= seizure_range_end):
            print('Seizure detected at: ', alarm - onset)
            true_alarm += 1
        else:
            print('False alarm at: ', alarm - onset)
            false_alarm += 1
    
    return true_alarm, false_alarm

def eval_models_AD(X_train, y_train, X_val, y_val, X_test, y_test, new_training_set, new_testing_set, patient_, seizure_dict=None):

    """
    Evaluate the model
    :return: None
    """
    life = 2
    n_seizures_val = (y_val.diff() == -1).astype(int).sum()
    n_seizures_test = (y_test.diff() == -1).astype(int).sum()
    n_seizures_train = (y_train.diff() == -1).astype(int).sum()
    threshold = X_train.min()
    y_pred = (X_val <= threshold).astype(int)
    recall = recall_score(y_val, y_pred)
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    # Print classification report and other metrics
    print(f"Recall: {np.round(100*recall, 2)} | Accuracy: {np.round(100*accuracy, 2)} | Precision: {np.round(100*precision, 2)} | F1-score: {np.round(100*f1, 2)}")

    #EVENT BASED ANALYSIS
    y_pred_df = pd.DataFrame(y_pred, columns=['y_pred'], index=y_val.index)
    y_pred_df['time'] = new_training_set.loc[y_pred_df.index, 'timestamp']
    y_pred_df.sort_index(inplace=True)
    
    val_alarms = decision_layer(y_pred_df, life)
    val_seizure = seizure_dict['val'] #int(validation_set_keys[0].split('_')[-1])
    tp, fp = eval_alarms(val_alarms, val_seizure)
    val_time = (y_pred_df['time'].iloc[-1] - y_pred_df['time'].iloc[0]).total_seconds()/3600
    fp_rate = np.round(fp/val_time,2)
    print(f'VAL SD: {tp}/{n_seizures_val} | VAL FAR: {fp_rate}/h')

    #TESTING SET
    y_pred_test = (X_test <= threshold).astype(int)

    y_pred_df_test = pd.DataFrame(y_pred_test, columns=['y_pred'], index=new_testing_set.index)
    y_pred_df_test['time'] = new_testing_set['timestamp']
    
    test_alarms = decision_layer(y_pred_df_test, life)
    test_seizure = seizure_dict['test'] #int(test_seizures[0].split('_')[-1])

    if len(test_seizure) > 1:
        print('deal with multiple seizures')
    if new_testing_set['y'].sum() == 0:
        print('No seizure in testing set')
        test_seizure = None
    if y_val.sum() == 0:
        print('No seizure in validation set')

    tp, fp = eval_alarms(test_alarms, test_seizure)
    
    test_time = (y_pred_df_test['time'].iloc[-1] - y_pred_df_test['time'].iloc[0]).total_seconds()/3600
    
    fp_rate = np.round(fp/test_time,2)


    print(f'TEST SD: {tp}/{n_seizures_test} | TEST FAR: {fp_rate}/h')
    print('\n')

    return y_pred_df, y_pred_df_test


def anomaly_detection(model, X_train, X_val, X_test, y_train, y_val, y_test, patient_, seizure_dict):

    if model == 'IF':
        # Train the Isolation Forest on the normal class (majority class)
        anomaly_detector = IsolationForest(contamination=0.05)  # Adjust contamination as needed
        anomaly_detector.fit(pd.concat((X_train, X_val)))

    if model == 'SVM':
        # Train the One-Class SVM on the normal class (majority class)
        anomaly_detector = OneClassSVM(nu=0.05, kernel='rbf', gamma='scale')  # Adjust parameters as needed
        anomaly_detector.fit(pd.concat((X_train, X_val)))
    
    training_anomaly_scores = anomaly_detector.decision_function(X_train)
    val_anomaly_scores = anomaly_detector.decision_function(X_val)
    test_anomaly_scores = anomaly_detector.decision_function(X_test)

#    classifiers = [('LR', LogisticRegression()), 
#                   ('RF', RandomForestClassifier(n_estimators=100, random_state=42)),
#                   ('GB', GradientBoostingClassifier(n_estimators=100, random_state=42))]

    pred, test = eval_models_AD(training_anomaly_scores, y_train, val_anomaly_scores, y_val, 
                                test_anomaly_scores, y_test, new_training_set, new_testing_set, 
                                patient_, seizure_dict)

    return training_anomaly_scores, val_anomaly_scores, test_anomaly_scores, pred, test


In [18]:
train_as_if, val_sc_if, test_sc_if, preds_if, tests_if = anomaly_detection('IF', X_train, X_val, X_test, y_train, y_val, y_test, patient_, seizure_dict)
train_as_svm, val_sc_svm, test_sc_svm, preds_svm, tests_svm = anomaly_detection('SVM', X_train, X_val, X_test, y_train, y_val, y_test, patient_, seizure_dict)


Recall: 20.83 | Accuracy: 99.37 | Precision: 100.0 | F1-score: 34.48
Seizure detected at:  0 days 00:01:40
VAL SD: 1/1 | VAL FAR: 0.0/h
Seizure detected at:  0 days 00:01:20
TEST SD: 1/1 | TEST FAR: 0.0/h


Recall: 25.0 | Accuracy: 99.4 | Precision: 100.0 | F1-score: 40.0
Seizure detected at:  0 days 00:01:35
VAL SD: 1/1 | VAL FAR: 0.0/h
Seizure detected at:  0 days 00:00:50
TEST SD: 1/1 | TEST FAR: 0.0/h




In [127]:
train_as_if = pd.DataFrame(val_sc_if, columns=['y_pred'], index=y_val.index)
train_as_if['time'] = new_training_set.loc[train_as_if.index, 'timestamp']
train_as_if['true'] = y_val
fig = px.scatter(train_as_if, x='time', y='y_pred', color='true', title=f'Patient {patient} - Accelerometer Data')
fig.show()