## Autoencoder Trained on Epilepsy Respiratory Data

In [2]:
import os
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

from preepiseizures.src import Patient
import Respiration_2023

In [39]:
# Patient whose recording is analysed in this notebook.
patient = 'SYRH'
respiration_file = f'data/respiration/{patient}_all_respiration_data.parquet'
data = pd.read_parquet(respiration_file)

# Patient metadata; get_seizure_annotations() is called so that
# patient_info.seizure_table can be used by the plotting cells further down.
patient_info = Patient.patient_class(patient)
patient_info.get_seizure_annotations()
# Not displayed: only the last expression of a cell is rendered.
patient_info.seizure_table

# Shift every timestamp by the patient's stored 'temporal_shift'
# (presumably to align the recording clock with the annotations - confirm).
temporal_shift = pd.Timedelta(patient_info.patient_dict['temporal_shift'])
data['datetime'] = data['datetime'] + temporal_shift

data.head(5)

Unnamed: 0,RESP,ECG,datetime
0,6.596455,306.963019,2021-04-13 05:36:37.509215000
1,13.322916,309.350113,2021-04-13 05:36:37.521715173
2,11.673581,310.560687,2021-04-13 05:36:37.534215347
3,12.690194,312.275954,2021-04-13 05:36:37.546715520
4,12.008034,311.656591,2021-04-13 05:36:37.559215694


## Autoencoder Training

In [40]:
# Segmentation settings, in seconds: one segment start every `slide` seconds,
# each segment handed to the dataset builder with a length of `window`.
slide, window = 1, 60
# Fraction of the recording's time span used for training. The cache file
# name below still carries the literal "20p", so change both together.
train_fraction = 0.2

# Start times of all candidate segments. The last start is `window` seconds
# before the final sample so that a full window fits inside the recording.
timestamps_segments = pd.date_range(
    start=data['datetime'].iloc[0],
    end=data['datetime'].iloc[-1] - pd.Timedelta(seconds=window),
    freq=f'{slide}s',
)

# Keep only the segment starts that fall in the first `train_fraction` of the span.
timestamps_segments_train_limit = (
    timestamps_segments[0] + (timestamps_segments[-1] - timestamps_segments[0]) * train_fraction
)
timestamps_segments_train = timestamps_segments[timestamps_segments < timestamps_segments_train_limit]

# NOTE(review): train_data stops at the last training segment *start*, so
# segments beginning within the final `window` seconds may be incomplete -
# confirm how Respiration_2023.epilepsy_dataset handles them.
train_data = data.loc[
    data['datetime'].between(timestamps_segments_train[0], timestamps_segments_train[-1])
].copy()

train_path = os.path.join(
    'preepiseizures', 'sudep_analysis', f'{patient}_data_segments_train_20p_{slide}s.parquet'
)
train_segments = Respiration_2023.epilepsy_dataset(
    train_data, timestamps_segments_train, train_path, slide=slide, window=window
)

# The label follows `slide` so it stays in step with the segment file name above.
# `again=False` presumably reuses a previously saved model when one exists -
# confirm in Respiration_2023.respiration_training.
label = os.path.join('preepiseizures', 'sudep_analysis', f'{patient}_{slide}s')
modelAE, encAE, decAE = Respiration_2023.respiration_training(train_segments, again=False, label=label)


Processing segment 69839/69901
Autoencoder Created:
Layers: [750, 500, 300, 250, 100]
Input Length: 1000
Compression: 100
Activation: relu
Optimizer: adam
Loss: mse

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/30

## Autoencoder Validation

In [51]:

# Validation segments start every `validation_slide` seconds, from the end of
# the training span up to `window` seconds before the last sample.
validation_slide = 60
timestamps_segments_validation = pd.date_range(
    start=timestamps_segments_train_limit,
    end=data['datetime'].iloc[-1] - pd.Timedelta(seconds=window),
    freq=f'{validation_slide}s',
)

validation_data = data.loc[
    data['datetime'].between(timestamps_segments_validation[0], timestamps_segments_validation[-1])
].copy()
validation_path = os.path.join(
    'preepiseizures', 'sudep_analysis', f'{patient}_validation_data_60s_timecorrected.parquet'
)
validation_segments = Respiration_2023.epilepsy_dataset(
    validation_data,
    timestamps_segments_validation,
    train_path=validation_path,
    slide=validation_slide,
    window=window,
)
output_val = modelAE.predict(validation_segments)

# IO Correlation
# The same count feeds both `points=` and `periods=` so that the number of
# timestamps generated per segment matches the number of correlation values.
corr_points_per_segment = 1
corr_ = np.array([
    Respiration_2023.correlation(output_val[i], validation_segments.iloc[i], points=corr_points_per_segment)
    for i in range(len(output_val))
])
corr_df = pd.DataFrame(corr_, index=validation_segments.index, columns=[str(i) for i in range(len(corr_[0]))])
time_corr_points = [
    pd.date_range(segment_start, segment_start + pd.Timedelta(seconds=window), periods=corr_points_per_segment)
    for segment_start in corr_df.index
]
# Flatten to one row per correlation value, indexed by its timestamp.
corr_points = pd.DataFrame(np.hstack(corr_df.values), index=np.hstack(time_corr_points))


Processing segment 2458/4661

KeyboardInterrupt: 

In [None]:


# Evaluate the autoencoder on every 60th training segment. The subset is
# sliced once here instead of being re-sliced on every loop iteration.
train_stride = 60
train_segments_sub = train_segments[::train_stride]
output_train = modelAE.predict(train_segments_sub)

# The same count feeds both `points=` and `periods=` so that the number of
# timestamps generated per segment matches the number of correlation values.
corr_points_per_segment = 1
corr_train = np.array([
    Respiration_2023.correlation(output_train[i], train_segments_sub.iloc[i], points=corr_points_per_segment)
    for i in range(len(output_train))
])
corr_df_train = pd.DataFrame(
    corr_train,
    index=train_segments_sub.index,
    columns=[str(i) for i in range(len(corr_train[0]))],
)
time_corr_points_train = [
    pd.date_range(segment_start, segment_start + pd.Timedelta(seconds=window), periods=corr_points_per_segment)
    for segment_start in corr_df_train.index
]
# Flatten to one row per correlation value, indexed by its timestamp.
corr_points_train = pd.DataFrame(np.hstack(corr_df_train.values), index=np.hstack(time_corr_points_train))




In [None]:
def quality_check(x, min_samples=750, ecg_std_min=5, ecg_std_max=50):
    """Return ``x`` if it passes the ECG quality criteria, otherwise ``None``.

    A chunk passes when it has more than ``min_samples`` rows and the standard
    deviation of its ``'ECG'`` column lies strictly between ``ecg_std_min``
    and ``ecg_std_max``.

    Parameters
    ----------
    x : pandas.DataFrame
        Chunk of the recording; must provide an ``'ECG'`` column.
    min_samples : int, default 750
        The chunk must contain strictly more rows than this.
    ecg_std_min, ecg_std_max : float, default 5 and 50
        Exclusive bounds on the standard deviation of ``x['ECG']``.

    Returns
    -------
    pandas.DataFrame or None
        ``x`` itself (not a copy) when accepted, ``None`` when rejected.
    """
    # Too short: reject without touching the ECG column.
    if len(x) <= min_samples:
        return None
    # Compute the spread once; a NaN result fails both comparisons and is rejected.
    ecg_std = x['ECG'].std()
    if ecg_std_min < ecg_std < ecg_std_max:
        return x
    return None

In [None]:
# Group the validation samples into 10-second bins on the 'datetime' column
# and run quality_check on each bin.
# Lowercase 's' is the seconds alias already used by the date_range calls in
# this notebook; the uppercase 'S' spelling is deprecated in recent pandas.
data_quality = validation_data.resample(on='datetime', rule='10s').apply(quality_check)

In [None]:
# Re-attach the timestamps: take 'datetime' from the validation rows whose
# index labels also appear in data_quality (assignment aligns on the index).
kept_rows = validation_data.index.isin(data_quality.index)
data_quality['datetime'] = validation_data.loc[kept_rows, 'datetime']


In [None]:
# Show the quality-filtered validation samples (RESP, ECG and datetime columns).
data_quality

Unnamed: 0,RESP,ECG,datetime
3530617,-0.218491,298.158601,2020-01-08 06:43:20.000821130
3530618,-0.218957,279.151778,2020-01-08 06:43:20.013321170
3530619,-0.218704,291.081004,2020-01-08 06:43:20.025821210
3530620,-0.219117,303.395464,2020-01-08 06:43:20.038321250
3530621,-0.218812,287.905772,2020-01-08 06:43:20.050821290
...,...,...,...
9592572,-2.716113,297.122304,2020-01-10 09:23:09.944168530
9592573,-2.834005,307.291485,2020-01-10 09:23:09.956669688
9592574,-2.954047,308.220329,2020-01-10 09:23:09.969170847
9592575,-3.073736,314.902018,2020-01-10 09:23:09.981672005


In [None]:
# Quick visual check of the respiration signal, limited to the first
# 80000 samples of the recording.
resp_preview = data.iloc[:80000]
fig = px.line(resp_preview, x='datetime', y='RESP')
fig.show()

In [None]:
# Round both time axes to whole seconds so that correlation points can be
# matched against the quality-checked samples. Lowercase 's' is used because
# the uppercase 'S' alias is deprecated in recent pandas.
corr_points['sec'] = corr_points.index.round('s')
data_quality['sec'] = data_quality['datetime'].dt.round('s')

# Keep only the correlation points whose second also occurs in data_quality.
# The explicit copy makes this an independent frame, so adding columns to it
# later does not raise SettingWithCopyWarning.
in_quality_window = corr_points['sec'].isin(data_quality['sec'])
corr_points_quality = corr_points.loc[in_quality_window].copy()

In [None]:
# Width of the red band drawn from each seizure timestamp.
seizure_shade_duration = pd.Timedelta(minutes=5)

# Tag each set of correlation points with its origin. `assign` returns a new
# frame, which avoids writing into a slice (SettingWithCopyWarning);
# corr_points_quality is rebound so it still carries the 'type1' column.
corr_points_quality = corr_points_quality.assign(type1='validation')
fig_corr_points = pd.concat([
    corr_points_train.assign(type1='train'),
    corr_points_quality,
])

# Column 0 holds the correlation values; the x axis defaults to the frame's index.
fig = px.scatter(fig_corr_points, y=0, title='Correlation between input and output of the autoencoder', color='type1')
for seizure_onset in patient_info.seizure_table['Timestamp']:
    fig.add_vrect(x0=seizure_onset, x1=seizure_onset + seizure_shade_duration,
                  fillcolor="red", opacity=0.25, line_width=0)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:

from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM

# Anomaly detector to use: 'IF' (Isolation Forest) or 'SVM' (One-Class SVM).
model = 'IF'

# Every 60th training segment, sliced once and reused for fitting/scoring.
train_segments_sub = train_segments[::60]

if model == 'IF':
    # Fit on training and validation segments together.
    # NOTE(review): this branch fits on *all* training segments, whereas the
    # SVM branch and the scoring below use every 60th - confirm this is intended.
    # random_state is fixed so that re-running the cell gives the same scores.
    anomaly_detector = IsolationForest(contamination=0.05, random_state=0)  # Adjust contamination as needed
    anomaly_detector.fit(pd.concat((train_segments, validation_segments)))
elif model == 'SVM':
    # Fit on the subsampled training segments plus the validation segments.
    anomaly_detector = OneClassSVM(nu=0.05, kernel='rbf', gamma='scale')  # Adjust parameters as needed
    anomaly_detector.fit(pd.concat((train_segments_sub, validation_segments)))
else:
    # Fail loudly instead of silently scoring with a detector left over from
    # an earlier run of this cell.
    raise ValueError(f"Unknown anomaly detector {model!r}; expected 'IF' or 'SVM'.")

training_anomaly_scores = anomaly_detector.decision_function(train_segments_sub)
val_anomaly_scores = anomaly_detector.decision_function(validation_segments)


In [None]:
# Overlay the validation anomaly scores on the training scores in one figure.
fig = px.scatter(training_anomaly_scores)
validation_trace = px.scatter(val_anomaly_scores).data[0]
fig.add_trace(validation_trace)
fig.show()