In [2]:
import pandas as pd 
import matplotlib.pyplot as plt
import datetime
import neurokit2 as nk
import numpy as np
%matplotlib inline

In [3]:
SAMPLING_RATE = 1000

In [4]:
df = pd.read_csv('../data/1222325cFnorm.csv')

In [5]:
ecg = df["ECG"].values.tolist()

### Downsample the data

In [6]:
ecg_ds = nk.signal_resample(ecg, sampling_rate=2000, desired_sampling_rate=SAMPLING_RATE)

In [7]:
ecg_df = pd.DataFrame(ecg_ds, columns=['ECG'])

In [8]:
ecg_df

Unnamed: 0,ECG
0,0.015600
1,0.015573
2,0.015637
3,0.015685
4,0.015700
...,...
35331833,-0.000582
35331834,-0.000804
35331835,-0.000689
35331836,-0.000695


In [9]:
# Take first n minutes
minutes = 15

n = minutes*60*SAMPLING_RATE
df = ecg_df[:n]

In [10]:
print(f"Signal duration: {datetime.timedelta(seconds=len(df.index)/SAMPLING_RATE)}")

Signal duration: 0:15:00


## Define LF/HF value ranges

### Paper: Herzig et al., Reproducibility of Heart Rate Variability Is Parameter and Sleep Stage Dependent (2017)

In [11]:
# stage_2 = {'min': 0.68, 'median': 1.11, 'max': 2.02}
# sws = {'min': 0.31 , 'median': 0.51, 'max': 0.90}
nrem = {'min': 0.31, 'median': (2.02-0.31)/2 , 'max': 2.02}
rem = {'min': 1.30, 'median': 2.02 , 'max': 3.22}

### Paper: Ako et al., Correlation between electroencephalography and heart rate variability during sleep (2003)

In [None]:
stage_1 = {'min': 2.30-0.29, 'median': 2.30, 'max': 2.30+0.29}
stage_2 = {'min': 1.85-0.09 , 'median': 1.85 , 'max': 1.85+0.09}
stage_3 = {'min': 0.78-0.06 , 'median': 0.78, 'max': 0.78+0.06}
stage_4 = {'min': 0.86-0.14, 'median': 0.86, 'max': 0.86+0.14}
rem = {'min': 2.51-0.17, 'median': 2.51, 'max': 2.51+0.17}

In [39]:
start = 0
# Number of data points in 5 minutes
window_size = 5*60*SAMPLING_RATE
end = window_size
values = []

while end <= df.index[-1]+1:
    stages = []

    df_5_min = df[start:end]
    ecg = df_5_min["ECG"].values.tolist()
    # Filter the data with ranges specified from Barbara
    filter_ecg = nk.signal_filter(ecg, sampling_rate=SAMPLING_RATE, lowcut=0.5, highcut=150)
    cleaned = nk.ecg_clean(filter_ecg, sampling_rate=SAMPLING_RATE, method="neurokit")
    peaks, info = nk.ecg_peaks(cleaned, sampling_rate=SAMPLING_RATE, method="neurokit")

    hrv_indices = nk.hrv(peaks, sampling_rate=SAMPLING_RATE)
    hrv_welch = nk.hrv_frequency(peaks, sampling_rate=SAMPLING_RATE, psd_method="welch")

    if  nrem['min'] <= hrv_indices['HRV_LFHF'][0] <= nrem['max'] + 1:
        stages.append('nrem')

    elif rem['min'] <= hrv_indices['HRV_LFHF'][0] <= rem['max'] + 1:
        stages.append('rem')

    

    values.append({
        'start_id': start,
        'end_id': end,
        'LF/HF': hrv_welch['HRV_LFHF'][0],
        'SD': hrv_indices['HRV_SDNN'][0],
        'stages': stages,
        'color': None
    })

    

    start = end
    end += window_size

In [40]:
values

[{'start_id': 0,
  'end_id': 300000,
  'LF/HF': 1.2104719065647447,
  'SD': 464.49439264217165,
  'stages': ['nrem'],
  'color': None},
 {'start_id': 300000,
  'end_id': 600000,
  'LF/HF': 0.5692369237953601,
  'SD': 142.4711915588191,
  'stages': ['nrem'],
  'color': None},
 {'start_id': 600000,
  'end_id': 900000,
  'LF/HF': 1.7610367863296106,
  'SD': 237.5962333178986,
  'stages': ['nrem'],
  'color': None}]

## Script using hrvana framework

https://github.com/bzhai/multimodal_sleep_stage_benchmark/blob/master/notebooks/Tutorial-HRV%20Feature%20Extraction%20From%20ECG.ipynb

In [1]:
import hrvanalysis as hrvana

In [15]:
start = 0
# Number of data points in 5 minutes
window_size = 5*60*SAMPLING_RATE
end = window_size
values = []

while end <= df.index[-1]+1:
    stages = []

    df_5_min = df[start:end]
    ecg = df_5_min["ECG"].values.tolist()

    # Filter the data with ranges specified from Barbara
    filter_ecg = nk.signal_filter(ecg, sampling_rate=SAMPLING_RATE, lowcut=0.5, highcut=150)
    cleaned = nk.ecg_clean(filter_ecg, sampling_rate=SAMPLING_RATE, method="pantompkins1985")
    pantompkins1985 = nk.ecg_findpeaks(cleaned, method="pantompkins1985") # find the R peaks
    hrv_df = pd.DataFrame(pantompkins1985)
    hrv_df["RR Intervals"] = hrv_df["ECG_R_Peaks"].diff() # calculate the value difference between two adjacent points
    hrv_df.loc[0, "RR Intervals"]=hrv_df.loc[0]['ECG_R_Peaks'] # the first datapoint contain Nan 

    # Clean RR intervals
    clean_rri = hrv_df['RR Intervals'].values
    clean_rri = hrvana.remove_outliers(rr_intervals=clean_rri, low_rri=300, high_rri=2000)
    clean_rri = hrvana.interpolate_nan_values(rr_intervals=clean_rri, interpolation_method="linear")
    clean_rri = hrvana.remove_ectopic_beats(rr_intervals=clean_rri, method="malik")
    clean_rri = hrvana.interpolate_nan_values(rr_intervals=clean_rri, interpolation_method="linear")

    # HRV feature extraction
    nn_epoch = hrv_df['RR Intervals'].values

    time_features = hrvana.get_time_domain_features(nn_epoch)
    frequency_features = hrvana.get_frequency_domain_features(nn_epoch)

    print(f"LF/HF ratio: {frequency_features['lf_hf_ratio']}")

    if  nrem['min'] <= frequency_features['lf_hf_ratio'] <= nrem['max'] + 1:
        stages.append('nrem')

    elif rem['min'] <= frequency_features['lf_hf_ratio'] <= rem['max'] + 1:
        stages.append('rem')

    

    values.append({
        'start_id': start,
        'end_id': end,
        'LF/HF': frequency_features['lf_hf_ratio'],
        'SD': time_features['sdnn'],
        'stages': stages,
        'color': None
    })

    

    start = end
    end += window_size

25 outlier(s) have been deleted.
The outlier(s) value(s) are : [22951.0, 3417.0, 2761.0, 2838.0, 2183.0, 2374.0, 257.0, 2923.0, 252.0, 5500.0, 4612.0, 287.0, 253.0, 272.0, 2274.0, 258.0, 260.0, 2181.0, 2393.0, 2218.0, 6466.0, 33460.0, 3508.0, 2359.0, 2048.0]
87 ectopic beat(s) have been deleted with malik rule.
LF/HF ratio: 4.482200674930411
4 outlier(s) have been deleted.
The outlier(s) value(s) are : [2324.0, 266.0, 293.0, 267.0]
31 ectopic beat(s) have been deleted with malik rule.
LF/HF ratio: 1.0421030976291497
3 outlier(s) have been deleted.
The outlier(s) value(s) are : [269.0, 286.0, 257.0]
39 ectopic beat(s) have been deleted with malik rule.
LF/HF ratio: 1.1058894636183454


In [14]:
values

[{'start_id': 0,
  'end_id': 300000,
  'LF/HF': 4.482200674930411,
  'SD': 2411.6198940304757,
  'stages': [],
  'color': None},
 {'start_id': 300000,
  'end_id': 600000,
  'LF/HF': 1.0421030976291497,
  'SD': 205.84365551014199,
  'stages': ['nrem'],
  'color': None},
 {'start_id': 600000,
  'end_id': 900000,
  'LF/HF': 1.1058894636183454,
  'SD': 178.70773954102447,
  'stages': ['nrem'],
  'color': None}]