In [1]:
!pip install -q wfdb
!pip install -q neurokit2
!pip install -q biosppy
!pip install vitaldb



In [2]:
import numpy as np
import neurokit2 as nk
import matplotlib.pyplot as plt
import os
import pandas as pd
import biosppy.signals.ecg as ecg
import random
from tqdm.notebook import tqdm
import vitaldb
from tqdm.notebook import tqdm

In [3]:
def r_wave_amplitude(r_peak_loc):
    if len(r_peak_loc) >= 1:
        r_amplitude = float(r_peak_loc.values[0])
        return r_amplitude
    return np.nan

def r_wave_duration(q_peak_loc, t_onset_loc):
    if len(q_peak_loc) >= 1 and len(t_onset_loc) >= 1:
        r_duration = float(q_peak_loc.index[0] - t_onset_loc.index[0])
        return r_duration
    return np.nan

def t_wave_amplitude(t_peak_loc):
    if len(t_peak_loc) >= 1:
        t_amplitude = float(t_peak_loc.values[0])
        return t_amplitude
    return np.nan

def t_wave_duration(t_offset_loc, t_onset_loc):
    if len(t_offset_loc) >= 1 and len(t_onset_loc) >= 1:
        t_duration = float(t_offset_loc.index[0] - t_onset_loc.index[0])
        return t_duration
    return np.nan

def s_wave_amplitude(s_peak_loc):
    if len(s_peak_loc) >= 1:
        s_amplitude = float(s_peak_loc.values[0])
        return s_amplitude
    return np.nan

def qt_interval(t_offset_loc, p_offset_loc):
    if len(t_offset_loc) >= 1 and len(p_offset_loc) >= 1:
        qt_interval = float(t_offset_loc.index[0] - p_offset_loc.index[0])
        return qt_interval
    return np.nan

def heart_rate_calc(hr, idx):
    if idx > 0:
        return float(hr[idx])
    return np.nan


def j_point_calc(s_peak_loc, t_peak_loc, x, y):
    if len(s_peak_loc) == 0 or len(t_peak_loc) == 0:
        return np.nan, np.nan, None
    if s_peak_loc.index[0] > t_peak_loc.index[0]:
        return np.nan, np.nan, None
    s_peak_ind = int(np.interp(s_peak_loc.index[0], x, np.arange(len(x))))
    t_peak_ind = int(np.interp(t_peak_loc.index[0], x, np.arange(len(x))))
    y_st_by_peaks = y[s_peak_ind:t_peak_ind+1]
    x_st_by_peaks = x[s_peak_ind:t_peak_ind+1]
    if t_peak_ind - s_peak_ind < 1:
        return np.nan, np.nan, None
    Fy = np.gradient(y_st_by_peaks)
    j_point_pseudo_ind = np.abs(Fy).argmin()
    j_point = [x_st_by_peaks[j_point_pseudo_ind], y_st_by_peaks[j_point_pseudo_ind]]
    j_point_ind = int(np.round(np.interp(x_st_by_peaks[j_point_pseudo_ind], x, np.arange(len(x)))))
    j_magnitude = j_point[1]

    return j_point, j_magnitude, j_point_ind

def st_duration_calc(t_peak_loc, j_point):
    if len(t_peak_loc) >= 1 and isinstance(j_point, list) and len(j_point) == 2:
        st_duration = t_peak_loc.index[0] - j_point[0]
        return st_duration
    return np.nan

def st_slope_calc(t_onset_loc, j_point_ind, x, y):
    if len(t_onset_loc) >= 1 and j_point_ind is not None:
        t_onset_ind = int(np.round(np.interp(t_onset_loc.index[0], x, np.arange(len(x)))))

        if t_onset_ind > j_point_ind:
            y_st_slope_range = y[j_point_ind:t_onset_ind+1]
            x_st_slope_range = x[j_point_ind:t_onset_ind+1]

            F_st_slope = np.gradient(y_st_slope_range)
            st_slope = np.mean(F_st_slope)
            st_slope_bool = st_slope > 0
            return st_slope
    return np.nan

def st_area_calc(t_offset_loc, j_point_ind, x, y,):
    if len(t_offset_loc) >= 1 and j_point_ind is not None:
        ind_t_offset = int(np.round(np.interp(t_offset_loc.index[0], x, np.arange(len(x)))))
        y_st_area_range = y[j_point_ind:ind_t_offset+1]
        x_st_area_range = x[j_point_ind:ind_t_offset+1]
        st_area = np.trapz(y_st_area_range, x_st_area_range)
        return st_area
    return np.nan





def extract_features(beats, features,case_id):

    p_onsets = features["ECG_P_Onsets"]
    r_onsets = features["ECG_R_Onsets"]
    t_onsets = features["ECG_T_Onsets"]
    p_offsets = features["ECG_P_Offsets"]
    r_offsets = features["ECG_R_Offsets"]
    t_offsets = features["ECG_T_Offsets"]
    r_peaks = features["ECG_R_Peaks"]
    t_peaks = features["ECG_T_Peaks"]
    q_peaks = features["ECG_Q_Peaks"]
    s_peaks = features["ECG_S_Peaks"]
    p_peaks = features["ECG_P_Peaks"]
    heart_rate = features["ECG_Rate"]
    cycles=0

    record_dataset = pd.DataFrame(columns=['Case_ID','Cycle','R_wave_amplitude', 'R_wave_duration', 'T_wave_amplitude', 'T_wave_duration', 'S_wave_amplitude', 'QT_interval', 'Heart_rate','J_point_amplitude', 'ST_duration', 'ST_area', 'ST_slope'])

    for i in tqdm(range(1, len(beats)+1)):
        beat = beats[str(i)]
        beat_signal = beat['Signal']
        time = beat_signal.index
        voltage = beat_signal.values
        beat_idx = beat['Index'].values
        min_idx, max_idx = beat_idx[0], beat_idx[-1]

        s_peak_loc = beat_signal.iloc[np.argwhere(s_peaks[min_idx:max_idx].values == 1).flatten()]
        r_onset_loc = beat_signal.iloc[np.argwhere(r_onsets[min_idx:max_idx].values == 1).flatten()]
        t_onset_loc = beat_signal.iloc[np.argwhere(t_onsets[min_idx:max_idx].values == 1).flatten()]
        p_offset_loc = beat_signal.iloc[np.argwhere(p_offsets[min_idx:max_idx].values == 1).flatten()]
        r_offset_loc = beat_signal.iloc[np.argwhere(r_offsets[min_idx:max_idx].values == 1).flatten()]
        t_offset_loc = beat_signal.iloc[np.argwhere(t_offsets[min_idx:max_idx].values == 1).flatten()]
        r_peak_loc = beat_signal.iloc[np.argwhere(r_peaks[min_idx:max_idx].values == 1).flatten()]
        t_peak_loc = beat_signal.iloc[np.argwhere(t_peaks[min_idx:max_idx].values == 1).flatten()]
        q_peak_loc = beat_signal.iloc[np.argwhere(q_peaks[min_idx:max_idx].values == 1).flatten()]


        r_amplitude = r_wave_amplitude(r_peak_loc)
        r_duration = r_wave_duration(q_peak_loc, r_onset_loc)
        t_amplitude = t_wave_amplitude(t_peak_loc)
        t_duration = t_wave_duration(t_offset_loc, t_onset_loc)
        s_amplitude = s_wave_amplitude(s_peak_loc)
        qt_int = qt_interval(t_offset_loc, p_offset_loc)
        hr = heart_rate_calc(heart_rate, (min_idx + max_idx) // 2)
        j_point, j_magnitude, j_point_ind = j_point_calc(s_peak_loc, t_peak_loc, time, voltage)
        st_duration = st_duration_calc(t_peak_loc, j_point)
        st_slope = st_slope_calc(t_onset_loc, j_point_ind, time, voltage)
        st_area = st_area_calc(t_offset_loc, j_point_ind, time, voltage)
        cycles+=1

        feature_row = {
            'Case_ID': case_id,
            'Cycle': cycles,
            'R_wave_amplitude': r_amplitude,
            'R_wave_duration': r_duration,
            'T_wave_amplitude': t_amplitude,
            'T_wave_duration': t_duration,
            'S_wave_amplitude': s_amplitude,
            'QT_interval': qt_int,
            'Heart_rate': hr,
            'J_point_amplitude': j_magnitude,
            'ST_duration': st_duration,
            'ST_area': st_area,
            'ST_slope': st_slope
        }

        feature_row = pd.DataFrame([feature_row])
        record_dataset = pd.concat([record_dataset, feature_row], ignore_index=True)

    return record_dataset






def engine(caseid,srate):
  vals = vitaldb.load_case(caseid, ['SNUADC/ECG_II'], 1 / srate)
  initial_signal = vals[:,0]


  print("Process Started For ", caseid)
  print("Signal Cleaning Started")

  op_st = df_cases[(df_cases['caseid'] == caseid)]['opstart'].values[0]
  op_end = df_cases[(df_cases['caseid'] == caseid)]['opend'].values[0]


  count = 0
  seconds = 0

  for i in initial_signal:
    count+=1
    if count*(1/500)>= op_st:
      break


  count2 = 0
  seconds = 0
  for i in initial_signal:
    count2+=1
    if count2*(1/500) >= op_end:
      break

  initial_signal = vals[:,0][count:count2]
  initial_signal = initial_signal[np.logical_not(np.isnan(initial_signal))]


  initial_signal = initial_signal[(initial_signal>= -.25) & (initial_signal<=.25)]
  signal_clean = nk.ecg_clean(initial_signal, sampling_rate=srate)
  print("Signal Cleaning Complete......")

  print("Features and Beats extracting Started......")
  features, _ = nk.ecg_process(signal_clean, sampling_rate=srate)
  beats = nk.ecg_segment(signal_clean, sampling_rate=srate)
  print("Features and Beats extracting Completed......")


  print("Making Dataset")
  data = extract_features(beats, features,caseid)
  print("Dataset Complete")
  return data



In [4]:
df_cases = pd.read_csv("https://api.vitaldb.net/cases")
df_trks = pd.read_csv('https://api.vitaldb.net/trks')
df_labs = pd.read_csv('https://api.vitaldb.net/labs')

In [5]:
# inclusion / exclusion criteria
caseids = list(
    set(df_trks.loc[df_trks['tname'] == 'SNUADC/ECG_II', 'caseid'])
)

print('Total {} cases found'.format(len(caseids)))

Total 6355 cases found


In [None]:
file_save_destination = '/content/drive/MyDrive/Iqram Sir/VitalDB/Vital_ECG_II.csv'

if os.path.exists(file_save_destination)==False:
  dataset_main = pd.DataFrame(columns=['Case_ID','Cycle','R_wave_amplitude', 'R_wave_duration', 'T_wave_amplitude', 'T_wave_duration', 'S_wave_amplitude', 'QT_interval', 'Heart_rate','J_point_amplitude', 'ST_duration', 'ST_area', 'ST_slope'])
  dataset_main.to_csv(file_save_destination,index=False)


srate = 100
dataset_main = pd.read_csv(file_save_destination)
case_complete_list = list(dataset_main["Case_ID"].unique())


for caseid in caseids:

  if caseid not in case_complete_list:
    try:
      data = engine(caseid, srate)

      dataset_main = pd.concat([dataset_main, data], axis=0)
      dataset_main.to_csv(file_save_destination,index=False)
      print("Dataset Concatination Complete")
      print("Process Complete for Case ID: ", caseid)
      print("............................................................")
      print("............................................................")

    except:
      print("----------------------------ERORR!!! occurred In CASE ID: ",caseid,"----------------------------------")
      print("Do not worry, ENGINE will handle it.......")
      print("............................................................")
      print("............................................................")


Process Started For  1
Signal Cleaning Started
Signal Cleaning Complete......
Features and Beats extracting Started......
Features and Beats extracting Completed......
Makind Dataset


  0%|          | 0/3879 [00:00<?, ?it/s]

Dataset Complete
Dataset Concatination Complete
Process Complete for Case ID:  1
............................................................
............................................................
Process Started For  2
Signal Cleaning Started
Signal Cleaning Complete......
Features and Beats extracting Started......
Features and Beats extracting Completed......
Makind Dataset


  0%|          | 0/11990 [00:00<?, ?it/s]

Dataset Complete
Dataset Concatination Complete
Process Complete for Case ID:  2
............................................................
............................................................
Process Started For  3
Signal Cleaning Started
----------------------------ERORR!!! occurred In CASE ID:  3 ----------------------------------
Process Started For  4
Signal Cleaning Started
Signal Cleaning Complete......
Features and Beats extracting Started......
Features and Beats extracting Completed......
Makind Dataset


  0%|          | 0/8889 [00:00<?, ?it/s]

Dataset Complete
Dataset Concatination Complete
Process Complete for Case ID:  4
............................................................
............................................................
Process Started For  5
Signal Cleaning Started
Signal Cleaning Complete......
Features and Beats extracting Started......
Features and Beats extracting Completed......
Makind Dataset


  0%|          | 0/10345 [00:00<?, ?it/s]

Dataset Complete
Dataset Concatination Complete
Process Complete for Case ID:  5
............................................................
............................................................
Process Started For  6
Signal Cleaning Started
----------------------------ERORR!!! occurred In CASE ID:  6 ----------------------------------
Process Started For  7
Signal Cleaning Started
----------------------------ERORR!!! occurred In CASE ID:  7 ----------------------------------
Process Started For  8
Signal Cleaning Started
Signal Cleaning Complete......
Features and Beats extracting Started......


In [None]:
dataset_main