<a href="https://colab.research.google.com/github/RafsanJany-44/ARC/blob/master/Untitled153.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

!pip install pyvital vitaldb vital
!pip install neurokit2
import vitaldb
import pandas as pd
import numpy as np
from datetime import timedelta
from tqdm.notebook import tqdm

df_cases = pd.read_csv("https://api.vitaldb.net/cases")  # Load clinical data
df_trks = pd.read_csv('https://api.vitaldb.net/trks')  # Load track list
df_labs = pd.read_csv('https://api.vitaldb.net/labs')  # Load lab result

Collecting pyvital
  Downloading pyvital-0.3.5-py3-none-any.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m36.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting vitaldb
  Downloading vitaldb-1.4.2-py3-none-any.whl (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.0/55.0 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting vital
  Downloading vital-1.4.10-py3-none-any.whl (17 kB)
Collecting sanic (from pyvital)
  Downloading sanic-23.6.0-py3-none-any.whl (202 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m202.5/202.5 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
Collecting wfdb (from vitaldb)
  Downloading wfdb-4.1.2-py3-none-any.whl (159 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.0/160.0 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
Collecting arrow (from vital)
  Downloading arrow-1.2.3-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [45]:
def r_wave_amplitude(r_peak_loc):
    if len(r_peak_loc) >= 1:
        r_amplitude = float(r_peak_loc.values[0])
        return r_amplitude
    return np.nan

def r_wave_duration(q_peak_loc, t_onset_loc):
    if len(q_peak_loc) >= 1 and len(t_onset_loc) >= 1:
        r_duration = float(q_peak_loc.index[0] - t_onset_loc.index[0])
        return r_duration
    return np.nan

def t_wave_amplitude(t_peak_loc):
    if len(t_peak_loc) >= 1:
        t_amplitude = float(t_peak_loc.values[0])
        return t_amplitude
    return np.nan

def t_wave_duration(t_offset_loc, t_onset_loc):
    if len(t_offset_loc) >= 1 and len(t_onset_loc) >= 1:
        t_duration = float(t_offset_loc.index[0] - t_onset_loc.index[0])
        return t_duration
    return np.nan

def s_wave_amplitude(s_peak_loc):
    if len(s_peak_loc) >= 1:
        s_amplitude = float(s_peak_loc.values[0])
        return s_amplitude
    return np.nan

def qt_interval(t_offset_loc, p_offset_loc):
    if len(t_offset_loc) >= 1 and len(p_offset_loc) >= 1:
        qt_interval = float(t_offset_loc.index[0] - p_offset_loc.index[0])
        return qt_interval
    return np.nan

def heart_rate_calc(hr, idx):
    if idx > 0:
        return float(hr[idx])
    return np.nan


def j_point_calc(s_peak_loc, t_peak_loc, x, y):
    if len(s_peak_loc) == 0 or len(t_peak_loc) == 0:
        return np.nan, np.nan, None
    if s_peak_loc.index[0] > t_peak_loc.index[0]:
        return np.nan, np.nan, None
    s_peak_ind = int(np.interp(s_peak_loc.index[0], x, np.arange(len(x))))
    t_peak_ind = int(np.interp(t_peak_loc.index[0], x, np.arange(len(x))))
    y_st_by_peaks = y[s_peak_ind:t_peak_ind+1]
    x_st_by_peaks = x[s_peak_ind:t_peak_ind+1]
    if t_peak_ind - s_peak_ind < 1:
        return np.nan, np.nan, None
    Fy = np.gradient(y_st_by_peaks)
    j_point_pseudo_ind = np.abs(Fy).argmin()
    j_point = [x_st_by_peaks[j_point_pseudo_ind], y_st_by_peaks[j_point_pseudo_ind]]
    j_point_ind = int(np.round(np.interp(x_st_by_peaks[j_point_pseudo_ind], x, np.arange(len(x)))))
    j_magnitude = j_point[1]

    return j_point, j_magnitude, j_point_ind

def st_duration_calc(t_peak_loc, j_point):
    if len(t_peak_loc) >= 1 and isinstance(j_point, list) and len(j_point) == 2:
        st_duration = t_peak_loc.index[0] - j_point[0]
        return st_duration
    return np.nan

def st_slope_calc(t_onset_loc, j_point_ind, x, y):
    if len(t_onset_loc) >= 1 and j_point_ind is not None:
        t_onset_ind = int(np.round(np.interp(t_onset_loc.index[0], x, np.arange(len(x)))))

        if t_onset_ind > j_point_ind:
            y_st_slope_range = y[j_point_ind:t_onset_ind+1]
            x_st_slope_range = x[j_point_ind:t_onset_ind+1]

            F_st_slope = np.gradient(y_st_slope_range)
            st_slope = np.mean(F_st_slope)
            st_slope_bool = st_slope > 0
            return st_slope
    return np.nan

def st_area_calc(t_offset_loc, j_point_ind, x, y,):
    if len(t_offset_loc) >= 1 and j_point_ind is not None:
        ind_t_offset = int(np.round(np.interp(t_offset_loc.index[0], x, np.arange(len(x)))))
        y_st_area_range = y[j_point_ind:ind_t_offset+1]
        x_st_area_range = x[j_point_ind:ind_t_offset+1]
        st_area = np.trapz(y_st_area_range, x_st_area_range)
        return st_area
    return np.nan





def extract_features(beats, features,case_id):

    p_onsets = features["ECG_P_Onsets"]
    r_onsets = features["ECG_R_Onsets"]
    t_onsets = features["ECG_T_Onsets"]
    p_offsets = features["ECG_P_Offsets"]
    r_offsets = features["ECG_R_Offsets"]
    t_offsets = features["ECG_T_Offsets"]
    r_peaks = features["ECG_R_Peaks"]
    t_peaks = features["ECG_T_Peaks"]
    q_peaks = features["ECG_Q_Peaks"]
    s_peaks = features["ECG_S_Peaks"]
    p_peaks = features["ECG_P_Peaks"]
    heart_rate = features["ECG_Rate"]
    cycles=0

    record_dataset = pd.DataFrame(columns=['Case_ID','Cycle','R_wave_amplitude', 'R_wave_duration', 'T_wave_amplitude', 'T_wave_duration', 'S_wave_amplitude', 'QT_interval', 'Heart_rate','J_point_amplitude', 'ST_duration', 'ST_area', 'ST_slope'])

    for i in tqdm(range(1, len(beats)+1)):
        beat = beats[str(i)]
        beat_signal = beat['Signal']
        time = beat_signal.index
        voltage = beat_signal.values
        beat_idx = beat['Index'].values
        min_idx, max_idx = beat_idx[0], beat_idx[-1]

        s_peak_loc = beat_signal.iloc[np.argwhere(s_peaks[min_idx:max_idx].values == 1).flatten()]
        r_onset_loc = beat_signal.iloc[np.argwhere(r_onsets[min_idx:max_idx].values == 1).flatten()]
        t_onset_loc = beat_signal.iloc[np.argwhere(t_onsets[min_idx:max_idx].values == 1).flatten()]
        p_offset_loc = beat_signal.iloc[np.argwhere(p_offsets[min_idx:max_idx].values == 1).flatten()]
        r_offset_loc = beat_signal.iloc[np.argwhere(r_offsets[min_idx:max_idx].values == 1).flatten()]
        t_offset_loc = beat_signal.iloc[np.argwhere(t_offsets[min_idx:max_idx].values == 1).flatten()]
        r_peak_loc = beat_signal.iloc[np.argwhere(r_peaks[min_idx:max_idx].values == 1).flatten()]
        t_peak_loc = beat_signal.iloc[np.argwhere(t_peaks[min_idx:max_idx].values == 1).flatten()]
        q_peak_loc = beat_signal.iloc[np.argwhere(q_peaks[min_idx:max_idx].values == 1).flatten()]


        r_amplitude = r_wave_amplitude(r_peak_loc)
        r_duration = r_wave_duration(q_peak_loc, r_onset_loc)
        t_amplitude = t_wave_amplitude(t_peak_loc)
        t_duration = t_wave_duration(t_offset_loc, t_onset_loc)
        s_amplitude = s_wave_amplitude(s_peak_loc)
        qt_int = qt_interval(t_offset_loc, p_offset_loc)
        hr = heart_rate_calc(heart_rate, (min_idx + max_idx) // 2)
        j_point, j_magnitude, j_point_ind = j_point_calc(s_peak_loc, t_peak_loc, time, voltage)
        st_duration = st_duration_calc(t_peak_loc, j_point)
        st_slope = st_slope_calc(t_onset_loc, j_point_ind, time, voltage)
        st_area = st_area_calc(t_offset_loc, j_point_ind, time, voltage)
        cycles+=1

        feature_row = {
            'Case_ID': case_id,
            'Cycle': cycles,
            'R_wave_amplitude': r_amplitude,
            'R_wave_duration': r_duration,
            'T_wave_amplitude': t_amplitude,
            'T_wave_duration': t_duration,
            'S_wave_amplitude': s_amplitude,
            'QT_interval': qt_int,
            'Heart_rate': hr,
            'J_point_amplitude': j_magnitude,
            'ST_duration': st_duration,
            'ST_area': st_area,
            'ST_slope': st_slope
        }

        feature_row = pd.DataFrame([feature_row])
        record_dataset = pd.concat([record_dataset, feature_row], ignore_index=True)

    return record_dataset






def engine(initial_signal,caseid):
  print("Signal Cleaning Started")
  initial_signal = initial_signal[np.logical_not(np.isnan(initial_signal))]


  initial_signal = initial_signal[(initial_signal>= -.25) & (initial_signal<=.25)]
  signal_clean = nk.ecg_clean(initial_signal, sampling_rate=srate)
  print("Signal Cleaning Complete......")

  print("Features and Beats extracting Started......")
  features, _ = nk.ecg_process(signal_clean, sampling_rate=srate)
  beats = nk.ecg_segment(signal_clean, sampling_rate=srate)
  print("Features and Beats extracting Completed......")


  print("Making Dataset")
  data = extract_features(beats, features,caseid)
  print("Dataset Complete")
  return data



In [2]:
caseids = list(
    set(df_trks[df_trks['tname'] == 'Solar8000/ST_II']['caseid']) &
    set(df_trks[df_trks['tname'] == 'Solar8000/ART_MBP']['caseid']) &
     set(df_trks[df_trks['tname'] == 'Solar8000/ART_SBP']['caseid'])&
     set(df_trks[df_trks['tname'] == 'Solar8000/ART_DBP']['caseid'])&
    set(df_trks[df_trks['tname'] == 'SNUADC/ECG_II']['caseid'])
)

print('Total {} cases found'.format(len(caseids)))

Total 3559 cases found


In [26]:
op_st = df_cases[(df_cases['caseid'] == caseids[0])]['opstart'].values[0]
op_end = df_cases[(df_cases['caseid'] == caseids[0])]['opend'].values[0]
op_st,op_end

(1668, 10368)

In [49]:
vals = vitaldb.load_case(caseids[0], ['SNUADC/ECG_II','Solar8000/ST_II','Solar8000/ART_MBP','Solar8000/ART_SBP','Solar8000/ART_DBP'], 1 / 100)


In [50]:
time_col=[]
for i in range(len(vals)):
  time_col.append(i*(1/500))

vals = np.insert(vals, 0, time_col, axis=1)
vals = np.insert(vals, 0, np.nan, axis=1)

In [63]:
vals  = vals[vals[:,1] >= op_st]
vals =  vals[vals[:,1] <= op_end]

In [67]:


sec_segment = 10
segment_num = 1
start_idx = 0
end_idx = int(sec_segment/(1/500) + 1)


for _ in tqdm(range(int(vals.shape[0]/int(sec_segment/(1/500) + 1)))):

  temp_arr = vals[start_idx:end_idx,:]
  arr1 = np.nanmean(temp_arr, axis = 0)

  print("Processing Segment:",segment_num)
  for i in tqdm(range(start_idx,end_idx)):
    vals[i][0] = segment_num
    vals[i][3] = arr1[3]
    vals[i][4] = arr1[4]
    vals[i][5] = arr1[5]
    vals[i][6] = arr1[6]

  start_idx = end_idx
  end_idx = end_idx + int(sec_segment/(1/500))
  segment_num += 1

  0%|          | 0/64 [00:00<?, ?it/s]

Processing Segment: 1


  arr1 = np.nanmean(temp_arr, axis = 0)


  0%|          | 0/5001 [00:00<?, ?it/s]

Processing Segment: 2


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 3


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 4


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 5


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 6


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 7


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 8


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 9


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 10


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 11


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 12


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 13


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 14


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 15


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 16


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 17


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 18


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 19


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 20


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 21


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 22


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 23


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 24


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 25


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 26


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 27


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 28


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 29


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 30


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 31


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 32


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 33


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 34


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 35


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 36


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 37


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 38


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 39


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 40


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 41


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 42


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 43


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 44


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 45


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 46


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 47


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 48


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 49


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 50


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 51


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 52


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 53


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 54


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 55


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 56


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 57


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 58


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 59


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 60


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 61


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 62


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 63


  0%|          | 0/5000 [00:00<?, ?it/s]

Processing Segment: 64


  0%|          | 0/5000 [00:00<?, ?it/s]

In [68]:

vals = vals[:int(vals.shape[0]/int(sec_segment/(1/500) + 1)) * 500]
