# ECG Feature Extractor
## 2017 Physionet Challenge
### Sebastian D. Goodfellow, Ph.D.

# Setup Notebook

In [151]:
# Import 3rd party libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

# Import local Libraries
sys.path.insert(0, os.path.dirname(os.getcwd()))
from features.feature_extractor import Features
from utils.plotting.waveforms import plot_waveforms

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<br>
# Set Constants

In [2]:
# Sampling frequency (Hz)
fs = 300  

# Data paths
label_path = os.path.join(os.path.dirname(os.getcwd()), 'data', 'labels')
waveform_path = os.path.join(os.path.dirname(os.getcwd()), 'data', 'waveforms')
feature_path = os.path.join(os.path.dirname(os.getcwd()), 'data', 'features')

# Import ECG Labels

In [3]:
# Read labels CSV
labels = pd.read_csv(os.path.join(label_path, 'labels.csv'), names=['file_name', 'label'])

# View DataFrame
labels.head(10)

Unnamed: 0,file_name,label
0,A00021,N
1,A00022,~
2,A00023,O
3,A00024,O
4,A00025,N
5,A00026,N
6,A00027,A
7,A00028,N
8,A00029,O
9,A00030,O


# Plot ECG Waveforms

In [4]:
# Launch interactive plotting widget
plot_waveforms(labels=labels, waveform_path=waveform_path, fs=fs)

interactive(children=(IntSlider(value=4, description='index', max=9), Output()), _dom_classes=('widget-interac…

# Extract Features

In [5]:
# Instantiate
ecg_features = Features(file_path=waveform_path, fs=fs, feature_groups=['full_waveform_features'])

# Calculate ECG features
ecg_features.extract_features(
    filter_bandwidth=[3, 45], n_signals=None, show=True, 
    labels=labels, normalize=True, polarity_check=True,
    template_before=0.25, template_after=0.4
)

Finished extracting features from A00021.mat | Extraction time: 0.004 minutes
Finished extracting features from A00022.mat | Extraction time: 0.005 minutes
Finished extracting features from A00023.mat | Extraction time: 0.004 minutes
Finished extracting features from A00024.mat | Extraction time: 0.005 minutes
Finished extracting features from A00025.mat | Extraction time: 0.005 minutes
Finished extracting features from A00026.mat | Extraction time: 0.004 minutes
Finished extracting features from A00027.mat | Extraction time: 0.005 minutes
Finished extracting features from A00028.mat | Extraction time: 0.005 minutes
Finished extracting features from A00029.mat | Extraction time: 0.003 minutes
Finished extracting features from A00030.mat | Extraction time: 0.005 minutes


In [6]:
# Get features DataFrame
features = ecg_features.get_features()

# View DataFrame
features.head(10)

Unnamed: 0,file_name,label,full_waveform_duration,full_waveform_kurtosis,full_waveform_max,full_waveform_mean,full_waveform_median,full_waveform_min,full_waveform_skew,full_waveform_std,...,swt_d_3_energy_entropy,swt_d_3_high_power_ratio,swt_d_3_higuchi_fractal,swt_d_3_low_power_ratio,swt_d_3_med_power_ratio,swt_d_4_energy_entropy,swt_d_4_high_power_ratio,swt_d_4_higuchi_fractal,swt_d_4_low_power_ratio,swt_d_4_med_power_ratio
0,A00021,N,29.993334,17.586632,1.078687,-0.001071,-0.005645,-0.926257,1.123091,0.168382,...,-49257.007497,0.165148,1.603244,0.861031,0.106582,-67495.761089,0.112211,1.74563,0.876491,0.139795
1,A00022,~,29.993334,186.943393,11.336603,0.00592,-0.015021,-3.053789,8.539399,0.448065,...,-29308.911535,0.101234,1.681422,0.818841,0.103232,-47754.339321,0.122283,1.850699,0.833149,0.127526
2,A00023,O,29.993334,23.806715,1.084203,0.001047,-0.001793,-0.569955,3.847851,0.147708,...,-50412.22657,0.140089,1.652927,0.891568,0.118739,-68218.166248,0.098615,1.796991,0.905369,0.160293
3,A00024,O,29.993334,16.755881,1.127383,0.000431,-0.009887,-0.460385,3.331231,0.164943,...,-47704.749291,0.160745,1.552763,0.713365,0.086996,-66148.672913,0.106267,1.703458,0.729247,0.10763
4,A00025,N,29.993334,20.460997,1.099219,0.001304,-0.005782,-0.37602,3.705835,0.148803,...,-51189.04201,0.14049,1.629942,0.619724,0.101542,-69002.674228,0.097502,1.784161,0.633528,0.139208
5,A00026,N,29.993334,26.423581,1.088726,0.000484,-0.005873,-0.300124,4.351558,0.138428,...,-53693.44987,0.163446,1.598554,0.73022,0.104961,-72170.234663,0.110092,1.743084,0.746238,0.136632
6,A00027,A,29.993334,4.182249,1.142597,-0.004536,-0.008124,-1.26173,-0.093087,0.29554,...,-42018.523028,0.107055,1.604812,0.598398,0.11526,-60561.603002,0.098163,1.776238,0.612691,0.103083
7,A00028,N,59.993334,24.053043,1.058158,0.000643,-0.003022,-0.400153,4.127633,0.146124,...,-107837.970823,0.218004,1.514557,0.832327,0.092834,-144336.658169,0.156514,1.640446,0.846448,0.1057
8,A00029,O,29.993334,12.911929,1.20193,0.001085,0.008018,-1.129869,1.003582,0.204132,...,-43041.145948,0.145105,1.659092,0.654917,0.133569,-60798.823282,0.10187,1.794644,0.667569,0.176868
9,A00030,O,59.993334,17.591774,1.122591,-0.000551,-0.002507,-1.394709,0.832302,0.171259,...,-99939.554307,0.16248,1.610238,0.847671,0.107021,-136124.687363,0.113337,1.756203,0.861335,0.142411


<br>
# Save Features

In [7]:
# Save features DataFrame to CSV
features.to_csv(os.path.join(feature_path, 'features.csv'), index=False)

In [20]:
features.columns

Index(['file_name', 'label', 'full_waveform_duration',
       'full_waveform_kurtosis', 'full_waveform_max', 'full_waveform_mean',
       'full_waveform_median', 'full_waveform_min', 'full_waveform_skew',
       'full_waveform_std', 'swt_a_1_energy_entropy',
       'swt_a_1_high_power_ratio', 'swt_a_1_higuchi_fractal',
       'swt_a_1_low_power_ratio', 'swt_a_1_med_power_ratio',
       'swt_a_2_energy_entropy', 'swt_a_2_high_power_ratio',
       'swt_a_2_higuchi_fractal', 'swt_a_2_low_power_ratio',
       'swt_a_2_med_power_ratio', 'swt_a_3_energy_entropy',
       'swt_a_3_high_power_ratio', 'swt_a_3_higuchi_fractal',
       'swt_a_3_low_power_ratio', 'swt_a_3_med_power_ratio',
       'swt_a_4_energy_entropy', 'swt_a_4_high_power_ratio',
       'swt_a_4_higuchi_fractal', 'swt_a_4_low_power_ratio',
       'swt_a_4_med_power_ratio', 'swt_d_1_energy_entropy',
       'swt_d_1_high_power_ratio', 'swt_d_1_higuchi_fractal',
       'swt_d_1_low_power_ratio', 'swt_d_1_med_power_ratio',
       '

In [24]:
import numpy as np
ecg = np.load('C:/Users/yoonji811/OneDrive - 한양대학교/HYUscec/15.산학프로젝트/미니프로젝트/ECG5_reshape.npy')

In [25]:
np.shape(ecg)

(5000, 12, 5000, 1)

In [28]:
ecg = ecg[:,:,:,0]
np.shape(ecg)

(5000, 12, 5000)

In [29]:
np.shape(ecg[0])

(12, 5000)

In [33]:
ecg[0]

array([[-253.76, -253.76, -253.76, ...,  -34.16,   24.4 ,    4.88],
       [ 263.52,  263.52,  263.52, ...,  -68.32,  -48.8 ,  -34.16],
       [ 517.28,  517.28,  517.28, ...,  -34.16,  -73.2 ,  -39.04],
       ...,
       [ 810.08,  810.08,  810.08, ..., -204.96, -200.08, -170.8 ],
       [ 810.08,  810.08,  810.08, ..., -200.08, -195.2 , -165.92],
       [ 527.04,  527.04,  527.04, ...,  102.48,   92.72,  112.24]])

In [35]:
ecg[0][[0,2,-1],:]

array([[-253.76, -253.76, -253.76, ...,  -34.16,   24.4 ,    4.88],
       [ 517.28,  517.28,  517.28, ...,  -34.16,  -73.2 ,  -39.04],
       [ 527.04,  527.04,  527.04, ...,  102.48,   92.72,  112.24]])

In [36]:
a = [0,2,-1]

In [37]:
ecg[0][a,:]

array([[-253.76, -253.76, -253.76, ...,  -34.16,   24.4 ,    4.88],
       [ 517.28,  517.28,  517.28, ...,  -34.16,  -73.2 ,  -39.04],
       [ 527.04,  527.04,  527.04, ...,  102.48,   92.72,  112.24]])

In [38]:
for i in ecg[0][a,:]:
    print(i)

[-253.76 -253.76 -253.76 ...  -34.16   24.4     4.88]
[517.28 517.28 517.28 ... -34.16 -73.2  -39.04]
[527.04 527.04 527.04 ... 102.48  92.72 112.24]


In [40]:
len(ecg[0])

12

In [42]:
b = [i for i in range(len(ecg[0]))]

In [44]:
ecg[0] == ecg[0][b,:]

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [46]:
for i,j in zip(a, ecg[0][a,:]):
    print(i)
    print(j)

0
[-253.76 -253.76 -253.76 ...  -34.16   24.4     4.88]
2
[517.28 517.28 517.28 ... -34.16 -73.2  -39.04]
-1
[527.04 527.04 527.04 ... 102.48  92.72 112.24]


In [52]:
np.shape(ecg[0:2,:,:])

(2, 12, 5000)

In [141]:
labels = pd.DataFrame({'patientIdx':[0,1,2,3,4]})
labels

Unnamed: 0,patientIdx
0,0
1,1
2,2
3,3
4,4


In [154]:
# Instantiate
fs = 500
ecg_features = Features(ecgarray = ecg[5:10,:,:],leads=[0,2,4], fs=fs, feature_groups=['full_waveform_features'])

# Calculate ECG features
ecg_features.extract_features(
    filter_bandwidth=[3, 45], n_signals=None, show=True, 
    labels=labels, normalize=True, polarity_check=True,
    template_before=0.25, template_after=0.4
)

Finished extracting features from 0th patient | Extraction time: 0.007 minutes
Finished extracting features from 1th patient | Extraction time: 0.007 minutes
Finished extracting features from 2th patient | Extraction time: 0.007 minutes
Finished extracting features from 3th patient | Extraction time: 0.006 minutes
Finished extracting features from 4th patient | Extraction time: 0.006 minutes


In [155]:
# Get features DataFrame
features = ecg_features.get_features()

# View DataFrame
features

Unnamed: 0,patientIdx,full_waveform_duration_0,full_waveform_duration_2,full_waveform_duration_4,full_waveform_kurtosis_0,full_waveform_kurtosis_2,full_waveform_kurtosis_4,full_waveform_max_0,full_waveform_max_2,full_waveform_max_4,...,swt_d_4_high_power_ratio_4,swt_d_4_higuchi_fractal_0,swt_d_4_higuchi_fractal_2,swt_d_4_higuchi_fractal_4,swt_d_4_low_power_ratio_0,swt_d_4_low_power_ratio_2,swt_d_4_low_power_ratio_4,swt_d_4_med_power_ratio_0,swt_d_4_med_power_ratio_2,swt_d_4_med_power_ratio_4
0,0,9.996,9.996,9.996,25.890774,14.118498,22.179833,1.081637,1.294737,1.106111,...,0.095438,1.355721,1.401414,1.380381,0.656938,0.709017,0.662028,0.141835,0.183509,0.151556
1,1,9.996,9.996,9.996,15.714028,5.638074,10.019322,1.12486,1.551256,1.205381,...,0.120126,1.409789,1.438883,1.425172,0.738589,0.617857,0.700774,0.250887,0.202837,0.190831
2,2,9.996,9.996,9.996,23.246216,18.160616,23.851977,1.07176,1.162867,1.105535,...,0.110093,1.420294,1.419038,1.416991,0.632336,0.672839,0.634655,0.173612,0.183041,0.172685
3,3,9.996,9.996,9.996,28.37796,34.498405,34.894775,1.044313,1.025868,1.040858,...,0.108602,1.409326,1.390855,1.398828,0.701955,0.68923,0.74939,0.225016,0.17402,0.218728
4,4,9.996,9.996,9.996,18.937197,31.89222,22.217475,1.128272,1.028866,1.108446,...,0.108794,1.373611,1.382656,1.38284,0.726107,0.69991,0.786231,0.202695,0.239776,0.235114


In [148]:
features.iloc[:,12:]

Unnamed: 0,full_waveform_duration_9,full_waveform_kurtosis_0,full_waveform_kurtosis_1,full_waveform_kurtosis_10,full_waveform_kurtosis_11,full_waveform_kurtosis_2,full_waveform_kurtosis_3,full_waveform_kurtosis_4,full_waveform_kurtosis_5,full_waveform_kurtosis_6,...,swt_d_4_med_power_ratio_10,swt_d_4_med_power_ratio_11,swt_d_4_med_power_ratio_2,swt_d_4_med_power_ratio_3,swt_d_4_med_power_ratio_4,swt_d_4_med_power_ratio_5,swt_d_4_med_power_ratio_6,swt_d_4_med_power_ratio_7,swt_d_4_med_power_ratio_8,swt_d_4_med_power_ratio_9
0,9.996,25.890774,28.004396,26.914884,27.947339,14.118498,27.192936,22.179833,27.361911,28.216905,...,0.128912,0.129015,0.183509,0.137108,0.151556,0.176394,0.217587,0.088591,0.088258,0.118667
1,9.996,15.714028,15.912126,17.050485,20.207347,5.638074,18.277523,10.019322,9.158388,12.371678,...,0.22672,0.230716,0.202837,0.283951,0.190831,0.355822,0.237076,0.174085,0.195233,0.221504
2,9.996,23.246216,18.843591,26.452645,25.794204,18.160616,21.681886,23.851977,11.127415,11.261527,...,0.172698,0.165214,0.183041,0.180251,0.172685,0.266865,0.248538,0.088999,0.196014,0.178352
3,9.996,28.37796,27.113252,32.289533,34.416925,34.498405,29.360963,34.894775,29.212189,14.769551,...,0.185212,0.18452,0.17402,0.175976,0.218728,0.146089,0.202322,0.147469,0.10357,0.161166
4,9.996,18.937197,23.625787,29.473463,28.432143,31.89222,21.001206,22.217475,27.414908,21.340945,...,0.182433,0.180996,0.239776,0.18751,0.235114,0.198872,0.125666,0.161668,0.181775,0.188786


In [138]:
576/12

48.0

In [137]:
features.columns

Index(['patientIdx', 'full_waveform_duration_0', 'full_waveform_duration_1',
       'full_waveform_duration_10', 'full_waveform_duration_11',
       'full_waveform_duration_2', 'full_waveform_duration_3',
       'full_waveform_duration_4', 'full_waveform_duration_5',
       'full_waveform_duration_6',
       ...
       'swt_d_4_med_power_ratio_10', 'swt_d_4_med_power_ratio_11',
       'swt_d_4_med_power_ratio_2', 'swt_d_4_med_power_ratio_3',
       'swt_d_4_med_power_ratio_4', 'swt_d_4_med_power_ratio_5',
       'swt_d_4_med_power_ratio_6', 'swt_d_4_med_power_ratio_7',
       'swt_d_4_med_power_ratio_8', 'swt_d_4_med_power_ratio_9'],
      dtype='object', length=577)

In [124]:
a = pd.Series({'ix':0, 'a':1, 'b':2})
a

ix    0
a     1
b     2
dtype: int64

In [125]:
b = pd.Series({'id':0, 'c':3,'d':4})
b

id    0
c     3
d     4
dtype: int64

In [126]:
c = pd.Series({'idx':0})

In [127]:
c

idx    0
dtype: int64

In [111]:
d = pd.DataFrame()

In [128]:
c = c.append(a)
c = c.append(b)
c

idx    0
ix     0
a      1
b      2
id     0
c      3
d      4
dtype: int64

In [131]:
d.append(c, ignore_index=True)

Unnamed: 0,a,b,c,d,id,idx,ix
0,1.0,2.0,3.0,4.0,0.0,0.0,0.0
