# ECG Feature Extractor
## 2017 Physionet Challenge
### Sebastian D. Goodfellow, Ph.D.

# Setup Notebook

In [9]:
# Import standard-library (os, sys) and 3rd party libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

# Import local libraries
# The parent of the current working directory is placed first on sys.path so
# that the `features` and `utils` packages imported below can be resolved
# (assumes the notebook is launched from a direct sub-folder of the project
# root - TODO confirm).
sys.path.insert(0, os.path.dirname(os.getcwd()))
from features.feature_extractor import Features
from utils.plotting.waveforms import plot_waveforms

# Configure notebook
import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')
%matplotlib inline
# Autoreload mode 2 re-imports modules before each cell execution, so edits to
# the local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<br>
# Set Constants

In [10]:
# ECG sampling rate, in Hz
fs = 300

# Data folders: each one is <parent of the working directory>/data/<folder>
label_path, waveform_path, feature_path = (
    os.path.join(os.path.dirname(os.getcwd()), 'data', folder)
    for folder in ('labels', 'waveforms', 'features')
)

# Import ECG Labels

In [11]:
# Load labels.csv from the label folder; the two column names are supplied
# explicitly through `names`
labels = pd.read_csv(
    os.path.join(label_path, 'labels.csv'),
    names=['file_name', 'label'],
)

# Preview the first ten rows
labels.head(10)

Unnamed: 0,file_name,label
0,A00021,N
1,A00022,~
2,A00023,O
3,A00024,O
4,A00025,N
5,A00026,N
6,A00027,A
7,A00028,N
8,A00029,O
9,A00030,O


# Plot ECG Waveforms

In [12]:
# Open the interactive waveform viewer for the labelled recordings
plot_waveforms(
    labels=labels,
    waveform_path=waveform_path,
    fs=fs,
)

interactive(children=(IntSlider(value=4, description='index', max=9), Output()), _dom_classes=('widget-interac…

# Extract Features

In [13]:
# Build the extractor over the waveform folder, restricted to the
# 'rri_features' feature group
ecg_features = Features(
    file_path=waveform_path,
    fs=fs,
    feature_groups=['rri_features'],
)

# Run the feature extraction (n_signals=None; presumably "process every
# file" - verify against Features.extract_features)
ecg_features.extract_features(
    filter_bandwidth=[3, 45],
    n_signals=None,
    show=True,
    labels=labels,
    normalize=True,
    polarity_check=True,
    template_before=0.25,
    template_after=0.4,
)

Finished extracting features from A00025.mat | Extraction time: 0.003 minutes
Finished extracting features from A00022.mat | Extraction time: 0.003 minutes
Finished extracting features from A00026.mat | Extraction time: 0.003 minutes
Finished extracting features from A00029.mat | Extraction time: 0.003 minutes
Finished extracting features from A00030.mat | Extraction time: 0.003 minutes
Finished extracting features from A00024.mat | Extraction time: 0.004 minutes
Finished extracting features from A00028.mat | Extraction time: 0.004 minutes
Finished extracting features from A00023.mat | Extraction time: 0.003 minutes
Finished extracting features from A00027.mat | Extraction time: 0.003 minutes
Finished extracting features from A00021.mat | Extraction time: 0.003 minutes


In [14]:
# Retrieve the extracted features as a DataFrame
features = ecg_features.get_features()

# Print the value stored at index label 0 for a few columns of interest
# (run `print(features.columns)` to list every available column)
for column in ('rpeaks_x_enzo', 'rri_mean', 'diff2_rri_mean'):
    print(features[column][0])


[ 151  427  706  993 1282 1562 1849 2138 2419 2695 2981 3258 3528 3812
 4088 4363 4651 4940 5215 5493 5779 6065 6341 6624 6911 7186 7473 7764
 8040 8329 8615]
0.9415476190476191
-0.000641025641025643


# Extraction des coordonnées x des points r_peaks

In [15]:
# Take the R-peak x-coordinates stored at index label 0 of the
# 'rpeaks_x_enzo' column and convert them to a NumPy array.
# NOTE(review): the output file name assumes that row 0 of `features` is
# record A00021 - confirm the row order before relying on it.
coord_x_rpeaks = np.array(features['rpeaks_x_enzo'][0])

# Save under <parent of the working directory>/extracted_data (the same
# location as the former '../extracted_data'), built with os.path.join like
# the other data paths of this notebook. The folder is created first because
# np.save does not create missing directories.
extracted_data_path = os.path.join(os.path.dirname(os.getcwd()), 'extracted_data')
os.makedirs(extracted_data_path, exist_ok=True)
np.save(os.path.join(extracted_data_path, 'coord_x_r_peaks_A00021.npy'), coord_x_rpeaks)

### À mettre à jour dans le rapport
Ce n'est pas vraiment une bonne idée de regarder la distance entre chaque pic. Il faudrait plutôt regarder les bpm et vérifier s'ils sont dans la norme (si < 60 bpm alors bradycardie, si > 100 bpm alors tachycardie).

Également préciser pourquoi 3 % : c'est une valeur qui a été trouvée par essai-erreur et qui donne de bons résultats.

# Save Features

In [16]:
# Write the feature table to features.csv in the feature folder, leaving out
# the DataFrame index
features.to_csv(
    os.path.join(feature_path, 'features.csv'),
    index=False,
)