# Physionet 2017 | ECG Rhythm Classification
## 3. Create Training Dataset
### Sebastian D. Goodfellow, Ph.D.

<br>
# Setup Notebook

In [18]:
# Import 3rd party libraries
import os
import sys

# Import local Libraries
sys.path.insert(0, r'C:\Users\sebig\Documents\code\deep_ecg')
from utils.data.ecg_tools.waveform_db import WaveformDB
from utils.data.ecg_tools.training_dataset import TrainingDataset
from utils.plotting.waveforms import plot_waveforms_interact

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 1. Import Waveform Database

In [19]:
# Sampling frequency handed to the waveform database and the training dataset
# (presumably in Hz -- TODO confirm)
fs = 300

# Data directory: the 'data' folder located in the parent of the current
# working directory
path = os.path.join(os.path.split(os.getcwd())[0], 'data')

In [20]:
# Waveform and label folders inside the data directory
waveforms_dir = os.path.join(path, 'waveforms')
labels_dir = os.path.join(path, 'labels')

# Create the database object, then build it
waveform_db = WaveformDB(path_waveforms=waveforms_dir, path_labels=labels_dir, fs=fs)
waveform_db.load_database()

# 2. Create Training Dataset

In [21]:
# Build the training dataset from the waveforms held by the database.
# NOTE(review): duration is presumably in seconds (60 * fs = 18000, matching
# the 18000 sample columns in the preview further down) -- confirm.
dataset = TrainingDataset(
    waveforms=waveform_db.waveforms,
    duration=60,
    path=path,
    fs=fs,
    file_name='training_60s.pickle',
    classes=['N', 'A', 'O'],
)

In [22]:
# Plot dataset
# Opens the dataset's interactive viewer; the saved output shows an integer
# 'index' slider used to step through the entries.
dataset.plot_interact()

interactive(children=(IntSlider(value=4124, description='index', max=8248), Output()), _dom_classes=('widget-i…

# 3. Test training and validation dimensions

In [23]:
# Dataset
# Preview the first rows of the waveform table. In the saved output each row
# has sample columns 0..17999, and some rows are zero at both ends
# (presumably zero padding of shorter recordings -- TODO confirm).
dataset.data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17990,17991,17992,17993,17994,17995,17996,17997,17998,17999
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.006038,-0.029938,-0.050968,-0.066733,-0.075896,-0.078686,-0.07684,-0.072681,-0.067662,-0.06131,...,0.166619,0.176484,0.183984,0.186159,0.179995,0.163308,0.135429,0.097412,0.051803,0.002156354
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.00929,-0.051783,-0.090654,-0.122534,-0.144774,-0.156099,-0.157107,-0.150224,-0.138938,-0.126603,...,-0.415277,-0.381404,-0.340459,-0.291859,-0.238339,-0.18427,-0.133117,-0.086029,-0.042192,-1.526284e-17


In [24]:
# Labels
# Preview the first rows of the label table. The saved output lists the
# columns file_name, label_str and label_int (N -> 0 and A -> 1 in the rows
# shown).
dataset.labels.head()

Unnamed: 0,file_name,label_str,label_int
0,A00001,N,0
1,A00002,N,0
2,A00003,N,0
3,A00004,A,1
4,A00005,A,1
