In [1]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from datetime import datetime
# local imports
from ..utilities import utilities

ValueError: attempted relative import beyond top-level package

In [None]:
# Configure tqdm so loops (and pandas `progress_apply`) can show a progress bar.
# `tqdm.auto` selects the notebook widget when it is available and otherwise
# falls back to the console bar; it replaces the deprecated `tqdm_notebook` alias.
from tqdm.auto import tqdm
# `pandas()` is a classmethod, so it is called on the class itself. The previous
# `tqdm().pandas()` first instantiated a bar with no iterable, which was
# displayed and never closed.
tqdm.pandas()

In [None]:
# --- Run configuration -------------------------------------------------------
# Low cut value forwarded to `utilities.Filter` through `run`; specific to this
# dataset (presumably a frequency in Hz -- TODO confirm against utilities).
low_cut = 0.1
# Segment count forwarded to `utilities.FeatureExtractor` for every recording.
number_of_segments = 120
# Folder that holds `header.txt` and the per-record CSV files.
path = '../physiobank_tool/challenge'

In [None]:
# Collect the record names found in the first column of header.txt.
# - newline='' is what the csv module asks for when it is given a file object.
# - csv.reader yields an empty list for a blank line, so those rows are skipped;
#   previously a blank (e.g. trailing) line raised IndexError on row[0].
with open(path + '/header.txt', newline='') as f:
    files = [row[0] for row in csv.reader(f) if row]
print("Read data for the following drivers:\n", files[:10])

In [None]:
def read_file(file_name):
    """Load one recording from `<path>/<file_name>.csv` into a dataframe.

    Only the first two CSV columns are read. The first data row is discarded
    (it is a secondary header, not a sample), the columns are renamed to
    'time' and 'ECG', and both are converted to float.

    `path` is the module-level data folder set in the configuration cell.

    Returns the resulting dataframe; its index keeps the labels left after
    the first row was dropped.
    """
    csv_location = f'{path}/{file_name}.csv'
    raw = pd.read_csv(csv_location, usecols=[0, 1])
    # the row right below the CSV header carries no sample data
    trimmed = raw.drop(raw.index[0])
    trimmed.columns = ['time', 'ECG']
    # values were parsed alongside the dropped text row, so cast explicitly
    return trimmed.astype({'time': float, 'ECG': float})

In [None]:
def run(ecg_data, file_name, number_of_segments, low_cut):
    """Filter one ECG recording and extract its features.

    Parameters
    ----------
    ecg_data : dataframe with an 'ECG' column (as produced by `read_file`).
    file_name : not used inside the function; kept in the signature for callers.
    number_of_segments : forwarded to `utilities.FeatureExtractor`.
    low_cut : forwarded to `utilities.Filter` as its low cut value; the high
        cut is always `sample_rate / 3.0`.

    Returns
    -------
    The output of the two-step pipeline (filtering, then feature extraction)
    applied to the 'ECG' column, with its index reset to start at 0.
    """
    fs = utilities.detect_sample_rate(ecg_data)
    upper_cut = fs/3.0

    stages = [
        ('filtering', utilities.Filter(fs, low_cut, upper_cut)),
        ('feature_detection', utilities.FeatureExtractor(number_of_segments, fs)),
    ]
    features = Pipeline(stages).fit_transform(ecg_data['ECG'])
    # renumber rows in place, discarding whatever index the extractor produced
    features.reset_index(drop=True, inplace=True)
    return features

In [None]:
# Extract the features of every recording and stack them into one final
# dataframe that stores the ECG information of all subjects (people).
per_file_frames = []
for file in tqdm(files):
    ecg_data = read_file(file)
    current_df = run(ecg_data, file, number_of_segments, low_cut)
    # tag every row with the recording it was extracted from
    current_df['person'] = file
    per_file_frames.append(current_df)

# Concatenate once instead of re-copying the accumulated frame on every
# iteration. The list is reversed because the earlier code put each new frame
# in front of the accumulated one, so the last file read comes first; that row
# order is preserved here.
features_df = (
    pd.concat(per_file_frames[::-1], ignore_index=True)
    if per_file_frames
    else pd.DataFrame()  # pd.concat raises on an empty list; keep the empty-frame result
)

    

In [None]:
# Peek at the first rows of the combined feature table (head() defaults to 5 rows)
features_df.head()

In [None]:
# Check whether every class (person) has a comparable number of examples
#features_df['person'].value_counts()

In [None]:
# Save the final extracted features.
# The timestamp is formatted explicitly: str(datetime.now()) contains spaces and
# colons, and colons are not allowed in file names on Windows. Microseconds are
# kept so two saves within the same second do not overwrite each other.
now = datetime.now().strftime('%Y-%m-%d_%H-%M-%S_%f')
# NOTE: the "challange" spelling is left unchanged so the file prefix stays the same.
features_df.to_csv(f"../extracted_features_files/challange_{now}.csv", index=False)

In [None]:
## Examples below

In [None]:
# Visual comparison on one recording: the first `sample_rate*2` samples of the
# ECG column are filtered with ten different low cut values and every filtered
# trace is shifted vertically so the curves can be told apart.
ecg_data = read_file(files[1])
sample_rate = utilities.detect_sample_rate(ecg_data)
two_seconds = ecg_data['ECG'][0:sample_rate*2]
plt.figure(figsize=(20, 7), dpi= 80, facecolor='w', edgecolor='k')

# one filter per low cut value in np.linspace(0.1, 1, 10); the high cut is
# sample_rate/3.0, the same ratio used in run()
list_of_filters = []
for low_cut_value in np.linspace(0.1, 1, 10):
    list_of_filters.append(utilities.Filter(sample_rate, low_cut_value, sample_rate/3.0))

# the k-th filtered trace is offset by k
list_of_filtered_signals = []
for index, ecg_filter in enumerate(list_of_filters):
    list_of_filtered_signals.append(ecg_filter.fit_transform(two_seconds) + index)

for index, signal in enumerate(list_of_filtered_signals):
    plt.plot(signal, label=f'filtered_{index}')
# the unfiltered trace is drawn with a +1.0 offset
plt.plot(two_seconds+1.0, label='original')
plt.legend()