In [13]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# local imports
from utilities import utilities

## Alguns parametros especificos para este dataset

In [12]:
path = 'physiobank_tool/mitdb'
number_of_segments = 120
low_cut = 0.1

## Lendo os arquivos que contém o sinal

In [14]:
files = []
with open(path+'/header.txt') as f:
    reader = csv.reader(f)
    for row in reader:
        #print(row)
        files.append(row[0])
print("Read data for the following drivers:\n", files)

Read data for the following drivers:
 ['100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '111', '112', '113', '114', '115', '116', '117', '118', '119', '121', '122', '123', '124', '200', '201', '202', '203', '205', '207', '208', '209', '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230', '231', '232', '233', '234']


## Importando os arquivos para um DataFrame

In [15]:
def read_file(file_name):
    ''' docstring here later '''
    cols_of_interest = [0,1,2]
    ecg_data = pd.read_csv(f'{path}/{file_name}.csv', usecols=cols_of_interest)
    # drop useless header
    ecg_data = ecg_data.drop(ecg_data.index[0])
    # name columns
    ecg_data.columns = ['time', 'ECG', 'ECGF']
    # cast some columns to float
    ecg_data['time'] = ecg_data['time'].astype(float)
    ecg_data['ECG'] = ecg_data['ECG'].astype(float)
    
    return ecg_data

In [16]:
def run(ecg_data, file_name, number_of_segments, low_cut):
    """adiciona a docstring aqui depois"""
    sample_rate = utilities.detect_sample_rate(ecg_data)
    print(sample_rate)
    high_cut = sample_rate/3.0
    
    data_preparation_pipeline = Pipeline([
        ('filtering', utilities.Filter(sample_rate, low_cut, high_cut)),
        ('feature_detection', utilities.FeatureExtractor(number_of_segments, sample_rate)),
        ])

    extracted_features_df = data_preparation_pipeline.fit_transform(ecg_data['ECG'])
    #print(extracted_features_df)
    #extracted_features_df.to_csv(f"{path}_output/{file_name}.csv")
    extracted_features_df.reset_index(drop=True, inplace=True)
    return extracted_features_df

In [None]:
features_df = pd.DataFrame()
for file in files:
    ecg_data = read_file(file)
    plt.plot(ecg_data['ECG'][:10000])
    current_df = run(ecg_data, file, number_of_segments, low_cut)
    current_df['person'] = file
    features_df = pd.concat([current_df, features_df], ignore_index=True)

features_df.to_csv("_mitdb.csv")

360
360
360
360
360
360
360
360
360
360
360
360
360
360


## dando uma olhada no signal e a filtragem obtida:

In [8]:
primeiro_cara

Unnamed: 0,mean_q,mean_r,mean_s,stdev_q,stdev_r,stdev_s,mean_qrs_interval,mean_rr_interval,mean_rq_amplitude
0,-0.076557,0.726937,-0.089368,0.012151,0.028672,0.016181,799.395161,0.803493,61.827957
1,0.040557,0.751134,-0.027797,0.022051,0.064234,0.041294,685.483871,0.710577,60.483871
2,-0.08796,0.573185,-0.157539,0.010951,0.0527,0.039502,733.870968,0.661145,57.795699
3,-0.091657,0.597291,-0.13432,0.028864,0.022125,0.016704,608.870968,0.688948,60.483871
4,-0.050131,0.704164,-0.086183,0.025439,0.023942,0.027725,897.177419,0.754295,57.459677
5,-0.094434,0.607933,-0.113431,0.012614,0.018382,0.014636,753.024194,0.702367,62.5
6,-0.067672,0.675807,-0.091626,0.015256,0.019826,0.018262,947.580645,0.743479,59.475806
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,-0.075189,0.590012,-0.114151,0.001905,0.01159,0.003306,711.693548,0.665202,62.5
9,-0.121485,0.556244,-0.173253,0.037438,0.03182,0.035558,794.354839,0.677729,61.155914


In [9]:
filter = utilities.Filter(sample_rate, low_cut, high_cut)
filtered_two_seconds = filter.fit_transform(two_seconds)

fig=plt.figure(figsize=(20, 7), dpi= 80, facecolor='w', edgecolor='k')

plt.plot(filtered_two_seconds)
two_seconds.plot()

NameError: name 'sample_rate' is not defined

## Fazendo pre-processamento dos dados

In [None]:
df = read_file(files[0])
df.sample(n=30)