In [24]:
import matplotlib 
import matplotlib.pyplot as plt

import wfdb 
import os
import numpy as np
import math
import sys
import scipy.stats as st
import glob, os
from os.path import basename
import csv

In [26]:
class Processor(object):
    def __init__(self, records, amount = 5*512, step = 512, url = '', save_to_file = True):
        self.records = records
        self.amount_of_points = amount
        self.step = step
        self.url_to_data = url
        self.save_to_file = save_to_file
        
    def get_indexes(self, annotation):
        n_index = [i for i, a in zip(annotation.sample, annotation.symbol) if a == 'N' ]
        p1_index = [i for i, a in zip(annotation.sample, annotation.symbol) if a == '(']
        p2_index = [i for i, a in zip(annotation.sample, annotation.symbol) if a == ')']
        t_index = [i for i, a in zip(annotation.sample, annotation.symbol) if a == 't']
        p_index = [i for i, a in zip(annotation.sample, annotation.symbol) if a == 'p']

        return n_index, p1_index, p2_index, t_index, p_index 
    
    def clear_records(self, n_index, p1_index, p2_index, t_index, p_index, minx, maxx):
        buf = 0
        indexes = []
        for i, val in enumerate(np.arange(minx, maxx)):
            if (val not in n_index) and (val not in p1_index) and (val not in p2_index)\
                and (val not in t_index) and (val not in p_index):
                buf += 1
            else:
                buf = 0
            if buf >= 50:
                indexes.append(i)

        return indexes

    def get_train_data(self, X,Y, n_index, p1_index, p2_index, t_index, p_index):
        result = []
        for i in X:
            if i in p1_index:
                result.append(0)
            elif i in p_index:
                result.append(1)
            elif i in p2_index:
                result.append(2)
            elif i in n_index:
                result.append(3)
            elif i in t_index:
                result.append(4)
            else:
                result.append(-1)

        return result

    def chunks(data, n):
        for i in range(0, len(data), n):
            yield data[i:i+n]

    def show_figure(X,Y, n_index, p1_index, p2_index, t_index, p_index):
        plt.figure(figsize=(15,5))
        plt.plot(X, Y)
        for n in p1_index:
            plt.axvline(n, color = 'red')
        for n in p2_index:
            plt.axvline(n, color = 'green')
        for n in n_index:
            plt.axvline(n, color = 'black')
        for n in t_index:
            plt.axvline(n, color = 'yellow')
        for n in p_index:
            plt.axvline(n, color = 'pink')
        plt.show()

    def record_proc(self):
        data = self.process_to_array()
        #self.print_data(data)
        if(self.save_to_file):
            self.process_to_file(data)
        return data
        
    def process_to_array(self):
        recordsList = []
        for record in self.records:
            recordname = '.' + self.url_to_data +'/qrs/' + record
            annotation = wfdb.rdann(recordname, 'q1c')
            minx, maxx = np.min(annotation.sample),np.max(annotation.sample)
            record = wfdb.rdsamp(recordname, sampfrom=minx, sampto = maxx)[0]
            annotation = wfdb.rdann(recordname, 'q1c', sampfrom=minx, sampto = maxx)

            n_index, p1_index, p2_index, t_index, p_index = self.get_indexes(annotation) 
            indexes = self.clear_records(n_index, p1_index, p2_index, t_index, p_index, minx, maxx)

            X = np.delete(np.arange(minx, maxx), indexes)
            Y = np.delete(record[:, 0], indexes)
            result = self.get_train_data(X,Y, n_index, p1_index, p2_index, t_index, p_index)
            for i,k in enumerate(X):
                X[i] = i
            
            result = list(zip(X,Y,result))
            for i in self.window(result,self.amount_of_points,self.step):
                recordsList.extend(i)
            return recordsList
            
    def process_to_file(self, data):
        for i, array in enumerate(data):
            with open('.'+self.url_to_data + '/clean_train_data' + str(i) + '.csv', "w") as the_file:
                writer = csv.writer(the_file, quoting=csv.QUOTE_NONE)
                writer.writerow(array)
        print('Created: ', len(data), ' files')
        print("Important: float values were saved with three decimal digits")
        
    def print_data(self, data):
        print('amount of records/rows: ', len(data))
        for row in data:
            print(row)
                
    def window(self, sequence, winSize, step):
        numOfChunks = ((len(sequence)-winSize)//step)+1
        for i in range(0,numOfChunks*step,step):
            yield sequence[i:i+winSize]

In [27]:
records = ['sel100','sel114','sel16265','sel16272','sel16273','sel16420','sel16483','sel213','sel233','sel302','sel306',
           'sel308','sel803','sel811','sel820','sel847','sel853','sel871','sel872','sel873','sel883','sel891',
           'sele0110','sele0121','sele0129','sele0133','sele0170','sele0203','sele0405','sele0411',
           'sele0509','sele0606']
short_records = ['sel16265']
width = 512*5
step = 512
URL_TO_DATA = '/Downloads/ecg'
SAVE_TO_FILE = False

In [29]:
processor = Processor(records, width, step, url = URL_TO_DATA)
result = processor.record_proc()
print('Amount of pieces: ', len(result))

Created:  15360  files
Important: float values were saved with three decimal digits
Amount of pieces:  15360
