# Import Necessary Packages

In [97]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pywt

# Load Data from CSV

Database Definition

In [98]:
# Record-name lists, one per selectable dataset. Every entry is a string
# because it is later concatenated into CSV file names.
DS1 = ('101 106 108 109 112 114 115 116 118 119 122 124 '
       '201 203 205 207 208 209 215 220 223 230').split()

DS2 = ('100 103 105 111 113 117 121 123 200 202 210 212 '
       '213 214 219 221 222 228 231 232 233 234').split()

# SUP1 is made of three contiguous runs of record numbers: 800-812, 820-829, 840-850.
SUP1 = [str(rec) for rec in
        list(range(800, 813)) + list(range(820, 830)) + list(range(840, 851))]

# SUP2 is the single contiguous run 851-894.
SUP2 = [str(rec) for rec in range(851, 895)]

# INCART record names are 'I' followed by a zero-padded two-digit number.
# range(1, 76) stops before 76, so the list runs from 'I01' to 'I75' (75 records).
INCART = ['I' + str(rec_no).zfill(2) for rec_no in range(1, 76)]

Choose a database

In [110]:
# Select the dataset to process. The two names must be changed together:
#   DSName - sub-folder name used in the input/output CSV paths; it also
#            enables the record-114 lead swap when it equals 'DS1'.
#   DS     - the list of record names to iterate over.
DSName = 'DS1'  # one of: 'DS1', 'DS2', 'SUP', 'INCART'
DS = DS1        # one of: DS1, DS2, SUP1, SUP2, INCART

In [111]:
# Read the signal CSV and the annotation CSV of every selected record into
# two dictionaries keyed by record name.
trainingSet_leads = {}
trainingSet_anns = {}
record_prefix = '~/Data/Cleaned Data/' + DSName + '/'
for ds in DS:
    # Signal file first, then annotation file, for each record in turn.
    for file_suffix, store in (('_lead.csv', trainingSet_leads),
                               ('_ann.csv', trainingSet_anns)):
        store[ds] = pd.read_csv(record_prefix + ds + file_suffix)

# Heartbeat Annotation

In [101]:
# Annotation symbols grouped by heartbeat class. Each symbol is a single
# character, so each group is written as a string and split into characters.
N = list('NLRej')
SVEB = list('AaJS')
VEB = list('VE')
F = list('F')
Q = list('lfQ')
# Symbols that do not mark a heartbeat; annotations carrying one of these
# are skipped during segmentation.
Non_beat_anns = list('[]!x()ptu`~^|+sT*D="@')

# Heartbeat Segmentation

For each heartbeat with a known R-peak location, we take the samples from 250 ms before the R peak to 400 ms after it, i.e. 0.65 s per heartbeat. At a sampling rate of 360 Hz this is 90 samples before the R peak and 144 samples after it (235 samples per beat including the R-peak sample).

In [102]:
# Cut a fixed-length window around every beat annotation of every record.
# Hbs maps record name -> DataFrame with one row per kept beat:
#   lead0 / lead1 : the 235-sample window of each signal (a pandas Series)
#   ann           : the annotation symbol
#   annIdx        : annotation position minus one
Hbs = {}
for ds in DS:
    signals = trainingSet_leads[ds]
    lead0 = signals['lead0']
    lead1 = signals['lead1']
    n_samples = len(lead0)
    kept = {'lead0': [], 'lead1': [], 'ann': [], 'annIdx': []}
    for row in trainingSet_anns[ds].itertuples():
        # row[0] is the DataFrame index; row[1] and row[2] are the first two
        # CSV columns - presumably the 1-based sample number and the
        # annotation symbol (TODO confirm against the *_ann.csv schema).
        position, symbol = row[1], row[2]
        # Skip non-beat annotations and beats whose window would run past
        # either end of the recording.
        if symbol in Non_beat_anns or position < 91 or position + 144 > n_samples:
            continue
        # 90 samples before annIdx, the annIdx sample itself, 144 after.
        window = slice(position - 91, position + 144)
        kept['lead0'].append(lead0[window])
        kept['lead1'].append(lead1[window])
        kept['ann'].append(symbol)
        kept['annIdx'].append(position - 1)
    Hbs[ds] = pd.DataFrame(kept)

DS1 recording 114 lead correction:

In [103]:
# Exchange the lead0 and lead1 beat windows of record 114 (DS1 only).
# NOTE: the swap is unconditional within DS1, so running this cell a second
# time without re-running the segmentation cell swaps the leads back.
if DSName == 'DS1':
    rec114 = Hbs['114']
    # Materialise lead0 as an independent Series before it is overwritten.
    former_lead0 = pd.Series(list(rec114['lead0']))
    rec114['lead0'] = rec114['lead1']
    rec114['lead1'] = former_lead0

# Computing RR Intervals

In [104]:
def _rr_intervals(ann_idx):
    """Return (preRR, postRR) arrays for a sequence of beat positions.

    preRR[k]  = ann_idx[k] - ann_idx[k-1], NaN for the first beat.
    postRR[k] = ann_idx[k+1] - ann_idx[k], NaN for the last beat.

    Both arrays are float and have the same length as ``ann_idx``. Sequences
    with zero or one beat yield all-NaN arrays instead of raising.
    """
    idx = np.asarray(ann_idx, dtype=float)
    pre_rr = np.full(idx.shape, np.nan)
    post_rr = np.full(idx.shape, np.nan)
    if idx.size > 1:
        gaps = np.diff(idx)   # distance between consecutive kept beats
        pre_rr[1:] = gaps     # every beat but the first has a predecessor
        post_rr[:-1] = gaps   # every beat but the last has a successor
    return pre_rr, post_rr


# Add the distance (in samples) to the previous and next kept beat as
# 'preRR' and 'postRR' columns of every record.
for ds in DS:
    preRR, postRR = _rr_intervals(Hbs[ds]['annIdx'])
    Hbs[ds]['preRR'] = preRR
    Hbs[ds]['postRR'] = postRR

# Skewness and Kurtosis Calculation

In [105]:
# Per-beat skewness and kurtosis of each lead, computed with the pandas
# Series methods on every stored beat window.
for ds in DS:
    record = Hbs[ds]
    beats0 = record['lead0']
    beats1 = record['lead1']
    record['skewness_0'] = [beat.skew() for beat in beats0]
    record['skewness_1'] = [beat.skew() for beat in beats1]
    record['kurtosis_0'] = [beat.kurt() for beat in beats0]
    record['kurtosis_1'] = [beat.kurt() for beat in beats1]

# DWT-based Features

In [106]:
# 7-level Haar wavelet decomposition of every beat on both leads.
# pywt.wavedec returns the coefficient arrays ordered as
# [cA7, cD7, cD6, cD5, cD4, cD3, cD2, cD1]; each one is stored as a plain
# list in a column named '<band>_<lead number>'.
wavelet = pywt.Wavelet('haar')
band_names = ['cA7', 'cD7', 'cD6', 'cD5', 'cD4', 'cD3', 'cD2', 'cD1']
for ds in DS:
    record = Hbs[ds]
    # Decompose both leads first, then attach the columns, lead 0 before lead 1.
    per_lead_bands = {}
    for lead_no in ('0', '1'):
        bands = {name: [] for name in band_names}
        for beat in record['lead' + lead_no]:
            coeffs = pywt.wavedec(list(beat), wavelet, mode='symmetric', level=7)
            for name, coeff in zip(band_names, coeffs):
                bands[name].append(list(coeff))
        per_lead_bands[lead_no] = bands

    for lead_no in ('0', '1'):
        for name in band_names:
            record[name + '_' + lead_no] = per_lead_bands[lead_no][name]

# Split lead0 and lead1 into two datasets

In [107]:
# Build one DataFrame per record and per lead. Columns shared by both leads
# (ann, annIdx, preRR, postRR) are copied as-is; lead-specific columns lose
# their '_0' / '_1' suffix, and the beat window becomes 'beat'.
Hbs_lead0 = {}
Hbs_lead1 = {}
per_lead_features = ['skewness', 'kurtosis',
                     'cA7', 'cD7', 'cD6', 'cD5', 'cD4', 'cD3', 'cD2', 'cD1']

for ds in DS:
    record = Hbs[ds]
    for lead_no, target in (('0', Hbs_lead0), ('1', Hbs_lead1)):
        # Insertion order of this dict fixes the column order of the frame.
        columns = {
            'ann': record['ann'],
            'annIdx': record['annIdx'],
            'beat': record['lead' + lead_no],
            'preRR': record['preRR'],
            'postRR': record['postRR'],
        }
        for feature in per_lead_features:
            columns[feature] = record[feature + '_' + lead_no]
        target[ds] = pd.DataFrame(columns)


# Save Extracted Feature to CSV

A pandas Series stored inside a DataFrame cell cannot be written to CSV faithfully, so each beat is expanded into two plain lists: its sample values and its sample indices.

In [108]:
# Expand every beat Series into two list-valued columns, 'beatValues'
# (the samples) and 'beatIndex' (their index labels). The original 'beat'
# column is left in place.
for ds in DS:
    for frame in (Hbs_lead0[ds], Hbs_lead1[ds]):
        beats = frame['beat']
        frame['beatValues'] = [list(beat.values) for beat in beats]
        frame['beatIndex'] = [list(beat.index) for beat in beats]

In [109]:
# Write the per-lead feature tables of every record to
# '<record>_lead0.csv' and '<record>_lead1.csv', without the row index.
output_prefix = '~/Data/Extracted Features/PLOS_ONE/' + DSName + '/'
for ds in DS:
    for lead_tag, frames in (('_lead0', Hbs_lead0), ('_lead1', Hbs_lead1)):
        frames[ds].to_csv(output_prefix + ds + lead_tag + '.csv', index=False)