In [55]:
import wfdb
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Importing MITDB

In [56]:
# Per-record accumulators: entry i of every list belongs to the i-th record loaded.
records_A = []      # signal arrays returned by wfdb.rdsamp
properties_A = []   # metadata dicts ("fields") returned by wfdb.rdsamp
annot_A = []        # full 'atr' annotation objects
AnnSymb_A = []      # 'atr' annotation symbols, one pd.Series per record
AnnSamp_A = []      # 'atr' annotation sample indices, one pd.Series per record
AnnRhythm_A = []    # 'atr' aux_note strings, one pd.Series per record
Rpeak_Samp_A = []   # 'xws' annotation sample indices, one pd.Series per record
Rpeak_Symb_A = []   # 'xws' annotation symbols, one pd.Series per record

MITDB_DIR = '/data/mitdb'  #### change the path to own directory.

# glob.glob returns paths in arbitrary (filesystem-dependent) order. The records
# are concatenated further down, so sort them to make every run reproducible.
for f in sorted(glob.glob(os.path.join(MITDB_DIR, '*.dat'))):
    record_path = os.path.splitext(f)[0]  # strip the '.dat' extension before handing the path to wfdb

    #### In this function, pass "channels=[0]" or "channels=[1]" to select channel 1 or 2.
    sig, fields = wfdb.rdsamp(record_path, channels=[1])

    ann = wfdb.rdann(record_path, 'atr')
    QRS = wfdb.rdann(record_path, 'xws')

    records_A.append(sig)
    properties_A.append(fields)
    annot_A.append(ann)
    AnnSymb_A.append(pd.Series(ann.symbol))
    AnnSamp_A.append(pd.Series(ann.sample))
    Rpeak_Symb_A.append(pd.Series(QRS.symbol))
    Rpeak_Samp_A.append(pd.Series(QRS.sample))
    AnnRhythm_A.append(pd.Series(ann.aux_note))

# Repack the two lists as arrays holding one pd.Series per record.
AnnSymb_A = pd.Series(AnnSymb_A).values
AnnSamp_A = pd.Series(AnnSamp_A).values

In [68]:
# Flatten the per-record series into one long list each, preserving record order.
appended_AnnSamp = [sample for record_samples in AnnSamp_A for sample in record_samples]
# NOTE: despite its name, this list is built from the aux_note strings in
# AnnRhythm_A (not from the symbols in AnnSymb_A).
appended_AnnSymb = [note for record_notes in AnnRhythm_A for note in record_notes]

In [1]:
# One row per annotation: its sample index ('Rpeaks') next to its aux-note string ('Label').
df = pd.DataFrame(dict(Rpeaks=appended_AnnSamp, Label=appended_AnnSymb))

### Rescaling the MITDB sampling frequency (360 Hz) to 250 Hz (to match AFDB) and preprocessing the dataset

In [70]:
# NOTE: this cell rescales df['Rpeaks'] in place, so it is not idempotent;
# rebuild `df` before running it a second time.
MITDB_FS = 360    # Hz, sampling frequency of the source annotations
TARGET_FS = 250   # Hz, sampling frequency the sample indices are rescaled to
MAX_RRI = 1000    # intervals longer than this (in rescaled samples) are discarded

# Use the exact 250/360 ratio; a truncated factor such as 0.694 makes the
# rescaled positions drift as the sample index grows.
df['Rpeaks'] = (df['Rpeaks'] * TARGET_FS / MITDB_FS).round()
# Distance from each annotation to the next one (the last row has no successor -> NaN).
df['RRI'] = (df['Rpeaks'] - df['Rpeaks'].shift(-1)).abs()
df = df.replace(r'^\s*$', np.nan, regex=True) ### Replace empty places with NaN
df = df.ffill() ### Forward fill: every NaN takes the value of the closest preceding non-NaN row

arrhythmia_list = ['(N', '(N\x00', '(AFIB', '(AFIB\x00']
# .copy() so the assignments below modify an independent frame instead of a slice of df.
final_df = df[df['Label'].isin(arrhythmia_list)].copy()

# Collapse the NUL-terminated label variants onto their plain spelling.
final_df['Label'] = final_df['Label'].replace({'(N\x00': '(N', '(AFIB\x00': '(AFIB'})
# Drop implausibly long intervals.
# NOTE(review): presumably this is also what removes the spurious interval where
# one record's last annotation meets the next record's first - confirm.
final_df = final_df.drop(final_df[final_df['RRI'] > MAX_RRI].index)

# Binary target: 0 = '(N', 1 = '(AFIB'.
final_df['Label'] = final_df['Label'].map({'(N': 0, '(AFIB': 1}).astype(int)

### ECG records segmentation

In [85]:
def segmenting_record_A(seg_value_A):
    """Cut the RRI series of the notebook-level ``final_df`` into equal-length segments.

    Trailing intervals that do not fill a complete segment are discarded.

    Parameters
    ----------
    seg_value_A : int
        Number of intervals per segment.

    Returns
    -------
    segmented_rec_A : numpy.ndarray
        float64 array of shape ``(seg_shape_A, seg_value_A)``.
    amount_A : int
        Negated count of discarded trailing intervals (0 when nothing is discarded).
    seg_shape_A : int
        Number of complete segments.
    """
    # float64 so that later arithmetic on the intervals cannot overflow
    rri2_A = np.array(final_df['RRI'], dtype=np.float64)

    remainder_A = len(rri2_A) % seg_value_A  # residue points that do not fill a segment
    amount_A = -remainder_A
    print(f"Amount to remove {amount_A}")

    # Slice with an explicit end index: `rri2_A[:amount_A]` would return an EMPTY
    # array when amount_A is 0, i.e. when the length is already an exact multiple.
    rec_amount_A = rri2_A[:len(rri2_A) - remainder_A]
    seg_shape_A = len(rec_amount_A) // seg_value_A  # total number of segments
    print(f"Shape 0: {seg_shape_A}")

    segmented_rec_A = rec_amount_A.reshape(seg_shape_A, seg_value_A)
    return segmented_rec_A, amount_A, seg_shape_A

### Extracting and segmenting the input features

In [88]:
### Loads in functions for Shannon Entropy, Mean absolute deviation calculations
%run "features_utils.ipynb"
from scipy.stats import median_abs_deviation

In [35]:
seg_len = 20  # Segment length in heartbeats (e.g. 20 or 60); must be positive, 0 makes the modulo in segmenting_record_A fail
segmented_record_A, amount_A, seg_shape_A = segmenting_record_A(seg_len)

feature_names = ['Mean', 'STD', 'RMSSD', 'Normalized RMSSD', 'Shannon Entropy',
                 'Mean absolute deviation', 'Median absolute deiviation']
all_features_A = []

### Calculating features for every single segmented "block" inside the segmented_record_A variable
for segment in segmented_record_A:

    #MEAN
    ff1 = np.nanmean(segment) ### nanmean, nanstd compute their value while ignoring NaN entries
    #STD
    ff2 = np.nanstd(segment)
    #RMSSD: root of the mean squared successive difference. The mean is taken over
    # the len(segment) - 1 differences of THIS segment, not over the number of segments.
    successive_diffs = np.diff(segment)
    ff3 = np.sqrt(np.sum(successive_diffs ** 2) / (len(segment) - 1))
    #NORMALIZED RMSSD
    ff4 = ff3 / ff1
    #SHANNON ENTROPY (entropy is loaded from features_utils.ipynb)
    ff5 = entropy(segment)
    #MEAN ABSOLUTE DEVIATION (mean_abs_deviation is loaded from features_utils.ipynb)
    ff6 = mean_abs_deviation(segment)
    #MEDIAN ABSOLUTE DEVIATION
    ff7 = median_abs_deviation(segment)

    # One Series per segment, every feature rounded to 3 decimals.
    my_features = pd.Series([np.around(value, 3) for value in (ff1, ff2, ff3, ff4, ff5, ff6, ff7)],
                            index=feature_names)
    all_features_A.append(my_features)

### Preprocess the rhythms' labels/targets

In [37]:
Label_A = np.array(final_df["Label"], dtype=np.float64)

# Keep exactly the labels that belong to the complete segments. Slicing with
# `amount_A - 1` drops one label too many, so the reshape below could not
# produce (seg_shape_A, seg_len).
y_amount_A = Label_A[:seg_shape_A * seg_len]
y_shape_A = y_amount_A.reshape(seg_shape_A, seg_len)  # one row of seg_len labels per segment

### A segment is labelled AFIB (1) when at least int(seg_len/2) of its beats are AFIB,
### otherwise normal sinus rhythm (0)
y_list_A = (y_shape_A.sum(axis=1) >= int(seg_len/2)).astype(np.float64)
print(y_list_A.dtype)
print(f"Length of y_list: {len(y_list_A)}")

float64
Length of y_list: 9104
