In [67]:
import pandas as pd
import wfdb
import os
import glob
import numpy as np
import matplotlib.pyplot as plt

### Importing LTAFDB

In [68]:
# Directory that holds the LTAFDB WFDB files -- change this to your own path.
LTAFDB_DIR = '/data/ltafdb'
# Index passed to wfdb.rdsamp(channels=[...]): 0 selects channel 1, 1 selects channel 2.
LTAFDB_CHANNEL = 1

# One entry per record, all lists appended in the same (sorted) file order.
records_long = []      # signal arrays returned by wfdb.rdsamp
properties_long = []   # metadata returned alongside each signal by wfdb.rdsamp
annot_long = []        # 'atr' annotation objects
AnnSymb_long = []      # 'atr' annotation symbols
AnnSamp_long = []      # 'atr' annotation sample indices
AnnRhythm_long = []    # 'atr' aux_note strings (later used as rhythm labels)
Rpeak_Samp_long = []   # 'qrs' annotation sample indices
Rpeak_Symb_long = []   # 'qrs' annotation symbols

# glob.glob returns paths in arbitrary, OS-dependent order. The records are later
# concatenated into one long series, so sort to make every downstream result
# reproducible across machines and runs.
dat_files = sorted(glob.glob(os.path.join(LTAFDB_DIR, '*.dat')))
if not dat_files:
    # Fail early instead of silently producing empty features/labels further down.
    raise FileNotFoundError(f"No .dat records found in {LTAFDB_DIR!r}")

for f in dat_files:
    # wfdb expects the record name without extension; splitext is safer than f[:-4].
    record_path = os.path.splitext(f)[0]

    sig, fields = wfdb.rdsamp(record_path, channels=[LTAFDB_CHANNEL])
    ann = wfdb.rdann(record_path, 'atr')
    QRS = wfdb.rdann(record_path, 'qrs')

    Symb = pd.Series(ann.symbol)
    Samp = pd.Series(ann.sample)
    QRS_Symb = pd.Series(QRS.symbol)
    QRS_Samp = pd.Series(QRS.sample)
    Rhythm = pd.Series(ann.aux_note)

    records_long.append(sig)
    properties_long.append(fields)
    annot_long.append(ann)
    AnnSymb_long.append(Symb)
    AnnSamp_long.append(Samp)
    Rpeak_Symb_long.append(QRS_Symb)
    Rpeak_Samp_long.append(QRS_Samp)
    AnnRhythm_long.append(Rhythm)

# Convert the per-record lists into arrays with one pd.Series element per record.
AnnSymb_long = pd.Series(AnnSymb_long).values
AnnSamp_long = pd.Series(AnnSamp_long).values

In [70]:
# Concatenate the per-record annotation sample indices into one flat list,
# keeping the order in which the records were loaded.
appended_AnnSamp = [sample for record_samples in AnnSamp_long for sample in record_samples]
# NOTE: despite the "Symb" in its name, this list is built from AnnRhythm_long
# (the 'atr' aux_note strings), not from the annotation symbols.
appended_AnnSymb = [note for record_notes in AnnRhythm_long for note in record_notes]

In [2]:
# One row per 'atr' annotation: its sample index ('Rpeaks') and its aux_note ('Label').
df = pd.DataFrame(data={'Rpeaks': appended_AnnSamp, 'Label': appended_AnnSymb})

### Rescaling the LTAFDB sampling frequency (128 Hz) to match AFDB (250 Hz) and preprocessing the dataset

In [1]:
# NOTE(review): the exact 128 Hz -> 250 Hz conversion factor is 250/128 = 1.953125.
# The factor 2 used here overestimates every interval by about 2.4 %. It is kept
# unchanged so existing results stay comparable -- confirm whether this is intended.
# NOTE: this cell modifies `df` in place, so re-running it doubles 'Rpeaks' again.
df['Rpeaks'] = df['Rpeaks'] * 2

# Absolute distance (in rescaled samples) from each annotation to the next one.
# abs() keeps the value positive where the sample index restarts at a record boundary.
df['RRI'] = df['Rpeaks'].diff(periods=-1).abs()

# Empty / whitespace-only labels become NaN so they can be forward-filled.
df = df.replace(r'^\s*$', np.nan, regex=True)
# Forward fill: every row inherits the most recent non-empty value above it.
# This also fills the NaN RRI of the very last row. `.ffill()` replaces the
# deprecated `fillna(method='ffill')`.
df = df.ffill()

# Keep only rows labelled normal rhythm or atrial fibrillation, and drop
# intervals longer than 1000 rescaled samples. `~(RRI > 1000)` keeps the same
# rows as dropping the `RRI > 1000` index entries.
arrhythmia_list = ['(N', '(AFIB']
rows_to_keep = df['Label'].isin(arrhythmia_list) & ~(df['RRI'] > 1000)
# Explicit copy: the column assignment below must not write into a view of `df`
# (this avoids SettingWithCopyWarning).
final_df = df.loc[rows_to_keep].copy()

# Binary target: 0 = normal rhythm, 1 = atrial fibrillation.
final_df['Label'] = final_df['Label'].map({'(N': False, '(AFIB': True}).astype(int)

### ECG records segmentation

In [82]:
def segmenting_record_long(seg_value_long, rri=None):
    """Split the RR-interval series into equal-length, non-overlapping segments.

    Trailing intervals that do not fill a complete segment are discarded.

    Parameters
    ----------
    seg_value_long : int
        Number of intervals per segment; must be positive.
    rri : array-like, optional
        RR intervals to segment. Defaults to the global ``final_df['RRI']``.

    Returns
    -------
    segmented_rec_long : numpy.ndarray
        float64 array of shape ``(seg_shape_long, seg_value_long)``.
    amount_long : int
        Negated number of discarded trailing intervals (0 when nothing is discarded).
    seg_shape_long : int
        Number of complete segments.

    Raises
    ------
    ValueError
        If ``seg_value_long`` is not positive.
    """
    if seg_value_long <= 0:
        raise ValueError(f"seg_value_long must be a positive integer, got {seg_value_long}")

    if rri is None:
        rri = final_df['RRI']
    # float64 avoids integer overflow in the later feature computations.
    rri2_long = np.array(rri, dtype=np.float64)

    remainder = len(rri2_long) % seg_value_long
    amount_long = -remainder  # kept as a negative count for existing callers
    print(f"Amount to remove {amount_long}")

    # Slice by an explicit end index. The previous `rri2_long[:amount_long]`
    # became `rri2_long[:0]` when the length was an exact multiple of the
    # segment length, throwing away every interval.
    usable_length = len(rri2_long) - remainder
    rec_longmount_long = rri2_long[:usable_length]
    seg_shape_long = usable_length // seg_value_long
    print(f"Shape 0: {seg_shape_long}")

    segmented_rec_long = rec_longmount_long.reshape(seg_shape_long, seg_value_long)
    return segmented_rec_long, amount_long, seg_shape_long

### Extracting and segmenting the input features

In [18]:
### Loads in functions for Shannon Entropy, Mean absolute deviation calculations
%run "features_utils.ipynb"
from scipy.stats import median_abs_deviation

In [19]:
seg_len = 20  # Segment length in heartbeats; must be a positive integer (e.g. 20 or 60)
segmented_record_long, amount_long, seg_shape_long = segmenting_record_long(seg_len)

# NOTE(review): 'Median absolute deiviation' is misspelled, but the label is kept
# byte-for-byte because later code may look the feature up by this exact name.
feature_names = ['Mean', 'STD', 'RMSSD', 'Normalized RMSSD', 'Shannon Entropy',
                 'Mean absolute deviation', 'Median absolute deiviation']

all_features_long = []

### Calculating features for every single segmented "block" inside the segmented_record_long variable
for segment in segmented_record_long:

    # MEAN and STD (the nan-variants ignore NaN values)
    ff1 = np.nanmean(segment)
    ff2 = np.nanstd(segment)

    # RMSSD: root mean square of the successive differences within this segment.
    # The mean is taken over the len(segment) - 1 differences. The previous code
    # divided by the number of segments minus one, which scaled RMSSD by dataset size.
    successive_diffs = np.diff(segment)
    ff3 = np.sqrt(np.sum(successive_diffs ** 2) / (len(segment) - 1))

    # NORMALIZED RMSSD
    ff4 = ff3 / ff1
    # SHANNON ENTROPY (entropy() comes from features_utils.ipynb)
    ff5 = entropy(segment)
    # MEAN ABSOLUTE DEVIATION (mean_abs_deviation() comes from features_utils.ipynb)
    ff6 = mean_abs_deviation(segment)
    # MEDIAN ABSOLUTE DEVIATION (scipy.stats)
    ff7 = median_abs_deviation(segment)

    my_features = pd.Series(
        [np.around(value, 3) for value in (ff1, ff2, ff3, ff4, ff5, ff6, ff7)],
        index=feature_names)
    all_features_long.append(my_features)

### Preprocess the rhythms' labels/targets

In [20]:
# Per-beat binary labels (0 = normal sinus rhythm, 1 = AFIB) as float64.
Label_long = np.array(final_df["Label"], dtype=np.float64)

# Keep exactly the labels of the beats that ended up in a complete segment.
# The previous slice `Label_long[:amount_long-1]` was one element short of
# seg_shape_long * seg_len, so the reshape below raised ValueError.
y_amount_long = Label_long[:seg_shape_long * seg_len]
y_shape_long = y_amount_long.reshape(seg_shape_long, seg_len)

### A segment is labelled AFIB (1) when at least half of its beats are AFIB,
### otherwise normal sinus rhythm (0). Ties count as AFIB.
afib_beats_per_segment = y_shape_long.sum(axis=1)
y_list_long = (afib_beats_per_segment >= int(seg_len / 2)).astype(np.float64)

print(y_list_long.dtype)
print(f"Length of y_list: {len(y_list_long)}")