In [22]:
import wfdb
from wfdb import processing
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal

### Importing NSRDB

In [23]:
# Per-record containers for the NSRDB data. The loop below appends exactly one
# entry per record to the first six lists, all in the same (sorted) file order.
records_nsr = []       # first value returned by wfdb.rdsamp (the signal)
properties_nsr = []    # second value returned by wfdb.rdsamp (the record fields)
annot_nsr = []         # annotation objects returned by wfdb.rdann
AnnSymb_nsr = []       # ann.symbol wrapped in a pd.Series
AnnSamp_nsr = []       # ann.sample wrapped in a pd.Series
AnnRhythm_nsr = []     # ann.aux_note wrapped in a pd.Series
new_records_nsr = []   # not filled in this cell
AnnSamp_250_nsr = []   # not filled in this cell

# glob.glob returns matches in arbitrary, filesystem-dependent order. Sorting keeps
# the record order (and every positional lookup such as AnnSamp_nsr[i]) reproducible.
for f in sorted(glob.glob('/data/nsrdb/*.dat')):             #### change the path to your own directory.
    # Drop the '.dat' extension; rdsamp and rdann are both given the bare record path.
    record_path = os.path.splitext(f)[0]

    #### Pass "channels=[0]" or "channels=[1]" here to select channel 1 or 2.
    sig, fields = wfdb.rdsamp(record_path, channels=[1])
    ann = wfdb.rdann(record_path, 'atr')

    Symb = pd.Series(ann.symbol)
    Samp = pd.Series(ann.sample)
    Rhythm = pd.Series(ann.aux_note)

    records_nsr.append(sig)
    properties_nsr.append(fields)
    annot_nsr.append(ann)
    AnnSymb_nsr.append(Symb)
    AnnSamp_nsr.append(Samp)
    AnnRhythm_nsr.append(Rhythm)

# Repack the per-record lists through pd.Series(...).values; later cells index
# AnnSamp_nsr by record position.
AnnSymb_nsr = pd.Series(AnnSymb_nsr).values
AnnSamp_nsr = pd.Series(AnnSamp_nsr).values

In [None]:
# NOTE: `labeled_Rpeaks_nsr` is built by the resampling cell that appears after this
# one in the notebook, so that cell has to be executed before this one.
# Name the single data column 'Rpeaks' (errors='raise' fails loudly if column 0 is
# missing) and attach a constant Label column of 0 to every row.
labeled_Rpeaks_nsr = (
    labeled_Rpeaks_nsr
    .rename(columns={0: 'Rpeaks'}, errors='raise')
    .assign(Label=0)
)

# Pull the two columns out for the segmentation and label cells.
Rpeaks_nsr = labeled_Rpeaks_nsr['Rpeaks']
label_column_nsr = labeled_Rpeaks_nsr['Label']
Label_nsr = np.array(label_column_nsr, dtype=np.float64)

### Rescaling the sampling frequency from 128 Hz (NSRDB) to 250 Hz (AFDB) and preprocessing the dataset

In [24]:
# Scale each record's annotation sample indices and stack all records into one frame.
appended_data_nsr = []

# Iterate over the records that were actually loaded instead of assuming exactly 18:
# a hard-coded range(18) raises IndexError when fewer records are present and
# silently ignores any extra ones.
for record_samples in AnnSamp_nsr:
    # NOTE: a factor of 2 maps 128 Hz onto 256 Hz; the exact ratio for 250 Hz would be 250/128.
    df = pd.DataFrame(record_samples * 2) ### Roughly resampling from 128 to 250 Hz
    appended_data_nsr.append(df)

# pd.concat is called without ignore_index, so each record keeps its own row labels
# and index values repeat across records in the combined frame.
labeled_Rpeaks_nsr = pd.concat(appended_data_nsr)

### ECG records segmentation

In [27]:
def segmenting_record_nsr(seg_value_nsr, rpeaks=None):
    """Split the R-R intervals into equal-length, non-overlapping segments.

    The intervals are the successive differences of the R-peak positions. Trailing
    intervals that do not fill a complete segment are dropped.

    Parameters
    ----------
    seg_value_nsr : int
        Number of R-R intervals per segment; must be at least 1.
    rpeaks : array-like, optional
        R-peak sample positions. When omitted, the notebook-level ``Rpeaks_nsr``
        is used, which matches the original behaviour.

    Returns
    -------
    segmented_rec_nsr : numpy.ndarray
        float64 array of shape ``(seg_shape_nsr, seg_value_nsr)``.
    amount_nsr : int
        Zero or negative number: minus the count of trailing intervals removed.
    seg_shape_nsr : int
        Number of complete segments.

    Raises
    ------
    ValueError
        If ``seg_value_nsr`` is smaller than 1.
    """
    if seg_value_nsr < 1:
        raise ValueError("seg_value_nsr must be a positive integer")
    if rpeaks is None:
        rpeaks = Rpeaks_nsr

    ### Storing the intervals between rpeaks; float64 avoids overflow in later arithmetic
    rri2_nsr = np.array(np.diff(rpeaks), dtype=np.float64)

    # amount of data points to remove, for equal length segments with no residue points
    amount_nsr = -(len(rri2_nsr) % seg_value_nsr)
    print(f"Amount to remove {amount_nsr}")

    # A stop index of 0 would slice away *every* interval, so when nothing has to be
    # removed the full array is kept instead of rri2_nsr[:0].
    rec_amount_nsr = rri2_nsr[:amount_nsr] if amount_nsr else rri2_nsr

    # amount of total segments (given the specified segment length)
    seg_shape_nsr = len(rec_amount_nsr) // seg_value_nsr
    print(f"Shape 0: {seg_shape_nsr}")

    segmented_rec_nsr = rec_amount_nsr.reshape(seg_shape_nsr, seg_value_nsr)
    # The feature-extraction cell unpacks three values, so return the trimming amount
    # and the segment count together with the segmented array.
    return segmented_rec_nsr, amount_nsr, seg_shape_nsr

### Extracting and segmenting the input features

In [10]:
### Loads in functions for Shannon Entropy, Mean absolute deviation calculations
%run "features_utils.ipynb"
from scipy.stats import median_abs_deviation

In [11]:
seg_len = 20
# The call is unpacked into the segmented R-R array, the (zero or negative) trimming
# amount and the number of segments; the last two are reused by the label cell.
segmented_record_nsr, amount_nsr, seg_shape_nsr = segmenting_record_nsr(seg_len) # Specify the segment length

all_features_nsr = []

### Calculating features for every single segmented "block" inside the segmented_record_nsr variable
for segment in segmented_record_nsr:

    #MEAN
    ff1 = np.nanmean(segment) ### nanmean, nanstd computes values while ignoring nan-values
    #STD
    ff2 = np.nanstd(segment)
    #RMSSD
    # Root mean square of the successive differences. The mean is taken over the
    # len(segment) - 1 differences of THIS segment; the previous code divided by the
    # number of segments minus one, which scaled RMSSD with the size of the dataset.
    successive_diffs = np.diff(segment)
    ff3 = np.sqrt(np.sum(successive_diffs ** 2) / (len(segment) - 1))
    #NORMALIZED RMSSD
    ff4 = (ff3 / ff1)
    #SHANNON ENTROPY
    # `entropy` and `mean_abs_deviation` are not defined in this cell; presumably they
    # come from the features_utils notebook run earlier -- verify.
    ff5 = entropy(segment)
    #MEAN ABSOLUTE DEVIATION
    ff6 = mean_abs_deviation(segment)
    #MEDIAN ABSOLUTE DEVIATION
    ff7 = median_abs_deviation(segment)

    # One Series per segment, every feature rounded to 3 decimals.
    # NOTE: the last label is misspelled ('deiviation'); it is kept unchanged because
    # other cells may look the feature up by this exact name.
    my_features = pd.Series([np.around(ff1, 3), np.around(ff2, 3), np.around(ff3, 3), np.around(ff4, 3), np.around(ff5, 3),
                             np.around(ff6, 3), np.around(ff7, 3)],
                            index=['Mean','STD','RMSSD','Normalized RMSSD','Shannon Entropy',
                                   'Mean absolute deviation','Median absolute deiviation'])
    all_features_nsr.append(my_features)

### Preprocess the rhythms' labels/targets

In [12]:
# Label_nsr holds one label per R peak, i.e. one more entry than there are R-R
# intervals. Slicing with `amount_nsr - 1` therefore drops that extra entry together
# with the labels of the trailing intervals removed during segmentation.
y_amount_nsr = Label_nsr[:amount_nsr-1]
y_shape_nsr = y_amount_nsr.reshape(seg_shape_nsr, seg_len)

### Decide per block of seg_len labels whether it counts as class 1 or class 0
### (presumably 0 = normal sinus rhythm, 1 = AFIB -- confirm against the AFDB notebook).
# A block is labelled 1 when the sum of its labels reaches half the segment length.
majority_threshold = int(seg_len/2)
y_list_nsr = np.array(
    [1 if np.sum(block_labels) >= majority_threshold else 0
     for block_labels in y_shape_nsr],
    dtype=np.float64,
)

print(y_list_nsr.dtype)
print(f"Length of y_list: {len(y_list_nsr)}")