# (1) Importing/Storing data

In [1]:
import wfdb
from wfdb import processing
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal

In [2]:
records_nsr = []
properties_nsr = []
annot_nsr = []
AnnSymb_nsr = []
AnnSamp_nsr = []
AnnRhythm_nsr = []
new_records_nsr = []
AnnSamp_250_nsr = []

for f in glob.glob('C:/Users/masud/Skrivebord/Github_Bachelor_Project/wfdb-python/data/normal_sinus_rhythm_data/*.dat'): ##### change the path to your own directory.
    sig, fields = wfdb.rdsamp(f[:-4], channels=[1])  #### In this function, you have the opportunity to pass "channels=[0]" or "channels=[1]" to select channel 1 or 2.
    
    ann = wfdb.rdann(f[:-4], 'atr')
    Symb = pd.Series(ann.symbol)
    Samp = pd.Series(ann.sample)

    Rhythm = pd.Series(ann.aux_note)
    records_nsr.append(sig)
    properties_nsr.append(fields)
    annot_nsr.append(ann)
    AnnSymb_nsr.append(Symb)
    AnnSamp_nsr.append(Samp)

    AnnRhythm_nsr.append(Rhythm)
    
    

AnnSymb_nsr = pd.Series(AnnSymb_nsr).values
AnnSamp_nsr = pd.Series(AnnSamp_nsr).values

# (2) Pre processing data

In [3]:
labeled_Rpeaks_nsr = []
appended_data_nsr = []
for i in range(18): ## 18 records
    df = pd.DataFrame(AnnSamp_nsr[i]*2) ### Roughly resampling from 128 to 250 Hz
    appended_data_nsr.append(df)
        

labeled_Rpeaks_nsr = pd.concat(appended_data_nsr)
#labeled_Rpeaks_nsr

In [4]:
labeled_Rpeaks_nsr.rename(columns={0: 'Rpeaks'},
          inplace=True, errors='raise')
labeled_Rpeaks_nsr['Label'] = 0
labeled_Rpeaks_nsr

Unnamed: 0,Rpeaks,Label
0,2,0
1,104,0
2,258,0
3,414,0
4,568,0
...,...,...
111258,19446442,0
111259,19446610,0
111260,19446776,0
111261,19446938,0


In [5]:
Rpeaks_nsr = labeled_Rpeaks_nsr["Rpeaks"]
Label_nsr = labeled_Rpeaks_nsr["Label"]
Label_nsr = np.array(Label_nsr, dtype=np.float64)

#labeled_Rpeaks_N_AFIB[0:1000]
labeled_Rpeaks_nsr.isnull().values.any() ### Check for NaN values

False

In [6]:
def segmenting_record_nsr(seg_value_nsr):
    rri2_nsr = np.diff(Rpeaks_nsr) ### Storing the intervals between rpeaks
    rri2_nsr = np.array(rri2_nsr, dtype=np.float64) ### Ensuring no overflow issues happens, when calculating in for loop later
    
    amount_nsr = -(len(rri2_nsr) % seg_value_nsr) # amount of data points to remove, for equal length segments with no residue points
    print(f"Amount to remove {amount_nsr}")
    
    rec_amount_nsr = rri2_nsr[:amount_nsr]
    seg_shape_nsr = len(rec_amount_nsr) // seg_value_nsr # amount of total segments (given the specified segment length) 
    print(f"Shape 0: {seg_shape_nsr}")
    
    segmented_rec_nsr = rec_amount_nsr.reshape(seg_shape_nsr,seg_value_nsr)
    return segmented_rec_nsr

In [7]:
segmented_record_nsr = segmenting_record_nsr(10) # Specify the segment length

Amount to remove -1
Shape 0: 180679


### X

In [8]:
### Loads in functions for Shannon Entropy, Mean absolute deviation calculations
%run "features_utils.ipynb"
from scipy.stats import median_abs_deviation

In [9]:
all_features_nsr = []

### Calculating features for every single segmented "block" inside the segmented_record_nsr variable
for x in range(len(segmented_record_nsr)):
    
    #MEAN
    ff1 = np.nanmean(segmented_record_nsr[x]) ### nanmean, nanstd computes values while ignoring nan-values
    #STD
    ff2 = np.nanstd(segmented_record_nsr[x])
    #RMSSD
    sum_ = 0
    for y in range(len(segmented_record_nsr[x]) - 1): ### loops 19 times
        sum_ += (segmented_record_nsr[x][y] - segmented_record_nsr[x][y+1])**2
    sum_multiplied = 1/(len(segmented_record_nsr) - 1) * sum_
    ff3 = np.sqrt(sum_multiplied)
    #NORMALIZED RMSSD
    ff4 = (ff3 / ff1)
    #SHANNON ENTROPY
    ff5 = entropy(segmented_record_nsr[x])
    #MEAN ABSOLUTE DEVIATION
    ff6 = mean_abs_deviation(segmented_record_nsr[x])
    #MEDIAN ABSOLUTE DEVIATION
    ff7 = median_abs_deviation(segmented_record_nsr[x])
    
    #COVARIANCE
    #z = np.stack((ff1, ff2))
    #ff5 = np.cov(z) ### try cov with previous segment

    my_features = pd.Series([np.around(ff1,3), np.around(ff2,3), np.around(ff3,3), np.around(ff4,3), np.around(ff5,3), 
                             np.around(ff6,3), np.around(ff7,3)],
                            index=['Mean','STD','RMSSD','Normalized RMSSD','Shannon Entropy',
                                   'Mean absolute deviation','Median absolute deiviation'])
    all_features_nsr.append(my_features)

In [10]:
n = 0

print(f"Features calculated for an example segment:\n{all_features_nsr[n]}") 
print('\n'f"Features variable contains {len(all_features_nsr)} segments")

Features calculated for an example segment:
Mean                          150.600
STD                            16.396
RMSSD                           0.126
Normalized RMSSD                0.001
Shannon Entropy                 0.736
Mean absolute deviation         9.720
Median absolute deiviation      2.000
dtype: float64

Features variable contains 180679 segments


### Y

### Paste the copied information below (indicated with the arrows)
1. Arrow: Add "Amount to remove" - 1, (remember the negative sign).
2. Arrow: Add "Shape 0" as first argument and the segment size as second argument. (Shape 0 , segment size)
3. Arrow: Divide segment size by 2 and add here. (meaning, if over half the segment contains "(AFIB", than classify the whole segment as "(AFIB"

In [11]:
y_amount_nsr = Label_nsr[:-2] # <=============== (1)
y_shape_nsr = y_amount_nsr.reshape(180679, 10) # <=============== (2)
y_list_nsr = []

### Calc if every 20 segment block is Normal synus rythm or AFIB
for g in range(len(y_shape_nsr)):
    #y_segment = y_shape
    sum_segment_nsr = np.sum(y_shape_nsr[g])
    if sum_segment_nsr >= 5: # <=============== (3)
        sum_segment_nsr = 1
    else: sum_segment_nsr = 0
    y_list_nsr.append(sum_segment_nsr)

y_list_nsr = np.array(y_list_nsr, dtype=np.float64)
print(y_list_nsr.dtype)
print(f"Length of y_list: {len(y_list_nsr)}")

float64
Length of y_list: 180679
