In [5]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn import preprocessing
import wfdb
import copy as cp
import scipy.signal as signal
import pickle
from sklearn import preprocessing
from tqdm import tqdm
import os
import re
import pandas as pd
import csv

# Process Information Taken from Raw Files

## Import our Previously Extracted Data

In [6]:
# Load the record (subject) names, one per line, from the list saved during the initial extraction.
records = 'mit-bih-dataframes/subject_list.csv' # Path of the record list file

with open(records) as rfile:
    # Strip only the line terminator rather than slicing off the last character:
    # a final line without a trailing newline would otherwise lose a real character.
    # Blank lines are skipped so they never turn into an empty record name.
    record_list = [line.rstrip('\r\n') for line in rfile if line.strip()]

In [17]:
# Read each subject's previously saved CSV back into a DataFrame, in record_list order.
# index_col=0 tells pandas to use the first CSV column (the saved index) as the
# DataFrame index instead of loading it as an additional data column.
subject_dataframes = [
    pd.read_csv('mit-bih-dataframes/'+record_name+'.csv', index_col=0)
    for record_name in tqdm(record_list)
]
print(subject_dataframes[0])

100%|██████████| 23/23 [02:20<00:00,  6.09s/it]


         Signal 1  Signal 2  R-Peak  Normal   AFIB  Other
0          -0.275    -0.210   False   False  False   True
1          -0.295    -0.200   False   False  False   True
2          -0.310    -0.220   False   False  False   True
3          -0.315    -0.225   False   False  False   True
4          -0.300    -0.215   False   False  False   True
...           ...       ...     ...     ...    ...    ...
9205755    -0.765    -1.810   False    True  False  False
9205756    -0.680    -1.560   False    True  False  False
9205757    -0.680    -1.345   False    True  False  False
9205758    -0.670    -1.230   False    True  False  False
9205759    -0.665    -1.165   False    True  False  False

[9205760 rows x 6 columns]


In [11]:

# Now we get the positions of the R-Peaks from our dataframes.
# The 'R-Peak' column is truthy exactly at the samples where an R-peak occurs, so the
# positions of its non-zero entries are the R-peak positions for that subject.
# np.flatnonzero finds them in a single vectorized pass instead of testing every sample
# in a Python-level loop; .tolist() keeps each subject's result a plain list of Python
# ints, as the original list comprehension produced.
qrs = [
    np.flatnonzero(subject['R-Peak'].to_numpy()).tolist()
    for subject in tqdm(subject_dataframes)
]

print(qrs[0])

100%|██████████| 23/23 [00:38<00:00,  1.65s/it]

[61, 200, 358, 584, 729, 873, 1112, 1260, 1409, 1643, 1855, 2072, 2274, 2481, 2687, 2824, 2959, 3090, 3184, 3309, 3426, 3646, 3870, 4004, 4150, 4399, 4538, 4691, 4925, 5142, 5363, 5578, 5783, 5992, 6133, 6272, 6395, 6489, 6609, 6713, 6808, 6923, 7020, 7186, 7309, 7383, 7506, 7615, 7698, 7786, 7908, 8057, 8157, 8298, 8545, 8766, 8979, 9111, 9398, 9544, 9823, 9967, 10099, 10332, 10477, 10657, 10891, 11036, 11184, 11415, 11643, 11845, 12054, 12262, 12478, 12678, 12886, 13090, 13294, 13501, 13707, 13914, 14118, 14332, 14532, 14738, 14943, 15148, 15287, 15416, 15547, 15667, 15779, 15912, 16056, 16164, 16290, 16534, 16675, 16867, 17104, 17238, 17404, 17644, 17779, 17917, 18147, 18284, 18422, 18541, 18648, 18837, 19082, 19220, 19415, 19652, 19787, 19977, 20216, 20352, 20505, 20747, 20886, 21018, 21163, 21288, 21395, 21504, 21655, 21902, 22043, 22192, 22435, 22654, 22873, 23090, 23307, 23519, 23653, 23790, 23912, 24014, 24129, 24270, 24386, 24475, 24614, 24722, 24816, 24926, 25026, 25166, 2528




## Extract RR-Intervals

In 2019, all of the variables we developed were based on RR-intervals, i.e., the distances between adjacent R-peaks, so those are the first thing we extract.

In [28]:
# Compute every subject's RR-intervals: the number of samples between adjacent R-peaks.
# qrs holds one list of R-peak sample positions per subject. Pairing each peak with the
# peak before it and subtracting gives one interval per adjacent pair, so a subject with
# N peaks yields N - 1 intervals (and fewer than two peaks yields an empty list).
rr_ints = [
    [current - previous for previous, current in zip(peaks, peaks[1:])]
    for peaks in qrs
]

## Save the RR-Intervals 

In [26]:
# Wrap each subject's RR-interval list in a single-column DataFrame ('RR-Intervals').
# Entries are built in record_list order, so int_db[i] belongs to record_list[i].
int_db = [
    pd.DataFrame(
        data=np.transpose(np.array(rr_ints[position])),
        columns=['RR-Intervals'],
    )
    for position, _record in enumerate(tqdm(record_list))
]
    

100%|██████████| 23/23 [00:00<00:00, 172.88it/s]


In [27]:
# Save each subject's RR-intervals to 'mit-bih-rrintervals/<record>.csv'.
# A file that already exists is only rewritten when reload_flag is truthy.

# reload_flag is not defined in any visible cell, so on a fresh kernel the condition
# below raised NameError as soon as one output file already existed. Default it to
# False (keep existing files) while still honoring a value set by an earlier cell.
try:
    reload_flag
except NameError:
    reload_flag = False

# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs('mit-bih-rrintervals', exist_ok=True)

for idx, x in enumerate(tqdm(record_list)):
    out_path = 'mit-bih-rrintervals/'+x+'.csv'
    if reload_flag or not os.path.exists(out_path):
        int_db[idx].to_csv(out_path) # Pandas DataFrames have a built-in to_csv() method that writes the frame to the given path

100%|██████████| 23/23 [00:02<00:00, 11.05it/s]


## What else to extract - some good places to start

- [Moody, 1983 - A new method for detecting atrial fibrillation using R-R intervals.](http://ecg.mit.edu/george/publications/afib-cinc-1983.pdf)
- [Lake, 2010 - Accurate estimation of entropy in very short physiological time series: the problem of atrial fibrillation detection in implanted ventricular devices](https://journals.physiology.org/doi/full/10.1152/ajpheart.00561.2010)
- [Rubio, 2011 - Estudio comparativo de algoritmos para la detección de la fibrilación auricular](https://academica-e.unavarra.es/bitstream/handle/2454/4136/577570.pdf?sequence=1&isAllowed=y)
- [Ghodrati, 2008 - Statistical analysis of RR interval irregularities for detection of atrial fibrillation](https://ieeexplore.ieee.org/document/4749227)
- [Young, 1999 - A comparative study of a hidden Markov model detector for atrial fibrillation](https://ieeexplore.ieee.org/abstract/document/788166)