In [2]:
!pip install wfdb

Collecting wfdb
  Downloading wfdb-4.3.1-py3-none-any.whl.metadata (3.8 kB)
Collecting soundfile>=0.10.0 (from wfdb)
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Downloading wfdb-4.3.1-py3-none-any.whl (163 kB)
Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ------------------------------ --------- 0.8/1.0 MB 5.0 MB/s eta 0:00:01
   ---------------------------------------- 1.0/1.0 MB 4.6 MB/s eta 0:00:00
Installing collected packages: soundfile, wfdb

   -------------------- ------------------- 1/2 [wfdb]
   ---------------------------------------- 2/2 [wfdb]

Successfully installed soundfile-0.13.1 wfdb-4.3.1


In [2]:
# Importing Libraries
import wfdb
import numpy as np
import pandas as pd

In [3]:
FS = 360
WINDOW_SIZE = 10

In [4]:
# Mapping the labels (Normal, Suspicious and Critical)

def map_labels(window_symbols):
    normal = ['N','L','R']
    suspicious = ['A','S','J']
    critical = ['V','F','E']

    if any (sym in critical for sym in window_symbols):
        return "Critical"
    elif any (sym in suspicious for sym in window_symbols):
        return "Suspicious"
    elif any (sym in normal for sym in window_symbols):
        return "Normal"
    else:
        return None

In [5]:
# Processing one record 

record = wfdb.rdrecord('100', pn_dir='mitdb')
annotation = wfdb.rdann('100','atr',pn_dir='mitdb')

r_peaks = annotation.sample
symbols = annotation.symbol

rr_intervals = np.diff(r_peaks)/FS

In [16]:
data = []

for i in range(len(rr_intervals) - WINDOW_SIZE):

    window_rr = rr_intervals[i:i+WINDOW_SIZE]
    window_symbols = symbols[i:i+WINDOW_SIZE]

    label = map_labels(window_symbols)
    if label is None:
        continue

    hr = np.mean(60/window_rr)

    rr_mean = np.mean(window_rr)

    rr_std = np.std(window_rr)

    quality = 1/(1 + rr_std)

    # Additional new 3 features
    rmssd = np.sqrt(np.mean(np.diff(window_rr)**2))

    diff_rr = np.abs(np.diff(window_rr))
    pnn50 = np.sum(diff_rr > 0.05) / len(diff_rr) * 100

    cv = rr_std / rr_mean

    # Addition of 2 more features
    sdsd = np.std(np.diff(window_rr))

    rr_range = np.max(window_rr) - np.min(window_rr)

    data.append([hr, rr_mean, rr_std, quality, rmssd, pnn50, cv, sdsd, rr_range, label])

In [20]:
# Convert to Data Frame

df = pd.DataFrame(
    data,
    columns = ["HR","RR_mean","RR_std","Quality","RMSSD","pNN50","CV", "SDSD", "RR_Range", "Label"]
)

df.head()

Unnamed: 0,HR,RR_mean,RR_std,Quality,RMSSD,pNN50,CV,SDSD,RR_Range,Label
0,107.301668,0.621389,0.131251,0.883977,0.138666,11.111111,0.211223,0.129098,0.458333,Normal
1,90.315229,0.664722,0.01596,0.984291,0.022453,0.0,0.024009,0.022316,0.047222,Normal
2,90.315229,0.664722,0.01596,0.984291,0.022126,0.0,0.024009,0.02195,0.047222,Normal
3,90.506718,0.663333,0.01633,0.983932,0.02195,0.0,0.024618,0.02195,0.047222,Normal
4,90.585615,0.662778,0.016768,0.983508,0.021316,0.0,0.0253,0.021173,0.047222,Normal


In [21]:
# Process all 48 records

RECORDS = [
    '100','101','102','103','104','105','106','107','108','109',
    '111','112','113','114','115','116','117','118','119','121',
    '122','123','124','200','201','202','203','205','207','208',
    '209','210','212','213','214','215','217','219','220','221',
    '222','223','228','230','231','232','233','234'
]

final_data = []

BASE_PATH = r"C:\Users\Shikhaj Somani\OneDrive\Desktop\ML\CardioGuard AI\mit-bih-arrhythmia-database-1.0.0"

for rec in RECORDS:
    print("Processing:", rec)

    record = wfdb.rdrecord(f"{BASE_PATH}/{rec}")
    annotation = wfdb.rdann(f"{BASE_PATH}/{rec}", 'atr')

    r_peaks = annotation.sample
    symbols = annotation.symbol

    rr_intervals = np.diff(r_peaks)/FS

    for i in range(len(rr_intervals) - WINDOW_SIZE):
        window_rr = rr_intervals[i:i+WINDOW_SIZE]
        window_symbols = symbols[i:i+WINDOW_SIZE]

        label = map_labels(window_symbols)
        if label is None:
            continue

        hr = np.mean(60/window_rr)
        rr_mean = np.mean(window_rr)
        rr_std = np.std(window_rr)
        quality = 1/(1 + rr_std)

        # Additional new 3 features
        rmssd = np.sqrt(np.mean(np.diff(window_rr)**2))

        diff_rr = np.abs(np.diff(window_rr))
        pnn50 = np.sum(diff_rr > 0.05) / len(diff_rr) * 100

        cv = rr_std / rr_mean

        # Addition of 2 more features
        sdsd = np.std(np.diff(window_rr))

        rr_range = np.max(window_rr) - np.min(window_rr)

        final_data.append([hr, rr_mean, rr_std, quality, rmssd, pnn50, cv, sdsd, rr_range, label])

final_df = pd.DataFrame(
    final_data,
    columns=["HR","RR_mean","RR_std","Quality","RMSSD","pNN50","CV", "SDSD", "RR_Range", "Label"]
)

final_df.to_csv("mit_dataset3.csv", index=False)

print("Dataset created successfully")

Processing: 100
Processing: 101
Processing: 102
Processing: 103
Processing: 104
Processing: 105
Processing: 106
Processing: 107
Processing: 108
Processing: 109
Processing: 111
Processing: 112
Processing: 113
Processing: 114
Processing: 115
Processing: 116
Processing: 117
Processing: 118
Processing: 119
Processing: 121
Processing: 122
Processing: 123
Processing: 124
Processing: 200
Processing: 201
Processing: 202
Processing: 203
Processing: 205
Processing: 207
Processing: 208
Processing: 209
Processing: 210
Processing: 212
Processing: 213
Processing: 214
Processing: 215
Processing: 217
Processing: 219
Processing: 220
Processing: 221
Processing: 222
Processing: 223
Processing: 228
Processing: 230
Processing: 231
Processing: 232
Processing: 233
Processing: 234
Dataset created successfully


In [22]:
print(final_df.shape)


(105569, 10)
