In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import wfdb
from scipy import signal
from sklearn.model_selection import train_test_split

In [None]:
# Folder holding the MIT-BIH Arrhythmia Database records (relative to the notebook).
DATA_DIR = "mit-bih-arrhythmia-database-1.0.0"

# Collect the signal files in sorted order so the participant numbering
# assigned later by enumerate() is reproducible across runs.
# endswith() is used instead of a substring test so that names such as
# "100.dat.bak" are not picked up; the readers strip exactly the 4-character
# ".dat" suffix to obtain the record name.
data_files = sorted(
    DATA_DIR + "/" + file
    for file in os.listdir(DATA_DIR)
    if file.endswith(".dat")
)

def read_dat_file(file, participant):
    """Load one record's ECG samples into a DataFrame.

    Parameters
    ----------
    file : str
        Path to the record's signal file; the last four characters
        (the ".dat" suffix) are stripped before the path is given to wfdb.
    participant : int
        Number used to build the "MIT-Arrhythmia_NN" participant label.

    Returns
    -------
    pandas.DataFrame
        Columns "ECG" (signal values of channel index 1), "Participant"
        (the same label on every row) and "Sample" (0, 1, 2, ...).
    """
    record_path = file[:-4]
    # rdsamp returns the signal matrix first; its columns are the channels.
    # NOTE(review): the original note says channel 0 is lead II and channel 1
    # is V5 - presumably this differs between records; confirm that channel 1
    # is the intended one.
    ecg_values = wfdb.rdsamp(record_path)[0][:, 1]
    return pd.DataFrame({
        "ECG": ecg_values,
        "Participant": "MIT-Arrhythmia_%.2i" % (participant),
        "Sample": range(len(ecg_values)),
    })

def read_atr_file(file, participant):
    """Load one record's 'atr' annotations into a DataFrame.

    Parameters
    ----------
    file : str
        Path to the record's signal file; the last four characters
        (the ".dat" suffix) are stripped to get the record path for wfdb.
    participant : int
        Number used to build the "MIT-Arrhythmia_NN" participant label.

    Returns
    -------
    pandas.DataFrame
        Columns "Rpeaks" (annotation sample positions), "Anno_Symbol"
        (annotation symbols) and "Participant". No annotation is filtered
        out here; every entry of the annotation file is returned.
    """
    annotation = wfdb.rdann(file[:-4], 'atr')
    return pd.DataFrame({
        "Rpeaks": annotation.sample,
        "Anno_Symbol": annotation.symbol,
        "Participant": "MIT-Arrhythmia_%.2i" % (participant),
    })

# Read every record once: the signal frame and its annotation frame are
# loaded together so both lists stay aligned by position. The participant
# number is simply the record's position in the sorted file list.
loaded_records = [
    (read_dat_file(file, participant), read_atr_file(file, participant))
    for participant, file in enumerate(data_files)
]

dfs_ecg = [ecg_frame for ecg_frame, _ in loaded_records]
dfs_rpeaks = [annotation_frame for _, annotation_frame in loaded_records]


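The beat annotation symbols are grouped into five classes, and each class is encoded as an integer (raw symbols in parentheses):

N (N, L, R, j, e) ---> 0 <br>
S (a, S, A, J) ---> 1 <br>
V (E, V) ---> 2 <br>
F (F) ---> 3 <br>
Q (/, Q, f) ---> 4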

In [None]:
# Annotation symbol -> integer class id (N=0, S=1, V=2, F=3, Q=4).
# Symbols that are not listed here are dropped.
BEAT_SYMBOL_TO_CLASS = {
    # N -> 0
    'N': 0, 'L': 0, 'R': 0, 'j': 0, 'e': 0,
    # S -> 1
    'a': 1, 'S': 1, 'A': 1, 'J': 1,
    # V -> 2
    'E': 2, 'V': 2,
    # F -> 3
    'F': 3,
    # Q -> 4
    '/': 4, 'Q': 4, 'f': 4,
}

df_rpeaks = []
for annotations in dfs_rpeaks:
    keep = annotations["Anno_Symbol"].isin(list(BEAT_SYMBOL_TO_CLASS))
    # .copy() gives an independent frame, so the column assignment below is
    # a plain assignment rather than a chained write into a filtered slice
    # (which warns and becomes a silent no-op under pandas Copy-on-Write).
    beats = annotations.loc[keep].copy()
    beats["Anno_Symbol"] = beats["Anno_Symbol"].map(BEAT_SYMBOL_TO_CLASS)
    df_rpeaks.append(beats)

In [None]:
# Mark the annotated beats directly on each signal frame: "Rpeaks" becomes
# True at every kept annotation sample and "Anno_Symbol" holds its class id;
# all other rows stay None. The annotation sample numbers are used as row
# labels of the signal frame.
for record_idx, ecg_frame in enumerate(dfs_ecg):
    ecg_frame["Rpeaks"] = None
    ecg_frame["Anno_Symbol"] = None
    beats = df_rpeaks[record_idx]
    ecg_frame.loc[beats.Rpeaks, "Rpeaks"] = True
    ecg_frame.loc[beats.Rpeaks, "Anno_Symbol"] = beats.Anno_Symbol.values

In [None]:
def normalization(x):
    """Min-max scale `x` into the range [0, 1].

    The minimum of `x` maps to 0. A small constant (1e-6) is added to the
    value range so a constant input does not divide by zero; as a result
    the maximum maps to slightly below 1. The result is clipped to [0, 1].
    """
    lowest = np.min(x)
    value_range = np.max(x) - lowest + 1e-6
    scaled = (x - lowest) / value_range
    return np.clip(scaled, a_min=0, a_max=1)

### Centralized Data

In [None]:
def extract_beat_windows(ecg_frames, before=90, after=90):
    """Cut a normalized, fixed-length window around every annotated beat.

    Parameters
    ----------
    ecg_frames : iterable of pandas.DataFrame
        Frames with "ECG", "Rpeaks" (True at beats) and "Anno_Symbol"
        columns. Rows are assumed to be in sample order with the default
        0..n-1 index, as produced by `read_dat_file`.
    before : int
        Number of samples taken before the beat position.
    after : int
        Number of samples taken from the beat position onwards (the beat
        sample itself is included), so each window has `before + after`
        samples.

    Returns
    -------
    (list of numpy.ndarray, list)
        The normalized windows and the matching class ids. Beats too close
        to either end of the recording for a full window are skipped.
    """
    windows, labels = [], []
    for ecg in ecg_frames:
        # Pull the columns out once; slicing plain arrays avoids repeating
        # a pandas label lookup for every single beat.
        ecg_values = ecg["ECG"].to_numpy()
        class_ids = ecg["Anno_Symbol"].to_numpy()
        n_samples = len(ecg_values)
        beat_positions = np.flatnonzero((ecg["Rpeaks"] == True).to_numpy())
        for peak in beat_positions:
            start, stop = peak - before, peak + after
            if start < 0 or stop > n_samples:
                continue  # incomplete window at the edge of the recording
            windows.append(normalization(ecg_values[start:stop]))
            labels.append(class_ids[peak])
    return windows, labels


all_windows, all_labels = extract_beat_windows(dfs_ecg)

In [None]:
# One-hot encode the integer class ids into 5-column vectors.
# This rebinds `all_labels`, so the cell should run exactly once after the
# label list has been rebuilt.
all_labels = tf.keras.utils.to_categorical(all_labels, num_classes=5)

In [None]:
# Stack the per-beat windows into a single float32 array.
all_windows = np.array(all_windows, dtype=np.float32)

In [None]:
# Hold out 30% of the beats for testing; the fixed random_state keeps the
# split identical between runs. `train_test_split` is imported in the
# notebook's import cell.
X_train, X_test, y_train, y_test = train_test_split(
    all_windows, all_labels, test_size=0.30, random_state=12
)

In [None]:
# Persist the centralized split; np.save appends ".npy" to each name.
centralized_splits = [
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
]
for split_name, split_array in centralized_splits:
    np.save(split_name, split_array)

### NON-IID

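In the first cell below, the ECG recordings are distributed across 6 clients, 8 recordings each. In the following cells, a client's recordings are preprocessed and split into training and test sets; the client index is set by hand (the cells are shown for client 5), so they are re-run once per client.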

In [None]:
# Partition the recordings into 6 consecutive groups of 8, one per client.
clients = [dfs_ecg[start:start + 8] for start in range(0, 48, 8)]

In [None]:
# Build normalized 180-sample windows (90 samples before the beat, the beat
# sample and 89 after) for every annotated beat of client 5.
# NOTE(review): the client index is hard-coded; it has to be changed here and
# in the file names of the save cell to process another client.
all_windows = []
all_labels = []
for ecg in clients[5]:
    # Pull the columns out once; slicing plain arrays avoids repeating a
    # pandas label lookup for every single beat. Rows are assumed to be in
    # sample order with the default 0..n-1 index.
    ecg_values = ecg["ECG"].to_numpy()
    class_ids = ecg["Anno_Symbol"].to_numpy()
    n_samples = len(ecg_values)
    for peak in np.flatnonzero((ecg["Rpeaks"] == True).to_numpy()):
        start, stop = peak - 90, peak + 90
        # Skip beats too close to either end for a complete window.
        if 0 <= start and stop <= n_samples:
            all_windows.append(normalization(ecg_values[start:stop]))
            all_labels.append(class_ids[peak])

In [None]:
# Display the distinct class ids collected for this client (some of the ids
# 0-4 may be missing from a single client's recordings).
np.unique(all_labels)

In [None]:
# One-hot encode the client's integer class ids; num_classes is fixed at 5 so
# the encoded width does not depend on which classes this client contains.
# This rebinds `all_labels`, so the cell should run exactly once after the
# label list has been rebuilt.
all_labels = tf.keras.utils.to_categorical(all_labels, num_classes=5)

In [None]:
# Stack the client's per-beat windows into a single float32 array.
all_windows = np.array(all_windows, dtype=np.float32)

In [None]:
# Hold out 30% of this client's beats for testing; the fixed random_state
# keeps the split identical between runs. `train_test_split` is imported in
# the notebook's import cell.
X_train, X_test, y_train, y_test = train_test_split(
    all_windows, all_labels, test_size=0.30, random_state=12
)

In [None]:
# Persist this client's split; np.save appends ".npy" to each name.
client_splits = [
    ("client_5_X_train", X_train),
    ("client_5_y_train", y_train),
    ("client_5_X_test", X_test),
    ("client_5_y_test", y_test),
]
for split_name, split_array in client_splits:
    np.save(split_name, split_array)