In [1]:
import wfdb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split as Split
from sklearn.preprocessing import normalize
import scipy.signal

In [2]:
# Segment geometry used by create_set: every extracted segment is
# WINDOW_SIZE + PADDING samples long, half of it on each side of the annotated sample.
# NOTE(review): 360 presumably matches the sampling rate of the recordings (one second) — confirm.
WINDOW_SIZE = 360
# Extra samples added on top of WINDOW_SIZE (split evenly before/after the window).
PADDING = 360

In [3]:
# Identifiers of the recordings to process; create_set loads each one from "./records/<id>".
# NOTE(review): these look like MIT-BIH Arrhythmia Database record numbers — confirm the data source.
records = [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124,
           200, 201, 202, 203, 205, 207, 208, 209, 210, 212, 213, 214, 215, 217, 219, 220, 221, 222, 223, 228, 230, 231, 232, 233, 234]


In [4]:
def create_set(records):
    """Cut fixed-length signal segments around every annotation of the given records.

    For each record id, the annotation file (extension ``atr``) and the ``MLII``
    channel are read from ``./records/<id>``. Every annotation whose location
    leaves room for half a segment on both sides yields one segment of
    ``WINDOW_SIZE + PADDING`` samples with the annotated sample at its centre;
    annotations too close to either end of the recording are skipped.

    Parameters
    ----------
    records : iterable
        Record identifiers. Each one is converted with ``str`` and appended to
        ``"./records/"`` to form the path passed to ``wfdb``.

    Returns
    -------
    signals : numpy.ndarray
        Array of shape ``(n_segments, WINDOW_SIZE + PADDING, 1)``.
    annotations : numpy.ndarray
        Array of shape ``(n_segments, 1)`` holding the annotation symbol that
        belongs to each segment.

    Notes
    -----
    The shapes of both arrays are printed before returning.
    NOTE(review): what ``wfdb.rdsamp`` returns for a record that has no channel
    named ``MLII`` depends on the wfdb version — confirm such records are handled
    as intended.
    """
    # Number of samples taken on each side of an annotation; constant for the whole run.
    half_window_with_padding = (WINDOW_SIZE + PADDING) // 2

    signals = []
    annotations = []
    for rec in records:
        record = "./records/" + str(rec)
        ann = wfdb.rdann(record_name=record, extension='atr')
        # Only the samples are needed here; the header fields returned alongside are ignored.
        sig, _ = wfdb.rdsamp(record_name=record, channel_names=["MLII"])
        if sig is None:
            continue

        # Largest annotation location that still allows a full segment to be sliced.
        last_valid_location = len(sig) - half_window_with_padding
        for location, annotation in zip(ann.sample, ann.symbol):
            location = int(location)
            if half_window_with_padding <= location <= last_valid_location:
                start = location - half_window_with_padding
                stop = location + half_window_with_padding
                signals.append(sig[start:stop])
                annotations.append(annotation)

    signals = np.array(signals).reshape(-1, WINDOW_SIZE + PADDING, 1)
    annotations = np.array(annotations).reshape(-1, 1)

    print(signals.shape, annotations.shape)
    return signals, annotations

In [5]:
def create_dataframe(signals, annotations):
    """Combine segment labels and segment samples into one table.

    Parameters
    ----------
    signals : numpy.ndarray
        Array that can be reshaped to ``(signals.shape[0], signals.shape[1])``,
        i.e. one row of samples per segment.
    annotations : array-like
        One label per segment, in the same order as ``signals``.

    Returns
    -------
    pandas.DataFrame
        One row per segment: the label column(s) first, followed by one column
        per sample. Both parts keep their own default column labels, so the
        label ``0`` appears twice in the result.
    """
    n_rows, n_samples = signals.shape[0], signals.shape[1]
    row_index = np.arange(n_rows)

    label_frame = pd.DataFrame(annotations, index=row_index)
    sample_frame = pd.DataFrame(signals.reshape(n_rows, n_samples), index=row_index)

    return pd.concat([label_frame, sample_frame], axis=1)

In [6]:
# Record-level split: whole recordings are assigned to either the training or the testing subset.
# The fixed random_state makes the partition (and every CSV derived from it) reproducible
# across kernel restarts; without it each run shuffles the records differently.
training_set, testing_set = Split(records, random_state=42)

In [7]:
# Carve the validation recordings out of the training recordings.
# Seeded for the same reason as any other random split: a rerun must reproduce the same subsets.
training_set, validation_set = Split(training_set, random_state=42)

In [8]:
# Show which record ids ended up in each subset.
print(
    "Training:", training_set,
    "\nValidating:", validation_set,
    "\nTesting:", testing_set,
)

Training: [215, 223, 210, 113, 116, 122, 209, 232, 201, 103, 115, 119, 219, 100, 118, 112, 231, 233, 106, 230, 207, 200, 213, 121, 220, 228, 105] 
Validating: [222, 111, 102, 104, 217, 214, 234, 107, 108] 
Testing: [208, 221, 205, 202, 101, 109, 212, 124, 117, 203, 114, 123]


In [9]:
# Extract the segments of the training recordings and store them as a table on disk.
signals, annotations = create_set(records=training_set)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="training.csv")

(64792, 720, 1) (64792, 1)


In [10]:
# Extract the segments of the validation recordings and store them as a table on disk.
signals, annotations = create_set(records=validation_set)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="validation.csv")

(16049, 720, 1) (16049, 1)


In [11]:
# Extract the segments of the testing recordings and store them as a table on disk.
signals, annotations = create_set(records=testing_set)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="testing.csv")

(27140, 720, 1) (27140, 1)


In [12]:
# Extract the segments of every recording in `records` and store them as one table on disk.
signals, annotations = create_set(records=records)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="records.csv")

(107981, 720, 1) (107981, 1)


In [13]:
# Segment-level split of the full table, stratified on its first column
# (the annotation symbol in the frame built by create_dataframe).
# The fixed random_state makes the split reproducible across reruns.
# NOTE(review): this rebinds training_set/testing_set from lists of record ids to DataFrames,
# and splitting individual segments lets one recording contribute to several subsets — confirm intended.
training_set, testing_set = Split(df, stratify=df.iloc[:, 0], random_state=42)

In [14]:
# Carve the validation segments out of the training segments, again stratified on the first column.
# Seeded so that a rerun reproduces the same subsets.
# NOTE(review): a class left with a single row in training_set may make the stratified split
# raise — confirm the rarest symbols are handled as intended.
training_set, validation_set = Split(training_set, stratify=training_set.iloc[:, 0], random_state=42)

In [16]:
# Reload the table of all segments from disk; `df` now refers to the CSV-loaded frame.
df = pd.read_csv(filepath_or_buffer='records.csv')

In [17]:
# Frequency of every distinct value in the second column of df, most frequent first.
li = df.iloc[:, 1].value_counts()
s = sum(li)
print(s)
# One line per value: the value itself, its count, and its share of the total (3 decimals).
for l in li.items():
    symbol, count = l
    print(symbol, count, round(count / s, 3))

107981
N 74703 0.692
L 8066 0.075
R 7251 0.067
V 7120 0.066
/ 3615 0.033
A 2545 0.024
+ 1195 0.011
F 802 0.007
~ 577 0.005
! 472 0.004
" 437 0.004
f 260 0.002
j 229 0.002
x 193 0.002
a 150 0.001
| 132 0.001
E 106 0.001
J 83 0.001
e 16 0.0
Q 15 0.0
[ 6 0.0
] 6 0.0
S 2 0.0


In [18]:
# Display the values of row 0 from column position 2 onwards as a flat NumPy array.
# Column position 1 is the one the symbol counts are computed from; position 0 is presumably
# the index column saved by to_csv — confirm against records.csv.
df.iloc[0,2:].to_numpy().flatten()

array([-0.145, -0.15, -0.16, -0.155, -0.16, -0.175, -0.18, -0.185, -0.17,
       -0.155, -0.175, -0.18, -0.19, -0.18, -0.155, -0.135, -0.155, -0.19,
       -0.205, -0.235, -0.225, -0.245, -0.25, -0.26, -0.275, -0.275,
       -0.275, -0.265, -0.255, -0.265, -0.275, -0.29, -0.29, -0.29, -0.29,
       -0.285, -0.295, -0.305, -0.285, -0.275, -0.275, -0.28, -0.285,
       -0.305, -0.29, -0.3, -0.28, -0.29, -0.3, -0.315, -0.32, -0.335,
       -0.36, -0.385, -0.385, -0.405, -0.455, -0.485, -0.485, -0.425,
       -0.33, -0.22, -0.07, 0.12, 0.375, 0.62, 0.78, 0.84, 0.765, 0.52,
       0.17, -0.165, -0.365, -0.435, -0.425, -0.37, -0.33, -0.325, -0.335,
       -0.345, -0.33, -0.325, -0.315, -0.31, -0.32, -0.335, -0.34, -0.325,
       -0.345, -0.335, -0.33, -0.335, -0.33, -0.325, -0.33, -0.33, -0.345,
       -0.355, -0.335, -0.325, -0.305, -0.32, -0.32, -0.33, -0.34, -0.335,
       -0.34, -0.345, -0.355, -0.355, -0.34, -0.33, -0.33, -0.33, -0.34,
       -0.35, -0.325, -0.325, -0.33, -0.33, -0.335,

In [19]:
# plt.plot(filtered[WINDOW_SIZE-int(PADDING/2):WINDOW_SIZE+int(PADDING/2)])
%matplotlib tk
window = df.iloc[0, WINDOW_SIZE-int(PADDING/2):WINDOW_SIZE+int(PADDING/2)]
b, a = scipy.signal.butter(3, [0.03, 0.13], "band")
filtered = scipy.signal.filtfilt(b, a, window.to_numpy().flatten())
plt.plot(np.arange(360), window, label='Original')
plt.plot(np.arange(360), filtered, label='Filtered')
plt.xlabel('Samples')
plt.ylabel('mV')
plt.legend()
plt.show()

## 