In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal
import wfdb
from sklearn.model_selection import train_test_split as Split

In [2]:
# Every segment cut out by create_set spans WINDOW_SIZE + PADDING samples,
# centred on an annotated sample index.
# NOTE(review): 360 presumably equals the recordings' sampling rate — confirm.
WINDOW_SIZE = 360
PADDING = 360

In [3]:
# Identifiers of the recordings to process; create_set reads each one from "./records/<id>".
# NOTE(review): these look like MIT-BIH Arrhythmia Database record numbers — confirm.
records = [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124,
           200, 201, 202, 203, 205, 207, 208, 209, 210, 212, 213, 214, 215, 217, 219, 220, 221, 222, 223, 228, 230, 231, 232, 233, 234]


In [4]:
def create_set(records):
    """Cut fixed-length signal windows around the annotated samples of the given records.

    For every record id the annotation file (extension ``atr``) and the
    ``"MLII"`` channel are read from ``./records/<id>`` with wfdb. Each
    annotation whose sample index is at least half a window away from both
    ends of the signal produces one window of ``WINDOW_SIZE + PADDING``
    samples centred on that index. Records for which no signal is returned
    (``sig is None``) are skipped.

    Parameters
    ----------
    records : iterable
        Record identifiers; each is converted with ``str`` and appended to
        ``"./records/"`` to form the wfdb record name.

    Returns
    -------
    signals : numpy.ndarray
        The stacked windows, first axis indexing the windows. With no
        windows at all the shape is ``(0, WINDOW_SIZE + PADDING, 1)``.
    annotations : numpy.ndarray
        The annotation symbol belonging to each window, in the same order.

    The shapes of both arrays are printed before returning.
    """
    # Loop-invariant: half of the full window length, in samples.
    half_window_with_padding = (WINDOW_SIZE + PADDING) // 2

    # Collect into Python lists and convert once at the end; growing a numpy
    # array with np.append copies the whole array on every single beat.
    windows, symbols = [], []
    for rec in records:
        record = "./records/" + str(rec)
        ann = wfdb.rdann(record_name=record, extension='atr')
        sig, _ = wfdb.rdsamp(record_name=record, channel_names=["MLII"])
        if sig is None:
            continue

        # Largest centre index for which the right half still fits in the signal.
        last_valid = len(sig) - half_window_with_padding
        for location, annotation in zip(ann.sample, ann.symbol):
            if half_window_with_padding <= location <= last_valid:
                windows.append(sig[location - half_window_with_padding:location + half_window_with_padding])
                symbols.append(annotation)

    if windows:
        signals = np.array(windows)
        annotations = np.array(symbols)
    else:
        # Keep the shapes callers get from an empty selection.
        signals = np.empty(shape=(0, WINDOW_SIZE + PADDING, 1))
        annotations = np.array([])

    print(signals.shape, annotations.shape)
    return signals, annotations

In [5]:
def create_dataframe(signals, annotations):
    """Combine windows and their labels into one table, one row per window.

    ``signals`` is flattened to two dimensions using its first two axis
    lengths, so it must hold exactly ``shape[0] * shape[1]`` values.
    The label is placed in the first column, followed by the samples.

    Column labels are the pandas defaults of the two joined frames, so the
    label ``0`` occurs twice (label column and first sample column); address
    columns by position (``iloc``) rather than by label.
    """
    n_rows, n_samples = signals.shape[0], signals.shape[1]
    row_index = np.arange(n_rows)

    sample_frame = pd.DataFrame(signals.reshape(n_rows, n_samples), index=row_index)
    label_frame = pd.DataFrame(annotations, index=row_index)

    return pd.concat([label_frame, sample_frame], axis=1)

In [6]:
# Split at record level so that no recording contributes to both partitions.
# A fixed random_state makes the partition reproducible across kernel restarts.
training_set, testing_set = Split(records, random_state=42)

In [7]:
# Carve the validation records out of the training records (seeded for reproducibility).
# NOTE: this rebinds training_set, so re-running this cell on its own shrinks it again;
# re-run the cell that first creates training_set before repeating this one.
training_set, validation_set = Split(training_set, random_state=42)

In [8]:
# Show which record ids ended up in each partition.
split_report = ("Training:", training_set, "\nValidating:", validation_set, "\nTesting:", testing_set)
print(*split_report)

Training: [123, 207, 108, 228, 222, 121, 234, 210, 117, 102, 232, 200, 233, 231, 101, 100, 115, 215, 113, 214, 217, 124, 103, 118, 122, 209, 119] 
Validating: [203, 202, 212, 109, 230, 111, 104, 219, 112] 
Testing: [213, 114, 221, 223, 107, 106, 116, 208, 105, 201, 220, 205]


In [22]:
# Extract the windows of the training records and store them as a CSV table.
signals, annotations = create_set(records=training_set)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="training.csv")

(60856, 720, 1) (60856,)


In [23]:
# Extract the windows of the validation records and store them as a CSV table.
signals, annotations = create_set(records=validation_set)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="validation.csv")

(21047, 720, 1) (21047,)


In [24]:
# Extract the windows of the testing records and store them as a CSV table.
signals, annotations = create_set(records=testing_set)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="testing.csv")

(26078, 720, 1) (26078,)


In [9]:
# Extract the windows of every record into one table; `df` is reused by the cells below.
signals, annotations = create_set(records=records)
df = create_dataframe(signals=signals, annotations=annotations)
df.to_csv(path_or_buf="records.csv")

(107981, 720, 1) (107981,)


In [14]:
# Window-level split of the full table, stratified on column 0 (the annotation symbol)
# so each partition keeps the class proportions. Seeded for reproducibility.
# NOTE: training_set / testing_set now hold DataFrames, not the record-id lists
# bound to the same names earlier in the notebook.
training_set, testing_set = Split(df, stratify=df.iloc[:,0], random_state=42)

In [16]:
# Stratified validation split taken from the training rows (seeded for reproducibility).
# NOTE: this rebinds training_set, so re-running this cell on its own shrinks it again;
# re-run the preceding stratified split first.
training_set, validation_set = Split(training_set, stratify=training_set.iloc[:,0], random_state=42)

In [18]:
# Persist the three stratified partitions, one CSV file each.
for split_frame, csv_name in (
    (training_set, "training_by_classes.csv"),
    (validation_set, "validating_by_classes.csv"),
    (testing_set, "testing_by_classes.csv"),
):
    split_frame.to_csv(csv_name)

In [None]:
# Visual sanity check: band-pass filter the first window and overlay it on the raw samples.

# Column 0 of df holds the annotation symbol, so the samples start at column 1.
# The row comes from a frame with a string column, hence the explicit float conversion.
samples = df.iloc[0, 1:].to_numpy(dtype=float)

# Portion of the window to display.
start = WINDOW_SIZE - int(PADDING / 2)
stop = WINDOW_SIZE + int(PADDING / 2)

# 3rd-order Butterworth band-pass; with no fs given, the cut-offs are fractions of
# the Nyquist frequency. filtfilt runs the filter forwards and backwards (zero phase).
b, a = scipy.signal.butter(3, [0.03, 0.13], "band")
filtered2 = scipy.signal.filtfilt(b, a, samples)

# Both traces are sliced from sample-indexed arrays so they line up exactly
# (slicing df columns directly would be shifted by one because of the label column).
plt.plot(filtered2[start:stop], label='filtered')
plt.plot(samples[start:stop], label='raw')
plt.legend()
plt.show()
print(df.iloc[0,0])

## 