In [1]:
import wfdb
import numpy as np
import matplotlib.pyplot as plt
import pywt
import csv
import os
from wfdb import processing
import datetime
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

In [2]:
def denoise(data):
    # 小波变换
    coeffs = pywt.wavedec(data=data, wavelet='db5', level=9)
    cA9, cD9, cD8, cD7, cD6, cD5, cD4, cD3, cD2, cD1 = coeffs

    # 阈值去噪
    threshold = (np.median(np.abs(cD1)) / 0.6745) * (np.sqrt(2 * np.log(len(cD1))))
    cD1.fill(0)
    cD2.fill(0)
    for i in range(1, len(coeffs) - 2):
        coeffs[i] = pywt.threshold(coeffs[i], threshold)

    # 小波反变换,获取去噪后的信号
    rdata = pywt.waverec(coeffs=coeffs, wavelet='db5')
    return rdata


def find_peak(file_addr, record):
#     record = wfdb.rdrecord(file_addr, sampfrom=0)

    # Use the GQRS algorithm to detect QRS locations in the first channel
    qrs_inds = processing.qrs.gqrs_detect(sig=record.p_signal[:, 0], fs=record.fs)

    # Correct the peaks shifting them to local maxima
    min_bpm = 20
    max_bpm = 230
    # min_gap = record.fs * 60 / min_bpm
    # Use the maximum possible bpm as the search radius
    search_radius = int(record.fs * 60 / max_bpm)
    corrected_peak_inds = processing.peaks.correct_peaks(record.p_signal[:, 0],
                                                         peak_inds=qrs_inds,
                                                         search_radius=search_radius,
                                                         smooth_window_size=150)

    return sorted(corrected_peak_inds)


def get_data(file_addr, X_data, Y_data):
    print("reading " + file_addr + " data....")
    record = wfdb.rdrecord("training2017/"+file_addr, sampfrom=0)
    peak_index = find_peak(file_addr, record)
    data = record.p_signal.flatten()
    rdata = denoise(data=data)
#     start = 10
#     end = 5
#     i = start
#     j = len(peak_index) - end
    
    x_train=rdata[peak_index[1]-99:peak_index[1]+201]
    X_data.append(x_train)
    
    
def loadData():
    #read csv file     
    with open("training2017/test.csv") as f:
        file_addr_list = []
        ecg_type_list = []
        reader = csv.reader(f)
        for row in reader:
            file_addr_list.append(row[0])
            ecg_type_list.append(row[1])
        
    dataSet = []
    lableSet = ecg_type_list
    
    for n in file_addr_list:
        get_data(n, dataSet, lableSet)
         
    # 转numpy数组,打乱顺序
    dataSet = np.array(dataSet)
    lableSet = np.array(lableSet)
    dataSet = np.array(dataSet).reshape(-1, 270)
    lableSet = np.array(lableSet).reshape(-1, 1)
    train_ds = np.hstack((dataSet, lableSet))
    np.random.shuffle(train_ds)
    
    X = train_ds[:, :300].reshape(-1, 300, 1)
    Y = train_ds[:, 300]
    RATIO = 0.3
    shuffle_index = np.random.permutation(len(X))
    test_length = int(RATIO * len(shuffle_index)) # RATIO = 0.3
    test_index = shuffle_index[:test_length]
    train_index = shuffle_index[test_length:]
    X_test, Y_test = X[test_index], Y[test_index]
    X_train, Y_train = X[train_index], Y[train_index]
    return X_train, Y_train, X_test, Y_test
#     return dataSet, lableSet


def loadtestData():
    #read csv file     
    with open("validation/test.csv") as f:
        file_addr_list = []
        ecg_type_list = []
        reader = csv.reader(f)
        for row in reader:
            file_addr_list.append(row[0])
            ecg_type_list.append(row[1])
        
    dataSet = []
    lableSet = ecg_type_list
    
    for n in file_addr_list:
        get_data(n, dataSet, lableSet)
    dataSet = np.array(dataSet)
    lableSet = np.array(lableSet)
    return dataSet, lableSet


def buildModel():
    newModel = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(300, 1)),
        # 第一个卷积层, 4 个 21x1 卷积核
        tf.keras.layers.Conv1D(filters=4, kernel_size=21, strides=1, padding='SAME', activation='relu'),
        # 第一个池化层, 最大池化,4 个 3x1 卷积核, 步长为 2
        tf.keras.layers.MaxPool1D(pool_size=3, strides=2, padding='SAME'),
        # 第二个卷积层, 16 个 23x1 卷积核
        tf.keras.layers.Conv1D(filters=16, kernel_size=23, strides=1, padding='SAME', activation='relu'),
        # 第二个池化层, 最大池化,4 个 3x1 卷积核, 步长为 2
        tf.keras.layers.MaxPool1D(pool_size=3, strides=2, padding='SAME'),
        # 第三个卷积层, 32 个 25x1 卷积核
        tf.keras.layers.Conv1D(filters=32, kernel_size=25, strides=1, padding='SAME', activation='relu'),
        # 第三个池化层, 平均池化,4 个 3x1 卷积核, 步长为 2
        tf.keras.layers.AvgPool1D(pool_size=3, strides=2, padding='SAME'),
        # 第四个卷积层, 64 个 27x1 卷积核
        tf.keras.layers.Conv1D(filters=64, kernel_size=27, strides=1, padding='SAME', activation='relu'),
        # 打平层,方便全连接层处理
        tf.keras.layers.Flatten(),
        # 全连接层,128 个节点
        tf.keras.layers.Dense(128, activation='relu'),
        # Dropout层,dropout = 0.2
        tf.keras.layers.Dropout(rate=0.2),
        # 全连接层,5 个节点
        tf.keras.layers.Dense(5, activation='softmax')
    ])
    return newModel

In [3]:
# X_train,Y_train为所有的数据集和标签集
# X_test,Y_test为拆分的测试集和标签集
X_train, Y_train, X_test, Y_test = loadData()
# # 构建CNN模型
# model = buildModel()
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# model.summary()
# # 定义TensorBoard对象
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# # 训练与验证
# model.fit(X_train, Y_train, epochs=30, batch_size=128, validation_split=RATIO, callbacks=[tensorboard_callback])
# model.save(filepath=model_path)

# # 预测
# Y_pred = model.predict_classes(X_test)
# Y_pred

reading A00001 data....
reading A00002 data....
reading A00003 data....
reading A00004 data....
reading A00005 data....
reading A00006 data....
reading A00007 data....
reading A00008 data....
reading A00009 data....
reading A00010 data....
reading A00011 data....
reading A00012 data....
reading A00013 data....
reading A00014 data....
reading A00015 data....
reading A00016 data....
reading A00017 data....
reading A00018 data....
reading A00019 data....
reading A00020 data....
reading A00021 data....
reading A00022 data....
reading A00023 data....
reading A00024 data....
reading A00025 data....
reading A00026 data....
reading A00027 data....
reading A00028 data....
reading A00029 data....
reading A00030 data....
reading A00031 data....
reading A00032 data....
reading A00033 data....
reading A00034 data....
reading A00035 data....
reading A00036 data....
reading A00037 data....
reading A00038 data....
reading A00039 data....
reading A00040 data....
reading A00041 data....
reading A00042 d

  "boundary effects.").format(level))


reading A00094 data....
reading A00095 data....
reading A00096 data....
reading A00097 data....
reading A00098 data....
reading A00099 data....
reading A00100 data....
reading A00101 data....
reading A00102 data....
reading A00103 data....
reading A00104 data....
reading A00105 data....
reading A00106 data....
reading A00107 data....
reading A00108 data....
reading A00109 data....
reading A00110 data....
reading A00111 data....
reading A00112 data....
reading A00113 data....
reading A00114 data....
reading A00115 data....
reading A00116 data....
reading A00117 data....
reading A00118 data....
reading A00119 data....
reading A00120 data....
reading A00121 data....
reading A00122 data....
reading A00123 data....
reading A00124 data....
reading A00125 data....


ValueError: cannot reshape array of size 37500 into shape (270)

In [None]:
# train_images, train_labels = loadData()
# test_images, test_labels = loadtestData()
# model = models.Sequential()
# model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# model.add(layers.Flatten())
# model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dense(10))

# model.compile(optimizer='adam',
#               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#               metrics=['accuracy'])

# history = model.fit(train_images, train_labels, epochs=10, 
#                     validation_data=(test_images, test_labels))

# print(test_acc)