# ECG PPG Detection Pipeline

This notebook provides a step-by-step implementation of the main logic from `main.py` for PPG peak detection using a convolutional neural network. It covers data preparation, model definition, training, and evaluation.

## 1. Import Required Libraries and Modules

Import all necessary libraries and set up the environment for TensorFlow and multiprocessing.

In [1]:
import os
# Lower TensorFlow's native (C++) log verbosity; this is set here, ahead of the
# `import tensorflow` statement further down.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import csv
# NOTE(review): the two star imports presumably supply the project names used by
# the later cells (e.g. NEIGHBOUR_POINT, TEST_TIME, the *_DIR constants, wfdb, np,
# get_tf_records, get_record_preprocessed, evaluate) -- confirm against
# make_data.py / util.py.
from make_data import *
from util import *
import tqdm
import tensorflow as tf
# Restrict TensorFlow's Python-side logger to ERROR-level messages.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from multiprocessing import Pool

# GPU devices visible to TensorFlow.
physical_devices = tf.config.list_physical_devices('GPU')
# Enabling memory growth on the first GPU is currently disabled:
# tf.config.experimental.set_memory_growth(physical_devices[0], True)





## 2. Define the PPG Peak Detection Model

Define the convolutional neural network (CNN) model for PPG peak detection using TensorFlow Keras.

In [2]:
def get_ppg_model(input_shape=NEIGHBOUR_POINT, learning_rate=0.02, momentum=0.9):
    """Build, summarise and compile the 1-D CNN used for PPG peak detection.

    The network takes channels-last windows of shape ``(input_shape, 1)`` and
    stacks Conv1D(8) -> MaxPool1D(2) -> Conv1D(16) -> Flatten -> Dense(32) ->
    Dense(2, softmax). It is compiled with SGD (with momentum), binary
    cross-entropy loss and an accuracy metric. The layer summary is printed as
    a side effect.

    Args:
        input_shape: Number of samples in one input window.
        learning_rate: Learning rate passed to the SGD optimizer.
        momentum: Momentum passed to the SGD optimizer.

    Returns:
        The compiled ``tf.keras`` Sequential model.
    """
    layers = tf.keras.layers
    network = tf.keras.models.Sequential([
        layers.Conv1D(filters=8, kernel_size=5, padding='same', activation='relu',
                      input_shape=(input_shape, 1), data_format="channels_last"),
        layers.MaxPool1D(pool_size=2, strides=2, padding='same'),
        layers.Conv1D(filters=16, kernel_size=5, padding='same', activation='relu'),
        layers.Flatten(),
        layers.Dense(32, activation='relu'),
        # Two softmax outputs: one probability per class.
        layers.Dense(2, activation='softmax'),
    ])
    network.summary()
    network.compile(
        tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'],
    )
    return network

## 3. Train the Model

Train the PPG peak detection model on the preprocessed data, logging to TensorBoard and saving a weights checkpoint after every epoch.

In [3]:
def train_model(model, batch_size=128, epoch=1):
    """Train ``model`` on the preprocessed training records and save it.

    Training is skipped entirely when ``SAVE_MODEL_DIR + TEST_TIME`` already
    exists. Otherwise the function counts the training samples to derive
    ``steps_per_epoch``, fits the model with TensorBoard logging and a
    per-epoch weights checkpoint, and finally saves the model under
    ``SAVE_MODEL_DIR + TEST_TIME``.

    Args:
        model: Compiled Keras model to train (``fit`` and ``save`` are called on it).
        batch_size: Batch size; also scales the shuffle/prefetch buffers (x100).
        epoch: Number of epochs to train for.

    Returns:
        None.
    """
    if os.path.exists(SAVE_MODEL_DIR + TEST_TIME):
        print(f"Model {TEST_TIME} was trained and is ready")
        return

    shuffle_buffer = batch_size * 100
    prefetch_buffer = batch_size * 100

    # List the training records once and reuse the list for both the sample
    # count and the input pipeline.
    train_set = get_record_preprocessed('train')

    sample = 0
    for file in train_set:
        # Files whose second dot-separated component is '2' are not counted.
        if file.split('.')[1] == '2':
            continue
        header = wfdb.rdheader(os.path.join(MITDB_DIR, file.split('.')[0]))
        # Each record contributes its signal length minus 2 * (NEIGHBOUR_POINT - 1)
        # samples. NOTE(review): presumably the window margin trimmed at both
        # ends during preprocessing -- confirm against make_data.
        sample += header.sig_len - (NEIGHBOUR_POINT - 1) * 2

    train_data = get_tf_records(train_set, batch_size, shuffle_buffer, prefetch_buffer)
    valid_data = get_tf_records(get_record_preprocessed('eval'), batch_size, shuffle_buffer, prefetch_buffer, mode='eval')

    log_dir = TENSOR_BOARD_DIR + TEST_TIME
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(log_dir, exist_ok=True)
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

    check_point_dir = CHECK_POINT_DIR + TEST_TIME + '/'
    os.makedirs(check_point_dir, exist_ok=True)
    # One weights-only file per epoch, named with a zero-padded epoch number.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=check_point_dir + "{epoch:02d}.weights.h5",
                                                    save_weights_only=True, verbose=0, save_freq='epoch')
    callback = [tensorboard, checkpoint]
    with tf.device("/GPU:0"):
        model.fit(train_data,
                  # Integer floor division: whole batches only, no float round-trip.
                  steps_per_epoch=int(sample // batch_size),
                  epochs=epoch,
                  verbose=1,
                  validation_data=valid_data,
                  callbacks=callback)
        # NOTE(review): model.save() is given a "*.weights.h5" file name here;
        # confirm whether a full-model save or save_weights() is intended.
        model.save(SAVE_MODEL_DIR + TEST_TIME + "/last_ckt.weights.h5")

## 4. Evaluate Model on Test Data

Evaluate the trained PPG peak detection model on the evaluation data: compare the predicted heart rate (BPM) with the reference BPM for each recording and write the results to a CSV file.

In [4]:
# def get_result(result_file_name, checkpoint=True, checkpoint_epoch=0, saved_model_name=None, batch_size=128):
#     if not os.path.exists(RESULT_DIR):
#         os.makedirs(RESULT_DIR)

#     with open(RESULT_DIR + result_file_name + '.csv', 'w', newline='') as result_file:
#         writer = csv.writer(result_file)
#         writer.writerow(["Recording", "TP", "FN", 'FP', 'Se', 'P+'])

#         # Tổng cộng dồn TP, FN, FP
#         TP_total = 0
#         FN_total = 0
#         FP_total = 0

#         shuffle_buffer = batch_size * 100
#         prefetch_buffer = batch_size * 100

#         # Load model
#         if checkpoint:
#             model = get_qrs_model()
#             model.load_weights(CHECK_POINT_DIR + TEST_TIME + "/0{}.weights.h5".format(checkpoint_epoch))
#             print('Load model checkpoint')
#         else:
#             model = tf.keras.models.load_model(SAVE_MODEL_DIR + saved_model_name)

#         # Lấy danh sách file
#         files = get_record_preprocessed('eval')
#         for file in files:
#             if file.split('.')[0] in ['104', '102', '107', '217']:
#                 continue

#             test_data = get_tf_records(file, batch_size, shuffle_buffer, prefetch_buffer, mode='eval')
#             with tf.device("/GPU:0"):
#                 prediction = model.predict(test_data, verbose=0)
#             prediction = np.rint(prediction)

#             # result = [TP, FN, FP, Se, P+]
#             result = evaluate(file.split('.')[0], prediction, MITDB_DIR)
#             print(file, result)

#             # Cộng dồn TP, FN, FP
#             TP_total += result[0]
#             FN_total += result[1]
#             FP_total += result[2]

#             writer.writerow([file, result[0], result[1], result[2], result[3], result[4]])

#         # Tính Se và P+ tổng
#         Se_total = TP_total / (TP_total + FN_total) if (TP_total + FN_total) > 0 else 0
#         Pp_total = TP_total / (TP_total + FP_total) if (TP_total + FP_total) > 0 else 0

#         print(['total', TP_total, FN_total, FP_total, Se_total, Pp_total])
#         writer.writerow(['total', TP_total, FN_total, FP_total, Se_total, Pp_total])

def _load_eval_model(checkpoint, checkpoint_epoch, saved_model_name):
    """Return the model to evaluate, restored from a checkpoint or a saved model."""
    if not checkpoint:
        return tf.keras.models.load_model(os.path.join(SAVE_MODEL_DIR, saved_model_name))

    model = get_ppg_model()  # must be the same architecture the checkpoint was written from
    # Zero-pad the epoch number to two digits (e.g. 03, 10, 20) to build the
    # checkpoint file name.
    checkpoint_path = os.path.join(CHECK_POINT_DIR, TEST_TIME, "{:02d}.weights.h5".format(checkpoint_epoch))
    model.load_weights(checkpoint_path)
    print(f'Load model checkpoint từ: {checkpoint_path}')
    return model


def get_result(result_file_name, checkpoint=True, checkpoint_epoch=0, saved_model_name=None, batch_size=128):
    """Evaluate the model on every 'eval' record and write a BPM report as CSV.

    For each evaluation record the model's rounded predictions are passed to
    ``evaluate``, which yields the reference BPM and the predicted BPM. The
    absolute BPM error per record and the mean absolute error over all valid
    records are printed and written to ``RESULT_DIR + result_file_name + '.csv'``.

    Records for which either BPM value is not positive are excluded from the
    mean, and their error is reported as ``nan`` rather than ``0.00`` so they
    cannot be mistaken for perfect predictions. The mean is ``nan`` as well
    when no record is valid.

    Args:
        result_file_name: Base name (without extension) of the CSV report.
        checkpoint: If true, load weights from the per-epoch checkpoint;
            otherwise load the saved model named ``saved_model_name``.
        checkpoint_epoch: Epoch number of the checkpoint to load.
        saved_model_name: Name under ``SAVE_MODEL_DIR``; only used when
            ``checkpoint`` is false.
        batch_size: Batch size for prediction; also scales the buffers (x100).

    Returns:
        None.
    """
    os.makedirs(RESULT_DIR, exist_ok=True)

    shuffle_buffer = batch_size * 100
    prefetch_buffer = batch_size * 100

    # Load the model before opening the CSV in 'w' mode, so a missing or broken
    # checkpoint does not truncate an existing results file.
    model = _load_eval_model(checkpoint, checkpoint_epoch, saved_model_name)

    # Running totals for the mean absolute BPM error.
    total_bpm_error = 0.0
    file_count = 0

    with open(RESULT_DIR + result_file_name + '.csv', 'w', newline='', encoding='utf-8') as result_file:
        writer = csv.writer(result_file)
        writer.writerow(['Recording', 'BPM_Thật (từ .atr)', 'BPM_Dự_đoán (từ Model)', 'Sai_số_BPM (MAE)'])

        for file in get_record_preprocessed('eval'):
            record_name = file.split('.')[0]
            # Skip unrelated records, if present.
            if record_name in ('104', '102', '107', '217'):
                continue

            test_data = get_tf_records(file, batch_size, shuffle_buffer, prefetch_buffer, mode='eval')
            with tf.device("/GPU:0"):
                prediction = model.predict(test_data, verbose=0)

            # Round the model output to hard labels (e.g. [0.1, 0.9] -> [0., 1.]).
            prediction = np.rint(prediction)

            # Assumes MITDB_DIR points at the original record/annotation files.
            bpm_true, bpm_pred = evaluate(record_name, prediction, MITDB_DIR)

            if bpm_true > 0 and bpm_pred > 0:
                # Only records with two valid BPM values contribute to the mean.
                bpm_error = abs(bpm_true - bpm_pred)
                total_bpm_error += bpm_error
                file_count += 1
            else:
                bpm_error = float('nan')

            print(f"File: {file}, BPM Thật: {bpm_true:.2f}, BPM Dự đoán: {bpm_pred:.2f}, Sai số: {bpm_error:.2f}")
            writer.writerow([file, f"{bpm_true:.2f}", f"{bpm_pred:.2f}", f"{bpm_error:.2f}"])

        # Mean absolute BPM error over all valid records.
        mean_avg_error = total_bpm_error / file_count if file_count > 0 else float('nan')

        print("\n-------------------------------------------------")
        print(f"SAI SỐ BPM TRUNG BÌNH (MAE): {mean_avg_error:.2f} BPM")
        print("-------------------------------------------------")

        writer.writerow([])  # blank separator row
        writer.writerow(['Total_Mean_Average_Error (BPM)', f"{mean_avg_error:.2f}", '', ''])

In [5]:
# Root directory of the raw dataset; get_result() forwards it to evaluate().
# NOTE(review): despite the MITDB_* name, the path points at a local copy of the
# PhysioNet "pulse-transit-time-ppg" 1.1.0 dataset, and it is an absolute,
# machine-specific Windows path -- adjust it before running elsewhere. It
# presumably overrides a MITDB_DIR supplied by the star imports; confirm.
MITDB_DIR = r"D:\altium and source projects\Health monitoring\physionet.org\files\pulse-transit-time-ppg\1.1.0"
# The commented-out statements below are the earlier pipeline stages kept for
# reference: listing raw records, preprocessing one record, writing TFRecords,
# listing the train/eval splits, and training for 10 epochs.
# files = get_record_raw(MITDB_DIR)
# print("Số file .hea tìm thấy:", len(files))
# print("Ví dụ vài file:", files[:len(files)])
# file_test = files[0]
# data, label = preprocess_data(file_test, separate=None)
# print("Shape data:", data.shape if data is not None else None)
# print("Shape label:", label.shape if label is not None else None)
# save_tf_record(files[0])
# print("Danh sách TFRecord:", [f for f in os.listdir(PREPROCESSED_DATA_DIR) if f.endswith(".tfrecord")])
# generate_data(get_record_raw(MITDB_DIR), None)
# print("\n=== Danh sách TFRecord sinh ra ===")
# print([f for f in os.listdir(PREPROCESSED_DATA_DIR) if f.endswith(".tfrecord")])
# train_files = get_record_preprocessed("train")
# eval_files = get_record_preprocessed("eval")
# print("Train:", len(train_files))
# print("Eval:", len(eval_files))
# train_model(get_ppg_model(), epoch=10)
# Evaluate the epoch-9 checkpoint and write the report to "<TEST_TIME>.csv".
# saved_model_name is not used on the checkpoint=True path.
get_result(TEST_TIME, checkpoint=True, checkpoint_epoch=9, saved_model_name='run-0')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  saveable.load_own_variables(weights_store.get(inner_path))


Load model checkpoint từ: ./qrs_detection_dataset/checkpoint/mitdb/run-0\09.weights.h5




File: s10_sit.tfrecord, BPM Thật: 75.07, BPM Dự đoán: 74.99, Sai số: 0.09




File: s11_sit.tfrecord, BPM Thật: 73.06, BPM Dự đoán: 73.33, Sai số: 0.27




File: s12_sit.tfrecord, BPM Thật: 72.97, BPM Dự đoán: 72.97, Sai số: 0.00




File: s13_sit.tfrecord, BPM Thật: 74.71, BPM Dự đoán: 74.62, Sai số: 0.09




File: s14_sit.tfrecord, BPM Thật: 53.97, BPM Dự đoán: 53.75, Sai số: 0.23




File: s15_sit.tfrecord, BPM Thật: 86.07, BPM Dự đoán: 82.96, Sai số: 3.11




File: s16_sit.tfrecord, BPM Thật: 64.60, BPM Dự đoán: 64.63, Sai số: 0.03




File: s17_sit.tfrecord, BPM Thật: 77.23, BPM Dự đoán: 78.93, Sai số: 1.70




File: s18_sit.tfrecord, BPM Thật: 76.59, BPM Dự đoán: 76.62, Sai số: 0.03




File: s19_sit.tfrecord, BPM Thật: 82.40, BPM Dự đoán: 81.75, Sai số: 0.65




File: s1_sit.tfrecord, BPM Thật: 72.66, BPM Dự đoán: 72.68, Sai số: 0.02




File: s20_sit.tfrecord, BPM Thật: 78.95, BPM Dự đoán: 78.91, Sai số: 0.04




File: s21_sit.tfrecord, BPM Thật: 77.81, BPM Dự đoán: 77.84, Sai số: 0.03




File: s22_sit.tfrecord, BPM Thật: 81.98, BPM Dự đoán: 82.03, Sai số: 0.05




File: s2_sit.tfrecord, BPM Thật: 120.93, BPM Dự đoán: 120.95, Sai số: 0.02




File: s3_sit.tfrecord, BPM Thật: 73.97, BPM Dự đoán: 74.02, Sai số: 0.04




File: s4_sit.tfrecord, BPM Thật: 83.19, BPM Dự đoán: 81.79, Sai số: 1.40




File: s5_sit.tfrecord, BPM Thật: 77.77, BPM Dự đoán: 77.78, Sai số: 0.01




File: s6_sit.tfrecord, BPM Thật: 77.72, BPM Dự đoán: 77.78, Sai số: 0.05




File: s7_sit.tfrecord, BPM Thật: 64.73, BPM Dự đoán: 64.73, Sai số: 0.00




File: s8_sit.tfrecord, BPM Thật: 86.09, BPM Dự đoán: 80.45, Sai số: 5.64
File: s9_sit.tfrecord, BPM Thật: 69.95, BPM Dự đoán: 72.03, Sai số: 2.08

-------------------------------------------------
SAI SỐ BPM TRUNG BÌNH (MAE): 0.71 BPM
-------------------------------------------------


