In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, Activation, Flatten, Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
import sys
sys.path.append("../../common_tools/signal_processing")
from normalize_signal import normalize_ppg

def create_dilated_cnn(input_shape):
    model = Sequential()
    model.add(Input(shape=input_shape, name="input_layer"))
    model.add(Conv1D(filters=4, kernel_size=3, dilation_rate=1, padding='same', name="conv1"))
    model.add(BatchNormalization(name="batchnorm1"))
    model.add(Activation('elu', name="activation1"))

    dilation_rates = [2, 4, 8, 16, 32, 64]
    for idx, rate in enumerate(dilation_rates):
        model.add(Conv1D(filters=32, kernel_size=3, dilation_rate=rate, padding='same', name=f"conv{idx+2}"))
        model.add(BatchNormalization(name=f"batchnorm{idx+2}"))
        model.add(Activation('elu', name=f"activation{idx+2}"))

    model.add(Conv1D(filters=1, kernel_size=1, activation='sigmoid', name="output_layer"))
    return model

# Prepare PPG data and labels
def process_ppg_file(file_path, label_path, segment_length=256):
    """
    Read PPG signals and labels from files, normalize them, and divide into segments.
    """
    # Read PPG signal
    ppg_data = pd.read_csv(file_path, header=None).squeeze("columns").values
    normalized_ppg = normalize_ppg(ppg_data)

    peaks_indices = pd.read_csv(label_path, header=None).squeeze("columns").values
    labels, segments = [], []
    for i in range(0, len(normalized_ppg), segment_length):
        segment = normalized_ppg[i:i+segment_length]
        if len(segment) < segment_length:
            segment = np.pad(segment, (0, segment_length - len(segment)), 'constant', constant_values=0)
        segments.append(segment)
        label = np.zeros(segment_length)
        for peak in peaks_indices:
            if i <= peak < i + segment_length:
                peak_pos = peak - i
                label[max(0, peak_pos - 2):min(segment_length, peak_pos + 3)] = 1
        labels.append(label)
    return np.array(segments), np.array(labels)

# File paths
input_dir = 'E:/dilated_cnn_peak_detection_model_data/train/train_data/data'
label_dir = 'E:/dilated_cnn_peak_detection_model_data/train/train_data/label'
segment_length = 256  

# Create the model
input_shape = (segment_length, 1)
model = create_dilated_cnn(input_shape)

# Initialize optimizer with a low learning rate
initial_learning_rate = 1e-4
optimizer = Adam(learning_rate=initial_learning_rate)

# Compile the model
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks to reduce learning rate and stop early
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, min_lr=1e-8, verbose=1)
early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1, restore_best_weights=True)

# Train the model with each file and calculate the global loss
global_loss = 0
file_count = 0

for file_name in os.listdir(input_dir):
    input_file = os.path.join(input_dir, file_name)
    label_file = os.path.join(label_dir, file_name.replace(".csv", "_labeled_peaks.csv"))
    if os.path.exists(label_file):
        print(f"Processing: {file_name}")
        ppg_segments, ppg_labels = process_ppg_file(input_file, label_file, segment_length)
        ppg_segments = np.expand_dims(ppg_segments, axis=-1)

        history = model.fit(
            ppg_segments,
            ppg_labels,
            epochs=200,  
            batch_size=32, 
            verbose=0,
            callbacks=[reduce_lr, early_stopping]
        )
        file_loss = history.history['loss'][-1]
        print(f"Loss for file {file_name}: {file_loss:.4f}")
        global_loss += file_loss
        file_count += 1

# Calculate the average global loss
global_loss /= file_count
print(f"Global loss (average across all files): {global_loss:.4f}")

# Save the trained model
model.save("../dilated_cnn_peak_detection_model.h5", include_optimizer=False)
print("Training complete, and the model has been saved.")


Processing: mimic_perform_af_004_data.csv
Restoring model weights from the end of the best epoch: 200.
Loss for file mimic_perform_af_004_data.csv: 0.0408
Processing: mimic_perform_af_005_data.csv

Epoch 87: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.

Epoch 101: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.

Epoch 110: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.

Epoch 118: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.

Epoch 123: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 123: early stopping
Restoring model weights from the end of the best epoch: 113.
Loss for file mimic_perform_af_005_data.csv: 0.0161
Processing: mimic_perform_af_006_data.csv

Epoch 45: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.

Epoch 57: ReduceLROnPlateau reducing learning rate to 7.81249980263965e-07.

Epoch 62: ReduceLROnPlateau reducing learning rate to 3.906249901319825e-07



Loss for file p097547-2125-10-21-23-43.csv: 0.0290
Global loss (average across all files): 0.0650
Training complete, and the model has been saved.
