In [29]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import OneHotEncoder
import scipy
from scipy.io import loadmat
from scipy.signal import butter, filtfilt, iirnotch
from biosppy.signals import ecg
import ecg_plot
import scipy.io
import matplotlib.pyplot as plt
from model_utils_1d_BiLSTM import ecg_model
from _preprocess_utils_2d import *
import pickle
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc
from tensorflow.keras import backend as K
from tensorflow.keras.saving import register_keras_serializable


Ensuring GPU support and set growth

In [2]:
# Get all visible GPUs
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    try:

        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} physical GPU(s), {len(logical_gpus)} logical GPU(s) configured.")

    except RuntimeError as e:
        print(e)
else:
    print("No GPU detected.")


1 physical GPU(s), 1 logical GPU(s) configured.


I0000 00:00:1752367088.690153    2681 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9558 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070, pci bus id: 0000:01:00.0, compute capability: 8.9


In [3]:
# Set memory growth for all GPUs
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

### Data Generator

In [4]:
root_path = '/mnt/c/Users/hbala/Desktop/Chagas_Physionet2025/output'

In [5]:
with open('all_train_paths.pkl', 'rb') as f:
    train_paths = pickle.load(f)

train_paths = [os.path.join(root_path, path) for path in train_paths]
 
with open('all_val_paths.pkl', 'rb') as f:
    val_paths = pickle.load(f)
    
val_paths = [os.path.join(root_path, path) for path in val_paths]

In [6]:
train_ds = tf_dataloader(train_paths, batch_size=256, shuffle=True)
val_ds = tf_dataloader(val_paths, batch_size=256, shuffle=False)

In [7]:
model = ecg_model()

# with open('class_weights.pkl', 'rb') as f:
#     class_weights = pickle.load(f)
    
model.summary()

### Defining Hybrid Loss Functions

In [None]:

@register_keras_serializable()
def focal_loss(gamma: float = 2.0):
    """Focal Loss (Lin et al.) for binary problems with sigmoid output."""
    def _loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        bce = K.binary_crossentropy(y_true, y_pred, from_logits=False)          # already mean-reduced per-sample
        p_t = tf.clip_by_value(y_true * y_pred + (1-y_true)*(1-y_pred), 1e-7, 1-1e-7)
        focal = K.pow(1.0 - p_t, gamma) * bce
        return focal
    return _loss

@register_keras_serializable()
class AUCMarginLoss(tf.keras.losses.Loss):
    def __init__(self, margin: float = 1.0, name="auc_margin"):
        super().__init__(name=name)
        self.margin = margin

    def call(self, y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)

        pos = tf.boolean_mask(y_pred, tf.equal(y_true, 1))
        neg = tf.boolean_mask(y_pred, tf.equal(y_true, 0))

        # Compute the loss only if we have BOTH positives and negatives
        def compute_pairwise():
            pairwise = tf.maximum(
                0.0,
                self.margin - tf.expand_dims(pos, 1) + tf.expand_dims(neg, 0)
            )
            return tf.reduce_mean(pairwise)

        no_pairs = tf.logical_or(tf.equal(tf.size(pos), 0),
                                 tf.equal(tf.size(neg), 0))
        return tf.cond(no_pairs,
                       lambda: tf.constant(0.0, dtype=tf.float32),
                       compute_pairwise)


In [None]:
focal = focal_loss(gamma=2.0)
auc_margin = AUCMarginLoss(margin=1.0)

@register_keras_serializable()
def hybrid_loss(y_true, y_pred):
    return 0.7 * focal(y_true, y_pred) + 0.3 * auc_margin(y_true, y_pred)


#### Dynamic Thresholding based on auc_roc curves

In [None]:
@register_keras_serializable()
class DynamicThreshold(tf.keras.callbacks.Callback):
    def __init__(self, val_ds, threshold_var):
        super().__init__()
        self.val_ds = val_ds
        self.threshold_var = threshold_var

    @tf.function  # speeds up large val sets
    def _collect(self, x):
        return self.model(x, training=False)

    def on_epoch_end(self, epoch, logs=None):
        y_true, y_score = [], []
        for x, y in self.val_ds:
            y_score.append(self._collect(x))
            y_true.append(tf.cast(y, tf.float32))

        y_true  = tf.concat(y_true,  axis=0).numpy().ravel()
        y_score = tf.concat(y_score, axis=0).numpy().ravel()

        fpr, tpr, thresh = roc_curve(y_true, y_score)
        finite = np.isfinite(thresh)         # skip the +inf sentinel
        best = np.argmax(tpr[finite] - fpr[finite])

        best_thr = float(thresh[finite][best])
        self.threshold_var.assign(best_thr)

        if logs is not None:
            logs["val_best_threshold"] = best_thr


In [None]:
@register_keras_serializable()
class TopKTPR(tf.keras.metrics.Metric):
    def __init__(self, k_fraction=0.05, name="tpr_at_top5", **kw):
        super().__init__(name=name, **kw)
        self.k_fraction = float(k_fraction)
        self.tp = self.add_weight(name="tp", initializer="zeros")
        self.p  = self.add_weight(name="p",  initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Flatten (works for any output shape)
        y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
        y_pred = tf.reshape(tf.cast(y_pred, tf.float32), [-1])

        n = tf.cast(tf.size(y_pred), tf.float32)
        k = tf.cast(tf.math.maximum(1.0, tf.math.ceil(n * self.k_fraction)), tf.int32)

        # ---- efficient mask: no full scatter for large n ----
        topk_idx = tf.math.top_k(y_pred, k=k, sorted=False).indices
        topk_true = tf.gather(y_true, topk_idx)

        self.tp.assign_add(tf.reduce_sum(topk_true))
        self.p.assign_add(tf.reduce_sum(y_true))

    def result(self):
        return self.tp / (self.p + tf.keras.backend.epsilon())

    def reset_states(self):
        self.tp.assign(0.0)
        self.p.assign(0.0)


In [None]:
threshold_var = tf.Variable(0.5, dtype=tf.float32, trainable=False, name="dyn_thresh")

@register_keras_serializable()
def f1_metric(threshold_var):
    def _f1(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        y_pred_bin = tf.cast(tf.greater_equal(y_pred, threshold_var), tf.float32)

        tp = tf.reduce_sum(y_true * y_pred_bin)
        fn = tf.reduce_sum(y_true * (1 - y_pred_bin))
        fp = tf.reduce_sum((1 - y_true) * y_pred_bin)

        precision = tp / (tp + fp + K.epsilon())
        recall    = tp / (tp + fn + K.epsilon())
        return 2 * precision * recall / (precision + recall + K.epsilon())
    return _f1

model.compile(
    optimizer='adam',
    loss=focal_loss(gamma=2.0),  # Use focal loss
    metrics=['accuracy',
             tf.keras.metrics.AUC(name="auc"),
             f1_metric(threshold_var),
             TopKTPR(k_fraction=0.05)]
)


model_checkpoint = ModelCheckpoint(
    'phy_2025_best_weight_model_filtered_paths_2d_focal_loss_7_13_25.keras',
    monitor='val_tpr_at_top5',
    save_best_only=True,
    mode='max'
)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_tpr_at_top5',
    patience=10,
    verbose=1,
    mode='max',
    restore_best_weights=True
)

In [13]:
class_weights = {0: 1.0, 1: 25.0}

In [14]:
dynamic_thresh_cb = DynamicThreshold(val_ds, threshold_var)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    callbacks=[model_checkpoint, early_stopping, dynamic_thresh_cb],
    verbose=1
)

Epoch 1/100


I0000 00:00:1752367159.723655    2866 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5595s[0m 5s/step - _f1: 9.9701e-05 - accuracy: 0.9769 - auc: 0.4967 - loss: 0.0416 - tpr_at_top5: 0.0491 - val__f1: 0.0000e+00 - val_accuracy: 0.9769 - val_auc: 0.5000 - val_loss: 0.0326 - val_tpr_at_top5: 0.2433 - val_best_threshold: 0.2084
Epoch 2/100


2025-07-12 21:12:23.512741: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5562s[0m 5s/step - _f1: 0.0396 - accuracy: 0.9780 - auc: 0.6813 - loss: 0.0294 - tpr_at_top5: 0.1928 - val__f1: 0.0379 - val_accuracy: 0.9769 - val_auc: 0.7625 - val_loss: 0.0294 - val_tpr_at_top5: 0.2641 - val_best_threshold: 0.1874
Epoch 3/100


2025-07-12 22:45:05.590702: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5574s[0m 5s/step - _f1: 0.0405 - accuracy: 0.9782 - auc: 0.7563 - loss: 0.0275 - tpr_at_top5: 0.2901 - val__f1: 0.0443 - val_accuracy: 0.9769 - val_auc: 0.7849 - val_loss: 0.0292 - val_tpr_at_top5: 0.3039 - val_best_threshold: 0.2258
Epoch 4/100
[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5602s[0m 5s/step - _f1: 0.0380 - accuracy: 0.9775 - auc: 0.7713 - loss: 0.0277 - tpr_at_top5: 0.3055 - val__f1: 0.0409 - val_accuracy: 0.9769 - val_auc: 0.7927 - val_loss: 0.0284 - val_tpr_at_top5: 0.3294 - val_best_threshold: 0.2215
Epoch 5/100


2025-07-13 01:51:21.267714: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5518s[0m 5s/step - _f1: 0.0370 - accuracy: 0.9781 - auc: 0.7775 - loss: 0.0268 - tpr_at_top5: 0.3320 - val__f1: 0.0426 - val_accuracy: 0.9769 - val_auc: 0.8047 - val_loss: 0.0290 - val_tpr_at_top5: 0.3401 - val_best_threshold: 0.2296
Epoch 6/100
[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5506s[0m 5s/step - _f1: 0.0365 - accuracy: 0.9778 - auc: 0.7936 - loss: 0.0266 - tpr_at_top5: 0.3485 - val__f1: 0.0373 - val_accuracy: 0.9769 - val_auc: 0.8135 - val_loss: 0.0271 - val_tpr_at_top5: 0.3596 - val_best_threshold: 0.2090
Epoch 7/100
[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5511s[0m 5s/step - _f1: 0.0381 - accuracy: 0.9781 - auc: 0.8033 - loss: 0.0258 - tpr_at_top5: 0.3673 - val__f1: 0.0421 - val_accuracy: 0.9769 - val_auc: 0.8236 - val_loss: 0.0275 - val_tpr_at_top5: 0.3715 - val_best_threshold: 0.2109
Epoch 8/100
[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5515s[0m 5s/st

2025-07-13 07:58:50.959464: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5513s[0m 5s/step - _f1: 0.0379 - accuracy: 0.9780 - auc: 0.8076 - loss: 0.0258 - tpr_at_top5: 0.3686 - val__f1: 0.0423 - val_accuracy: 0.9772 - val_auc: 0.8180 - val_loss: 0.0273 - val_tpr_at_top5: 0.3484 - val_best_threshold: 0.2182
Epoch 10/100
[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5524s[0m 5s/step - _f1: 0.0375 - accuracy: 0.9782 - auc: 0.8170 - loss: 0.0255 - tpr_at_top5: 0.3782 - val__f1: 0.0393 - val_accuracy: 0.9769 - val_auc: 0.7822 - val_loss: 0.0281 - val_tpr_at_top5: 0.3050 - val_best_threshold: 0.2013
Epoch 11/100
[1m1141/1141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5581s[0m 5s/step - _f1: 0.0398 - accuracy: 0.9778 - auc: 0.7902 - loss: 0.0266 - tpr_at_top5: 0.3429 - val__f1: 0.0423 - val_accuracy: 0.9770 - val_auc: 0.8120 - val_loss: 0.0268 - val_tpr_at_top5: 0.3466 - val_best_threshold: 0.2062
Epoch 11: early stopping
Restoring model weights from the end of the best epoch: 1.


In [27]:
model.save('phy_2025_best_weight_model_filtered_paths_2d_focal_loss_7_12_25.keras')

In [28]:
# Load model and evaluate on validation set
loaded_model = tf.keras.models.load_model(
    'phy_2025_best_weight_model_filtered_paths_2d_focal_loss_7_12_25.keras',
    custom_objects={'focal_loss': focal_loss, 'f1_metric': f1_metric, 'TopKTPR': TopKTPR, 'DynamicThreshold': DynamicThreshold}
)

TypeError: Could not locate function '_loss'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'builtins', 'class_name': 'function', 'config': '_loss', 'registered_name': 'function'}

In [22]:
model.evaluate(val_ds, verbose=1)

[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m397s[0m 1s/step - _f1: 0.0456 - accuracy: 0.9766 - auc: 0.5000 - loss: 0.0328 - tpr_at_top5: 0.2449


[0.032561369240283966,
 0.9769095778465271,
 0.5,
 0.04482297971844673,
 0.2433234453201294]

In [23]:
model.load_weights('phy_2025_best_weight_model_filtered_paths_2d_focal_loss_7_12_25.keras')
model.evaluate(val_ds, verbose=1)

[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m397s[0m 1s/step - _f1: 0.0436 - accuracy: 0.9767 - auc: 0.8361 - loss: 0.0268 - tpr_at_top5: 0.3915


[0.026778055354952812,
 0.977019190788269,
 0.8291621208190918,
 0.04289998859167099,
 0.386350154876709]

In [None]:
# get the best threshold from the dynamic threshold callback


In [17]:
best_thr = dynamic_thresh_cb.threshold_var

In [None]:
# Collect true labels and predicted probabilities from the test dataset.
def conufsion_matrix_plots(val_ds, model): 
    y_true = []
    y_score = []  # predicted probabilities for the positive class

    # Loop over your test dataset (e.g., val_ds)
    for batch_x, batch_y in val_ds:
        preds = model.predict(batch_x, verbose=0)
        y_score.extend(preds.ravel().tolist())
        y_true.extend(batch_y.numpy().ravel().tolist())

    y_true = np.array(y_true)
    y_score = np.array(y_score)

    # Compute ROC curve and AUC
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    # Compute best threshold using the Youden index (maximizes TPR - FPR)
    youden_index = tpr - fpr
    best_index = np.argmax(youden_index)
    best_threshold = thresholds[best_index]
    print("Best threshold (Youden index):", best_threshold)

    # Apply the best threshold to get binary predictions
    y_pred = (y_score >= best_threshold).astype(np.int32)

    # Generate classification report
    report = classification_report(y_true, y_pred, target_names=["False", "True"])
    print("Classification Report:")
    print(report)

    # Compute and plot confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=["False", "True"], yticklabels=["False", "True"])
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title("Confusion Matrix")
    plt.show()

    # Plot the ROC curve with best threshold indicated
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color="darkorange", lw=2, label="ROC curve (area = %0.2f)" % roc_auc)
    plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--", label="Random classifier")
    plt.scatter(fpr[best_index], tpr[best_index], color="red",
                label="Best threshold = %0.2f" % best_threshold)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Receiver Operating Characteristic")
    plt.legend(loc="lower right")
    plt.show()