<a href="https://colab.research.google.com/github/Khalid-Elkhidir/Enhanced-DepHNN/blob/main/Enhanced_DepHNN_v1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Input, Conv1D, MaxPooling1D, LSTM,
                                   GlobalAveragePooling1D, Dense, SpatialDropout1D,
                                   Dropout, BatchNormalization)
from tensorflow.keras.regularizers import l2

def create_dephnn_model(input_shape=(7500, 128)):
    """
    Optimized version with sequence reduction and maintained regularization
    """
    model = Sequential([
        Input(shape=input_shape),

        # Conv Block with early sequence reduction
        Conv1D(128, 5, padding='same', activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        SpatialDropout1D(0.5),
        MaxPooling1D(2),  # Reduces sequence length by 50% (7500 → 3750)

        # Single optimized LSTM layer
        LSTM(32, return_sequences=True,
            dropout=0.3, recurrent_dropout=0.3,  # Increased dropout for single layer
            kernel_regularizer=l2(0.001)),

        # Single optimized LSTM layer
        LSTM(16, return_sequences=True,
            dropout=0.3, recurrent_dropout=0.3,  # Increased dropout for single layer
            kerne l_regularizer=l2(0.001)),


        GlobalAveragePooling1D(),

        # Dense layers with maintained regularization
        Dense(4, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.6),
        Dense(4, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.6),

        Dense(2, activation='softmax')
    ])

    return model

In [None]:
model = create_dephnn_model()
model.summary()

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm # For progress bar

In [None]:
!tar -xvzf drive/MyDrive/data.tar.gz

depressed_data/
depressed_data/6/
depressed_data/6/6.csv
depressed_data/6/8.csv
depressed_data/6/2.csv
depressed_data/6/1.csv
depressed_data/6/9.csv
depressed_data/6/4.csv
depressed_data/6/11.csv
depressed_data/6/5.csv
depressed_data/6/3.csv
depressed_data/6/10.csv
depressed_data/6/7.csv
depressed_data/12/
depressed_data/12/6.csv
depressed_data/12/8.csv
depressed_data/12/2.csv
depressed_data/12/1.csv
depressed_data/12/9.csv
depressed_data/12/4.csv
depressed_data/12/11.csv
depressed_data/12/5.csv
depressed_data/12/3.csv
depressed_data/12/10.csv
depressed_data/12/7.csv
depressed_data/22/
depressed_data/22/6.csv
depressed_data/22/8.csv
depressed_data/22/2.csv
depressed_data/22/1.csv
depressed_data/22/9.csv
depressed_data/22/4.csv
depressed_data/22/11.csv
depressed_data/22/5.csv
depressed_data/22/3.csv
depressed_data/22/10.csv
depressed_data/22/7.csv
depressed_data/2/
depressed_data/2/6.csv
depressed_data/2/8.csv
depressed_data/2/2.csv
depressed_data/2/1.csv
depressed_data/2/9.csv
depresse

In [None]:
# sort data

from pandas.core.common import flatten

dirs = ["depressed_data/", "healthy_data/"]
depressed_files = []
healthy_files = []
for dir in dirs:
  patients = os.listdir(dir)
  for patient in patients:
    path = os.path.join(dir, patient)
    sliced_patients = os.listdir(path)
    for i in range(len(list(sliced_patients))):
      sliced_patients = map(lambda x: f"{path}/{x}", sliced_patients)
      if dir == "depressed_data/":
        depressed_files.append(list(sliced_patients))
      else:
        healthy_files.append(list(sliced_patients))
depressed_files = list(flatten(depressed_files))
healthy_files = list(flatten(healthy_files))

In [None]:
# sort data

import re

def sort_file_paths(file_paths):
  def extract_numbers(path):
    # Regular expression to match directory_name/%number_of_folder/%number_of_file.csv
    match = re.search(r'/(\d+)/(\d+)\.csv$', path)
    if match:
      folder_num = int(match.group(1))
      file_num = int(match.group(2))
      return folder_num, file_num
    else:
      return float('inf'), float('inf') # Fallback for paths that don't match

  # Sort the file paths using the extracted numbers
  sorted_paths = sorted(file_paths, key=extract_numbers)
  return sorted_paths

In [None]:
# apply sort for data
depressed_files = sort_file_paths(depressed_files)
healthy_files = sort_file_paths(healthy_files)
print(depressed_files)
print(len(depressed_files), 24*11)
print(healthy_files)
print(len(healthy_files), 29*11)

['depressed_data/1/1.csv', 'depressed_data/1/2.csv', 'depressed_data/1/3.csv', 'depressed_data/1/4.csv', 'depressed_data/1/5.csv', 'depressed_data/1/6.csv', 'depressed_data/1/7.csv', 'depressed_data/1/8.csv', 'depressed_data/1/9.csv', 'depressed_data/1/10.csv', 'depressed_data/1/11.csv', 'depressed_data/2/1.csv', 'depressed_data/2/2.csv', 'depressed_data/2/3.csv', 'depressed_data/2/4.csv', 'depressed_data/2/5.csv', 'depressed_data/2/6.csv', 'depressed_data/2/7.csv', 'depressed_data/2/8.csv', 'depressed_data/2/9.csv', 'depressed_data/2/10.csv', 'depressed_data/2/11.csv', 'depressed_data/3/1.csv', 'depressed_data/3/2.csv', 'depressed_data/3/3.csv', 'depressed_data/3/4.csv', 'depressed_data/3/5.csv', 'depressed_data/3/6.csv', 'depressed_data/3/7.csv', 'depressed_data/3/8.csv', 'depressed_data/3/9.csv', 'depressed_data/3/10.csv', 'depressed_data/3/11.csv', 'depressed_data/4/1.csv', 'depressed_data/4/2.csv', 'depressed_data/4/3.csv', 'depressed_data/4/4.csv', 'depressed_data/4/5.csv', 'depr

In [None]:
pd.read_csv(depressed_files[0], header=None)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,-2672.837646,-12377.481445,-14312.760742,-3315.757812,-788.099976,4314.892578,642.094055,3448.106934,-11753.994141,-13986.407227,...,-3123.212891,-13059.272461,-11648.493164,-6508.694336,-7763.240234,-5026.733398,-10359.708008,-5075.732422,-4333.224609,-13102.478516
1,-2977.063721,-12746.344727,-14707.312500,-3569.262207,-1040.842529,4227.194336,550.038391,3058.005127,-12293.641602,-14325.297852,...,-3365.880615,-13313.056641,-11991.339844,-6960.208496,-8107.444824,-5400.620117,-10692.241211,-5561.928223,-4667.212402,-13434.322266
2,-3185.338135,-13094.179688,-15115.572266,-3842.165039,-1266.216919,4081.433350,428.872314,2736.541504,-12807.960938,-14725.140625,...,-3539.296387,-13475.206055,-12224.307617,-7355.231445,-8494.883789,-5776.714355,-10954.746094,-5816.718262,-4949.989258,-13705.768555
3,-2992.951904,-12997.416992,-14936.188477,-3725.095215,-1122.845337,4100.067871,470.628387,2915.571777,-12555.684570,-14598.958008,...,-3362.336914,-13319.053711,-12041.857422,-7221.004395,-8364.586914,-5590.596680,-10772.967773,-5464.646484,-4759.678711,-13534.032227
4,-2669.601074,-12533.662109,-14451.906250,-3406.648682,-838.063232,4242.860840,607.853882,3360.400879,-11911.901367,-14156.715820,...,-3087.931396,-13086.966797,-11711.237305,-6654.441895,-7913.342285,-5144.039062,-10421.353516,-4974.948242,-4382.511230,-13225.270508
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,-3261.916748,-13067.952148,-15193.008789,-3855.806885,-1204.172363,4131.738770,501.006836,2148.865234,-13395.452148,-14747.392578,...,-3423.338623,-13865.372070,-12330.017578,-7388.726562,-8685.933594,-5853.973145,-11009.019531,-5622.233887,-5018.657715,-13782.570312
7496,-3085.499512,-12949.163086,-15039.041992,-3759.335449,-1080.804688,4132.917969,530.366882,2307.975830,-13174.142578,-14648.449219,...,-3225.707275,-13726.908203,-12171.861328,-7220.477539,-8549.373047,-5686.507812,-10841.170898,-5235.382812,-4868.069336,-13659.265625
7497,-2784.610596,-12605.600586,-14599.211914,-3469.177734,-807.542725,4263.772949,661.399963,2692.458008,-12583.140625,-14238.907227,...,-2988.797852,-13495.989258,-11846.751953,-6780.048828,-8169.457031,-5278.381348,-10495.251953,-4760.604004,-4527.652344,-13332.559570
7498,-2743.059082,-12446.564453,-14439.358398,-3361.313477,-740.201172,4337.501465,714.643860,2828.913330,-12360.880859,-14042.306641,...,-3000.029541,-13453.798828,-11751.087891,-6595.995605,-8006.262695,-5134.577637,-10393.405273,-4789.814941,-4441.931641,-13224.370117


In [None]:
import tensorflow as tf

# Configuration
training_ds_size = 0.8
validation_ds_size = 0.1
num_features = 128
batch_size = 7500  # Each sample should have 7500 sequences
feature_defaults = [tf.float32] * num_features  # Adjust dtype if needed
depressed_files_num = 11 * 24
healthy_files_num = 11 * 29

# # Ensure file lists exist
# depressed_files = ["path_to_depressed_file_{}.csv".format(i) for i in range(depressed_files_num)]
# healthy_files = ["path_to_healthy_file_{}.csv".format(i) for i in range(healthy_files_num)]


def create_pipeline(files_list, label):
    """Create a dataset pipeline for a single class."""
    file_ds = tf.data.Dataset.from_tensor_slices(files_list)  # Ensures correct ordering

    def process_file(file_path):
        # Read CSV with 128 features
        csv_ds = tf.data.experimental.CsvDataset(
            file_path,
            record_defaults=[tf.float32] * 128,  # Adjust dtype if needed
            header=False
        )
        # Convert each row tuple to a single tensor (128 features)
        csv_ds = csv_ds.map(lambda *row: tf.stack(row, axis=0))  # Shape: (128,)
        # Batch into (7500, 128) tensors
        return csv_ds.batch(batch_size, drop_remainder=True)  # Shape: (7500, 128)

    data_ds = file_ds.interleave(
        process_file,
        cycle_length=tf.data.AUTOTUNE,
        num_parallel_calls=tf.data.AUTOTUNE
    )

    def add_labels(features):
        # Create proper one-hot labels
        one_hot_label = tf.one_hot(label, depth=2)  # [1,0] or [0,1]
        # labels = tf.tile([one_hot_label], [tf.shape(features)[0], 1])  # Expand to match batch
        return features, one_hot_label

    return data_ds.map(add_labels, num_parallel_calls=tf.data.AUTOTUNE)


# Create datasets for both classes
depressed_ds = create_pipeline(depressed_files, label=0)
healthy_ds = create_pipeline(healthy_files, label=1)

# Combine and shuffle datasets
combined_ds = depressed_ds.concatenate(healthy_ds)
# combined_ds = combined_ds.shuffle(1000).cache()

# Calculate total batches (not files)
# total_batches = depressed_files_num // 11 + healthy_files_num // 11  # 24 + 29 = 53 batches
# train_batches = int(total_batches * training_ds_size)    # 42 batches
# val_batches = int(total_batches * validation_ds_size)    # 5 batches
# test_batches = total_batches - train_batches - val_batches  # 6 batches
total_batches = depressed_files_num + healthy_files_num  # 264 + 319 = 583
train_batches = int(total_batches * training_ds_size) - 1  # ~466
val_batches = int(total_batches * validation_ds_size)  # ~58
test_batches = total_batches - train_batches - val_batches + 1 # ~59

# Create fresh dataset reference
# full_ds = combined_ds.shuffle(10000)
full_ds = combined_ds.shuffle(10000, seed=42)  # Fixed seed shuffle

# Split datasets
# train_ds = full_ds.take(train_batches)
# remaining_ds = full_ds.skip(train_batches)
# val_ds = remaining_ds.take(val_batches)
# test_ds = remaining_ds.skip(val_batches)
train_ds = full_ds.take(train_batches)
val_ds = full_ds.skip(train_batches).take(val_batches).cache()
test_ds = full_ds.skip(train_batches + val_batches)

# # ✅ Apply final batching to introduce an outer batch dimension
# train_ds = train_ds.batch(15).repeat().prefetch(tf.data.AUTOTUNE)  # Now (None, 7500, 128)
# val_ds = val_ds.batch(58).cache().repeat().prefetch(tf.data.AUTOTUNE)
# test_ds = test_ds.batch(12).repeat().prefetch(tf.data.AUTOTUNE)
train_ds = (
    train_ds
    .cache('/tmp/cache_dir')  # Cache after initial loading/preprocessing
    .shuffle(10000, reshuffle_each_iteration=True)  # Critical for per-epoch shuffle
    .repeat()  # Enable multiple epochs
    .batch(15)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation/test optimizations (no repeat needed)
val_ds = val_ds.batch(val_batches).prefetch(1)  # Full validation in single batch
test_ds = test_ds.batch(test_batches).prefetch(1)

# Optimize pipeline performance
# train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
# val_ds = val_ds.prefetch(tf.data.AUTOTUNE)
# test_ds = test_ds.prefetch(tf.data.AUTOTUNE)

# Print dataset specs
print(train_ds.element_spec)
print(val_ds.element_spec)
print(test_ds.element_spec)


(TensorSpec(shape=(None, 7500, 128), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))
(TensorSpec(shape=(None, 7500, 128), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))
(TensorSpec(shape=(None, 7500, 128), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))


In [None]:
# Print dataset sizes
# print(f"Total samples: {len(depressed_files) + len(healthy_files)}")
# print(f"Training: {train_batches}, Val: {val_batches}, Test: {test_batches}")
# for i in train_ds.take(1):
#   print(i[0].shape, i[1].shape)
#   print(i)

In [None]:
# Add after model definition, before compilation
model.build(input_shape=(None, 7500, 128))  # Explicit batch dimension

In [None]:
tn = tf.keras.metrics.TrueNegatives()
fp = tf.keras.metrics.FalsePositives()

def specificity(y_true, y_pred):

    tn.update_state(y_true, y_pred)
    fp.update_state(y_true, y_pred)

    return tn.result() / (tn.result() + fp.result())

In [None]:
from tensorflow.keras.optimizers import Adam

# compile model
model.compile(
    optimizer=Adam(),  # Reduced from default 1e-3
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),  # Keep existing recall
        tf.keras.metrics.AUC(name='auc'),        # Keep AUC
        specificity,                             # Keep custom specificity
         tf.keras.metrics.Precision(name='precision')              # New precision metric
    ]
)

In [None]:
# train the model
history = model.fit(train_ds, validation_data=val_ds, epochs=3, steps_per_epoch=31, validation_steps=1, verbose=2)

Epoch 1/3
31/31 - 681s - 22s/step - accuracy: 0.5097 - auc: 0.4954 - loss: 1.0109 - precision: 0.5166 - recall: 0.5032 - specificity: 0.5818 - val_accuracy: 0.5690 - val_auc: 0.5624 - val_loss: 0.9778 - val_precision: 0.5690 - val_recall: 0.5690 - val_specificity: 0.5335
Epoch 2/3
31/31 - 668s - 22s/step - accuracy: 0.5226 - auc: 0.5263 - loss: 0.9803 - precision: 0.5226 - recall: 0.5226 - specificity: 0.5282 - val_accuracy: 0.6034 - val_auc: 0.6335 - val_loss: 0.9647 - val_precision: 0.6034 - val_recall: 0.6034 - val_specificity: 0.5325
Epoch 3/3
31/31 - 669s - 22s/step - accuracy: 0.5312 - auc: 0.5195 - loss: 0.9642 - precision: 0.5312 - recall: 0.5312 - specificity: 0.5325 - val_accuracy: 0.5690 - val_auc: 0.6026 - val_loss: 0.9505 - val_precision: 0.5690 - val_recall: 0.5690 - val_specificity: 0.5335


In [None]:
test_metrics = model.evaluate(test_ds, steps=1, verbose=2, return_dict=True)

1/1 - 182s - 182s/step - accuracy: 1.0000 - auc: 1.0000 - loss: 0.2102 - precision: 1.0000 - recall: 1.0000 - specificity: 1.0000


In [None]:
import matplotlib.pyplot as plt

In [None]:
def plot_metrics(history, test_results):
    """
    Plots separate graphs for loss, accuracy, sensitivity, specificity, and AUC.
    Each graph compares Training & Test results.

    Args:
        history: Training history from model.fit()
        test_results: Dictionary of test metrics from model.evaluate()
    """
    metrics = ['loss', 'accuracy', 'recall', 'specificity', 'auc']

    plt.figure(figsize=(15, 10))

    for i, metric in enumerate(metrics):
        plt.figure(figsize=(6, 4))  # Separate figure for each metric

        # Plot training history
        plt.plot(history.history[metric], label='Training', marker='o')
        plt.plot(history.history[f'val_{metric}'], label='Validation', marker='s')

        # Plot test result (as a horizontal line)
        test_value = test_results[metric]  # Extract test metric
        plt.axhline(y=test_value, color='r', linestyle='--', label=f'Test {metric.capitalize()}')

        # Formatting
        plt.xlabel('Epochs')
        plt.ylabel(metric.capitalize())
        plt.title(f'{metric.capitalize()} Over Training')
        plt.legend()
        plt.grid(True)

        # Show each graph
        plt.show()


In [None]:
test_metrics

{'accuracy': 1.0,
 'auc': 1.0,
 'loss': 0.2101854681968689,
 'precision': 1.0,
 'recall': 1.0,
 'specificity': 1.0}

<keras.src.callbacks.history.History at 0x7cc3041cc410>

In [None]:
plot_metrics(model.history, test_metrics)

KeyError: 'loss'

<Figure size 1500x1000 with 0 Axes>

<Figure size 600x400 with 0 Axes>

In [None]:
# save in keras native format
model.save("khalid-abubakr-DepHNN-v2.0.keras")

In [None]:
# save in legacy h5 format
model.save("khalid-abubakr-DepHNN-v2.0.h5")



In [None]:
from sklearn.metrics import precision_recall_curve, roc_curve

# Get true labels and predictions for the validation set
y_true = []
y_pred = []
for features, labels in val_ds.as_numpy_iterator():
    y_true.extend(labels)
    y_pred.extend(model.predict(features))

# Convert to NumPy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Find optimal threshold using precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true[:,1], y_pred[:,1])
f1_scores = 2 * (precision * recall) / (precision + recall)
optimal_idx = tf.argmax(f1_scores)
optimal_threshold = thresholds[optimal_idx]
print(f"Optimal threshold: {optimal_threshold}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
Optimal threshold: 0.29191267490386963


In [None]:
# Apply new threshold to predictions
new_predictions = (y_pred[:,1] >= optimal_threshold).astype(int)
new_predictions

array([0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1])

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Get true labels and predictions for the validation set
y_true = []
y_pred = []
for features, labels in test_ds.as_numpy_iterator():
    y_true.extend(labels)
    y_pred.extend(model.predict(features))

# Convert to NumPy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Define thresholds
default_threshold = 0.5
optimal_threshold = 0.65  # Replace with your optimal threshold from validation

# Convert probabilities to binary predictions
default_preds = (y_pred[:,1] >= default_threshold).astype(int)
tuned_preds = (y_pred[:,1] >= optimal_threshold).astype(int)

# Calculate metrics for default threshold (0.5)
default_accuracy = accuracy_score(y_true[:,1], default_preds)
default_precision = precision_score(y_true[:,1], default_preds)
default_recall = recall_score(y_true[:,1], default_preds)
default_f1 = f1_score(y_true[:,1], default_preds)

# Calculate metrics for tuned threshold
tuned_accuracy = accuracy_score(y_true[:,1], tuned_preds)
tuned_precision = precision_score(y_true[:,1], tuned_preds)
tuned_recall = recall_score(y_true[:,1], tuned_preds)
tuned_f1 = f1_score(y_true[:,1], tuned_preds)

# Calculate binary cross-entropy loss
loss_fn = tf.keras.losses.BinaryCrossentropy()
default_loss = loss_fn(y_true[:,1], y_pred[:,1]).numpy()
tuned_loss = loss_fn(y_true[:,1], y_pred[:,1]).numpy()  # Loss is same unless threshold affects probs

# Print results
print("Metrics with Default Threshold (0.5):")
print(f"Accuracy: {default_accuracy:.4f}")
print(f"Precision: {default_precision:.4f}")
print(f"Recall: {default_recall:.4f}")
print(f"F1-Score: {default_f1:.4f}")
print(f"Loss: {default_loss:.4f}")

print("\nMetrics with Tuned Threshold (0.65):")
print(f"Accuracy: {tuned_accuracy:.4f}")
print(f"Precision: {tuned_precision:.4f}")
print(f"Recall: {tuned_recall:.4f}")
print(f"F1-Score: {tuned_f1:.4f}")
print(f"Loss: {tuned_loss:.4f}")  # Note: Loss is based on probs, not binary preds

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2s/step
Metrics with Default Threshold (0.5):
Accuracy: 0.9500
Precision: 1.0000
Recall: 0.9000
F1-Score: 0.9474
Loss: 0.2254

Metrics with Tuned Threshold (0.65):
Accuracy: 0.9500
Precision: 1.0000
Recall: 0.9000
F1-Score: 0.9474
Loss: 0.2254


In [None]:
test_metrics = model.evaluate(test_ds, steps=1, verbose=2, return_dict=True)

1/1 - 214s - 214s/step - accuracy: 1.0000 - auc: 1.0000 - loss: 0.2102 - precision: 1.0000 - recall: 1.0000 - specificity: 1.0000


In [None]:
#Extract data from test_ds into NumPy arrays
test_features = []
for features, _ in test_ds.as_numpy_iterator():  # _ to ignore labels
    test_features.extend(features)
test_features = np.array(test_features).reshape(-1, 1)

In [None]:
from sklearn.linear_model import LogisticRegression

# Reshape predictions to match true labels (if necessary)
# Assuming y_pred has shape (num_samples, num_classes)
# Adjust if y_pred has a different shape
y_pred = y_pred.reshape(y_true.shape[0], -1)

# Extract features and true labels
test_features = y_pred  # Use predictions as features for calibration
true_labels = y_true[:, 1]  # Assuming you're interested in the second class

# Fit a logistic regression to calibrate probabilities
calibrator = LogisticRegression()
calibrator.fit(test_features, true_labels)

# Calibrated predictions
calibrated_probs = calibrator.predict_proba(test_features)[:, 1]
calibrated_preds = (calibrated_probs >= 0.5).astype(int)

# Evaluate (optional: compare loss before/after)
from tensorflow.keras.losses import BinaryCrossentropy
loss_fn = BinaryCrossentropy()
print(f"Original loss: {loss_fn(true_labels, test_features[:, 1]).numpy()}")  # Assuming 2 classes
print(f"Calibrated loss: {loss_fn(true_labels, calibrated_probs).numpy()}")

Original loss: 0.22543393075466156
Calibrated loss: 0.24646860361099243


In [None]:
model = tf.keras.models.load_model("khalid-abubakr-DepHNN-v2.4.keras",  custom_objects={'specificity': specificity})

In [None]:
# compile model
model.compile(
    optimizer=Adam(),  # Reduced from default 1e-3
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),  # Keep existing recall
        tf.keras.metrics.AUC(name='auc'),        # Keep AUC
        specificity,                             # Keep custom specificity
         tf.keras.metrics.Precision(name='precision')              # New precision metric
    ]
)

In [None]:
# train the model
history = model.fit(train_ds, validation_data=val_ds, epochs=30, steps_per_epoch=31, validation_steps=1, verbose=2)

Epoch 1/30
31/31 - 758s - 24s/step - accuracy: 0.8387 - auc: 0.8932 - loss: 0.4777 - precision: 0.8387 - recall: 0.8387 - specificity: 0.8567 - val_accuracy: 1.0000 - val_auc: 1.0000 - val_loss: 0.2252 - val_precision: 1.0000 - val_recall: 1.0000 - val_specificity: 0.8566
Epoch 2/30
31/31 - 324s - 10s/step - accuracy: 0.8581 - auc: 0.9138 - loss: 0.4184 - precision: 0.8581 - recall: 0.8581 - specificity: 0.8520 - val_accuracy: 1.0000 - val_auc: 1.0000 - val_loss: 0.2216 - val_precision: 1.0000 - val_recall: 1.0000 - val_specificity: 0.8652
Epoch 3/30
31/31 - 325s - 10s/step - accuracy: 0.8237 - auc: 0.8961 - loss: 0.4406 - precision: 0.8237 - recall: 0.8237 - specificity: 0.8574 - val_accuracy: 1.0000 - val_auc: 1.0000 - val_loss: 0.2213 - val_precision: 1.0000 - val_recall: 1.0000 - val_specificity: 0.8579
Epoch 4/30
31/31 - 324s - 10s/step - accuracy: 0.8538 - auc: 0.9117 - loss: 0.4411 - precision: 0.8538 - recall: 0.8538 - specificity: 0.8574 - val_accuracy: 0.8793 - val_auc: 0.929

In [None]:
# save in keras native format
version = 2.5
model.save(f"khalid-abubakr-DepHNN-v{version}.keras")

In [None]:
from google.colab import files

files.download(f"/content/khalid-abubakr-DepHNN-v{version}.keras")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>