In [1]:
# Import dependencies
import os
# Configure TensorFlow's native logging through environment variables.
# They are assigned before `import tensorflow` further down in this cell —
# presumably so they are already in place when the native runtime starts
# (TODO confirm; keep these lines above the TensorFlow import).
os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


import pandas as pd
import numpy as np
import tensorflow as tf
import logging
from tqdm import tqdm

# Import Tensorflow Keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, BinaryFocalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

# Import local modules
from src.utils.consts import TF_RECORD_DATASET, MODELS_PATH, TF_BUFFER_SIZE, NUM_CLASSES, TF_SHUFFLE_SIZE, TF_BATCH_SIZE
from src.model.tensorflow_utils import load_dataset, apply_augmentation_to_dataset, oversample_minority_classes, optimize_dataset, count_dataset_size
from src.model.tensorflow_utils import setup_logger, setup_training_logger, setup_metrics_monitor, setup_loss_monitor, setup_garbage_collector, get_metrics
from src.model.tensorflow_utils import calculate_class_weights, show_class_weights, start_or_resume_training, analyze_class_distribution
from src.model.densnet.tensorflow_dense_net_121 import build_densenet121
from src.model.loss.tensorflow_no_finding_binary_crossentropy import NoFindingBinaryCrossentropy

# Input Data
# Run identity and resume settings consumed by the cells below.
model_name      = "DenseNet121_v3"
initial_epoch   = 28   # number of the first epoch this run will execute
resume_training = True

# Checkpoint to resume from: the one written at the end of the epoch preceding
# `initial_epoch`. The path is derived from MODELS_PATH with the same layout the
# ModelCheckpoint callback saves to (<models>/<model_name>/checkpoints/cp-NNNN.keras)
# instead of a hard-coded absolute path, so it stays in sync with `initial_epoch`
# and works on any machine.
# NOTE(review): assumes MODELS_PATH is the models directory the previous
# absolute path pointed into — confirm.
checkpoint_path = f"{MODELS_PATH}/{model_name}/checkpoints/cp-{initial_epoch - 1:04d}.keras"

In [2]:
# Load the train / validation / test splits, one TFRecord file per split
train_ds, val_ds, test_ds = (
    load_dataset(f"{TF_RECORD_DATASET}/{split}.tfrecord", TF_BUFFER_SIZE)
    for split in ("train", "val", "test")
)

I0000 00:00:1743068898.769091 10624261 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1743068898.769294 10624261 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Rebalance the training set for rare classes
# 1) weights computed on the dataset as loaded drive the oversampling
initial_class_weights = calculate_class_weights(train_ds, NUM_CLASSES)
train_ds              = oversample_minority_classes(train_ds, initial_class_weights)
# 2) weights are recomputed on the oversampled dataset; these are the ones
#    handed to the training call later on
class_weights         = calculate_class_weights(train_ds, NUM_CLASSES)

In [4]:
# Step counts: number of elements divided by the batch size, truncated by int(),
# so a trailing partial batch is not counted. Counting happens before the
# datasets are passed through optimize_dataset below.
train_sample_count = count_dataset_size(train_ds, None)
val_sample_count   = count_dataset_size(val_ds, None)
steps_per_epoch    = int(train_sample_count / TF_BATCH_SIZE)
validation_steps   = int(val_sample_count / TF_BATCH_SIZE)

# Training input pipeline: shuffle (reshuffled every iteration) -> augment -> optimize
train_ds = optimize_dataset(
    apply_augmentation_to_dataset(
        train_ds.shuffle(TF_SHUFFLE_SIZE, reshuffle_each_iteration=True)
    ),
    TF_BATCH_SIZE,
)

# Validation data is neither shuffled nor augmented here
val_ds = optimize_dataset(val_ds, TF_BATCH_SIZE)

Counting samples: 108109 samples [01:22, 1310.56 samples/s] 
Counting samples: 15391 samples [00:19, 790.85 samples/s]


In [5]:
# Setup Model Deps
# Resume settings forwarded unchanged to both monitors
resume_kwargs = {'resume_training': resume_training, 'initial_epoch': initial_epoch}

# Loggers and monitoring callbacks
logger            = setup_logger()
training_logger   = setup_training_logger(logger, TF_BATCH_SIZE, 100)
metrics_monitor   = setup_metrics_monitor(MODELS_PATH, model_name, logger, **resume_kwargs)
loss_monitor      = setup_loss_monitor(MODELS_PATH, model_name, logger, val_ds, **resume_kwargs)
garbage_collector = setup_garbage_collector(logger)
metrics           = get_metrics()

# Loss function and learning-rate schedule (LR is halved when val_f1_score stops improving)
loss      = NoFindingBinaryCrossentropy(10, lambda_value=0.2, from_logits=False, label_smoothing=0.01)
reduce_lr = ReduceLROnPlateau(monitor="val_f1_score", mode="max", factor=0.5, patience=3, min_lr=1e-6, verbose=1)

# Both checkpoint callbacks track the same quantity, maximised
f1_monitor = {'monitor': "val_f1_score", 'mode': "max"}

# Per-epoch checkpoints; the {epoch:04d} placeholder is left for Keras to fill in
epoch_mode           = 'cp-{epoch:04d}'
save_checkpoint_path = f"{MODELS_PATH}/{model_name}/checkpoints/{epoch_mode}.keras"
checkpoint           = ModelCheckpoint(save_checkpoint_path, save_best_only=False, **f1_monitor)

# Single "best so far" model file
# NOTE(review): this callback is created fresh on every run, so when resuming it
# presumably has no memory of the best val_f1_score from earlier epochs and may
# overwrite a better saved model — consider `initial_value_threshold`; confirm.
model_path      = f"{MODELS_PATH}/{model_name}.keras"
best_checkpoint = ModelCheckpoint(model_path, save_best_only=True, **f1_monitor)

2025-03-27 10:52:55 - INFO - Resuming from existing metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v3/train_metrics.csv
2025-03-27 10:52:55 - INFO - Resuming from existing validation metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v3/val_metrics.csv
2025-03-27 10:52:55 - INFO - Cleaned training metrics file, kept 91206 records before epoch 28
2025-03-27 10:52:55 - INFO - Cleaned validation metrics file, kept 27 records before epoch 28
2025-03-27 10:52:55 - INFO - Found 91206 existing training records
2025-03-27 10:52:55 - INFO - Found 27 existing validation records
2025-03-27 10:52:55 - INFO - Cleaned loss analysis metrics file, kept 405 records before epoch 28
2025-03-27 10:52:55 - INFO - Resuming from existing loss analysis file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v3/loss_analysis_metrics.csv


In [6]:
# Model Training
total_epochs = 30  # passed positionally below; shows up as "Epoch N/30" in the output

# Fresh model and compile settings handed to the training helper
model          = build_densenet121(NUM_CLASSES, use_se=True)
optimizer      = Adam(learning_rate=1e-4, clipnorm=1.0)
compile_kwargs = dict(optimizer=optimizer, loss=loss, metrics=metrics)

# Callback order is kept exactly as configured
training_callbacks = [
    checkpoint,
    best_checkpoint,
    reduce_lr,
    training_logger,
    metrics_monitor,
    loss_monitor,
    garbage_collector,
]

# Start a new run or continue from `checkpoint_path`, as decided by the helper
history, model = start_or_resume_training(
    model,
    compile_kwargs,
    train_ds,
    val_ds,
    total_epochs,
    steps_per_epoch,
    validation_steps,
    class_weights=class_weights,
    callbacks=training_callbacks,
    checkpoint_path=checkpoint_path,
    initial_epoch=initial_epoch,
    output_dir=MODELS_PATH,
    model_name=model_name,
    logger=logger,
)

2025-03-27 10:52:57 - INFO - Resuming from existing metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v3/train_metrics.csv
2025-03-27 10:52:57 - INFO - Resuming from existing validation metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v3/val_metrics.csv
2025-03-27 10:52:57 - INFO - Cleaned training metrics file, kept 91206 records before epoch 28
2025-03-27 10:52:57 - INFO - Cleaned validation metrics file, kept 27 records before epoch 28
2025-03-27 10:52:57 - INFO - Found 91206 existing training records
2025-03-27 10:52:57 - INFO - Found 27 existing validation records
2025-03-27 10:52:57 - INFO - Cleaned loss analysis metrics file, kept 405 records before epoch 28
2025-03-27 10:52:57 - INFO - Resuming from existing loss analysis file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v3/loss_analysis_metrics.csv


Loading full model from checkpoint: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v3/checkpoints/cp-0027.keras


2025-03-27 10:53:00 - INFO - 
=== Training Started ===

2025-03-27 10:53:00 - INFO - Batch Size: 32
2025-03-27 10:53:00 - INFO - Optimizer: Adam
2025-03-27 10:53:00 - INFO - 

2025-03-27 10:53:00 - INFO - 
=== Starting Epoch 28 ===



Epoch 28/30
[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 663ms/step - accuracy: 0.9174 - auc: 0.7974 - f1_score: 0.2476 - loss: 0.2015 - precision: 0.6867 - recall: 0.1864   

2025-03-27 11:33:27 - INFO - 
=== Epoch 28 Summary ===
2025-03-27 11:33:27 - INFO - Time: 2427.09s
2025-03-27 11:33:27 - INFO - Training   - accuracy: 0.9170 - auc: 0.7947 - f1_score: 0.2417 - loss: 0.2019 - precision: 0.6798 - recall: 0.1815 - learning_rate: 0.0001
2025-03-27 11:33:27 - INFO - Validation - accuracy: 0.9228 - auc: 0.7745 - f1_score: 0.2781 - loss: 0.3155 - precision: 0.6598 - recall: 0.2024



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

2025-03-27 11:35:14 - INFO - 
Loss Analysis - Atelectasis
2025-03-27 11:35:14 - INFO - Confidence Distribution:
2025-03-27 11:35:14 - INFO - -- High (>0.9): 0.00%
2025-03-27 11:35:14 - INFO - -- Medium (0.6-0.9): 0.01%
2025-03-27 11:35:14 - INFO - -- Uncertain (0.4-0.6): 0.43%
2025-03-27 11:35:14 - INFO - -- Low (<0.4): 99.56%
2025-03-27 11:35:14 - INFO - Performance:
2025-03-27 11:35:14 - INFO - -- True Positives: 8
2025-03-27 11:35:14 - INFO - -- False Positives: 5
2025-03-27 11:35:14 - INFO - -- Loss Contribution: 0.3163
2025-03-27 11:35:14 - INFO - Average Confidence:
2025-03-27 11:35:14 - INFO - -- Correct Predictions: 6.54%
2025-03-27 11:35:14 - INFO - -- Incorrect Predictions: 13.83%
2025-03-27 11:35:14 - INFO - 
Loss Analysis - Cardiomegaly
2025-03-27 11:35:14 - INFO - Confidence Distribution:
2025-03-27 11:35:14 - INFO - -- High (>0.9): 0.26%
2025-03-27 11:35:14 - INFO - -- Medium (0.6-0.9): 1.22%
2025-03-27 11:35:14 - INFO - -- Uncertain (0.4-0.6): 1.97%
2025-03-27 11:35:14 -

[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2541s[0m 725ms/step - accuracy: 0.9174 - auc: 0.7974 - f1_score: 0.2476 - loss: 0.2015 - precision: 0.6867 - recall: 0.1864 - val_accuracy: 0.9228 - val_auc: 0.7745 - val_f1_score: 0.2781 - val_loss: 0.3155 - val_precision: 0.6598 - val_recall: 0.2024 - learning_rate: 1.0000e-04


2025-03-27 11:35:21 - INFO - 
=== Starting Epoch 29 ===



Epoch 29/30
[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 735ms/step - accuracy: 0.9167 - auc: 0.7903 - f1_score: 0.2358 - loss: 0.2040 - precision: 0.6811 - recall: 0.1770   

2025-03-27 12:19:04 - INFO - 
=== Epoch 29 Summary ===
2025-03-27 12:19:04 - INFO - Time: 2623.48s
2025-03-27 12:19:04 - INFO - Training   - accuracy: 0.9166 - auc: 0.7911 - f1_score: 0.2357 - loss: 0.2034 - precision: 0.6775 - recall: 0.1767 - learning_rate: 0.0001
2025-03-27 12:19:04 - INFO - Validation - accuracy: 0.9217 - auc: 0.7664 - f1_score: 0.2401 - loss: 0.3225 - precision: 0.6807 - recall: 0.1599



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-03-27 12:20:51 - INFO - 
Loss Analysis - Atelectasis
2025-03-27 12:20:51 - INFO - Confidence Distribution:
2025-03-27 12:20:51 - INFO - -- High (>0.9): 0.00%
2025-03-27 12:20:51 - INFO - -- Medium (0.6-0.9): 0.17%
2025-03-27 12:20:51 - INFO - -- Uncertain (0.4-0.6): 2.49%
2025-03-27 12:20:51 - INFO - -- Low (<0.4): 97.34%
2025-03-27 12:20:51 - INFO - Performance:
2025-03-27 12:20:51 - INFO - -- True Positives: 48
2025-03-27 12:20:51 - INFO - -- False Positives: 60
2025-03-27 12:20:51 - INFO - -- Loss Contribution: 0.3155
2025-03-27 12:20:51 - INFO - Average Confidence:
2025-03-27 12:20:51 - INFO - -- Correct Predictions: 11.99%
2025-03-27 12:20:51 - INFO - -- Incorrect Predictions: 21.48%
2025-03-27 12:20:51 - INFO - 
Loss Analysis - Cardiomegaly
2025-03-27 12:20:51 - INFO - Confidence Distribution:
2025-03-27 12:20:51 - INFO - -- High (>0.9): 0.00%
2025-03-27 12:20:51 - INFO - -- Medium (0.6-0.9): 0.21%
2025-03-27 12:20:51 - INFO - -- Uncertain (0.4-0.6): 0.29%
2025-03-27 12:20:5

[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2743s[0m 797ms/step - accuracy: 0.9167 - auc: 0.7903 - f1_score: 0.2358 - loss: 0.2040 - precision: 0.6811 - recall: 0.1770 - val_accuracy: 0.9217 - val_auc: 0.7664 - val_f1_score: 0.2401 - val_loss: 0.3225 - val_precision: 0.6807 - val_recall: 0.1599 - learning_rate: 1.0000e-04


2025-03-27 12:21:04 - INFO - 
=== Starting Epoch 30 ===



Epoch 30/30
[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 700ms/step - accuracy: 0.9164 - auc: 0.7889 - f1_score: 0.2309 - loss: 0.2051 - precision: 0.6755 - recall: 0.1747   

2025-03-27 13:02:42 - INFO - 
=== Epoch 30 Summary ===
2025-03-27 13:02:42 - INFO - Time: 2497.97s
2025-03-27 13:02:42 - INFO - Training   - accuracy: 0.9165 - auc: 0.7906 - f1_score: 0.2342 - loss: 0.2039 - precision: 0.6736 - recall: 0.1759 - learning_rate: 0.0001
2025-03-27 13:02:42 - INFO - Validation - accuracy: 0.9185 - auc: 0.7527 - f1_score: 0.1640 - loss: 0.3305 - precision: 0.6535 - recall: 0.0998



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-03-27 13:04:29 - INFO - 
Loss Analysis - Atelectasis
2025-03-27 13:04:29 - INFO - Confidence Distribution:
2025-03-27 13:04:29 - INFO - -- High (>0.9): 0.00%
2025-03-27 13:04:29 - INFO - -- Medium (0.6-0.9): 0.06%
2025-03-27 13:04:29 - INFO - -- Uncertain (0.4-0.6): 1.59%
2025-03-27 13:04:29 - INFO - -- Low (<0.4): 98.35%
2025-03-27 13:04:29 - INFO - Performance:
2025-03-27 13:04:29 - INFO - -- True Positives: 32
2025-03-27 13:04:29 - INFO - -- False Positives: 27
2025-03-27 13:04:29 - INFO - -- Loss Contribution: 0.3063
2025-03-27 13:04:29 - INFO - Average Confidence:
2025-03-27 13:04:29 - INFO - -- Correct Predictions: 10.45%
2025-03-27 13:04:29 - INFO - -- Incorrect Predictions: 19.48%
2025-03-27 13:04:29 - INFO - 
Loss Analysis - Cardiomegaly
2025-03-27 13:04:29 - INFO - Confidence Distribution:
2025-03-27 13:04:29 - INFO - -- High (>0.9): 0.15%
2025-03-27 13:04:29 - INFO - -- Medium (0.6-0.9): 1.06%
2025-03-27 13:04:29 - INFO - -- Uncertain (0.4-0.6): 1.23%
2025-03-27 13:04:2

[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2614s[0m 759ms/step - accuracy: 0.9164 - auc: 0.7889 - f1_score: 0.2309 - loss: 0.2051 - precision: 0.6755 - recall: 0.1747 - val_accuracy: 0.9185 - val_auc: 0.7527 - val_f1_score: 0.1640 - val_loss: 0.3305 - val_precision: 0.6535 - val_recall: 0.0998 - learning_rate: 1.0000e-04


2025-03-27 13:04:38 - INFO - 
=== Training Completed! ===

2025-03-27 13:04:38 - INFO - Final Metrics: accuracy: 0.9165 - auc: 0.7906 - f1_score: 0.2342 - loss: 0.2039 - precision: 0.6736 - recall: 0.1759 - val_accuracy: 0.9185 - val_auc: 0.7527 - val_f1_score: 0.1640 - val_loss: 0.3305 - val_precision: 0.6535 - val_recall: 0.0998

