In [1]:
# Import dependencies
import os
os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import pandas as pd
import numpy as np
import tensorflow as tf
import logging
from tqdm import tqdm

# Import Tensorflow Keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, BinaryFocalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

# Import local modules
from src.utils.consts import TF_RECORD_DATASET, MODELS_PATH, TF_BUFFER_SIZE, NUM_CLASSES, TF_SHUFFLE_SIZE, TF_BATCH_SIZE
from src.model.tensorflow_utils import load_dataset, apply_augmentation_to_dataset, oversample_minority_classes, optimize_dataset, count_dataset_size
from src.model.tensorflow_utils import setup_logger, setup_training_logger, setup_metrics_monitor, setup_loss_monitor, setup_garbage_collector, get_metrics
from src.model.tensorflow_utils import calculate_class_weights, show_class_weights, start_or_resume_training, analyze_class_distribution
from src.model.densnet.tensorflow_dense_net_121 import build_densenet121

# Input Data
# Resume configuration: training continues at `initial_epoch`, starting from the
# checkpoint that was written for the epoch just before it.
model_name      = "DenseNet121_v1"
initial_epoch   = 28
resume_training = True

# Derived from MODELS_PATH rather than a machine-specific absolute path, following the
# `<MODELS_PATH>/<model_name>/checkpoints/cp-<epoch:04d>.keras` layout that the per-epoch
# ModelCheckpoint in this notebook writes to. Keeping the epoch number tied to
# `initial_epoch` avoids the two values drifting apart when the resume point changes.
# NOTE(review): assumes MODELS_PATH resolves to the same models directory the previous
# hard-coded path pointed to (the logged metrics paths suggest it does) - confirm.
checkpoint_path = f"{MODELS_PATH}/{model_name}/checkpoints/cp-{initial_epoch - 1:04d}.keras"

In [2]:
# Load the train / validation / test splits from their TFRecord files, in that order.
train_ds, val_ds, test_ds = (
    load_dataset(f"{TF_RECORD_DATASET}/{split_name}.tfrecord", TF_BUFFER_SIZE)
    for split_name in ("train", "val", "test")
)

I0000 00:00:1742667976.305528 24492848 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1742667976.305856 24492848 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Optimize dataset for rare classes:
# 1. weights computed on the raw training set drive the oversampling,
# 2. weights are then recomputed on the oversampled set; this second result is the
#    `class_weights` value that stays in scope for the rest of the notebook.
train_ds = oversample_minority_classes(
    train_ds,
    calculate_class_weights(train_ds, NUM_CLASSES),
)
class_weights = calculate_class_weights(train_ds, NUM_CLASSES)

In [4]:
# Count samples first (before the datasets are shuffled/augmented/optimized below).
# int() truncates, so a trailing partial batch is not counted as a step.
train_sample_count = count_dataset_size(train_ds, None)
val_sample_count   = count_dataset_size(val_ds, None)

steps_per_epoch  = int(train_sample_count / TF_BATCH_SIZE)
validation_steps = int(val_sample_count / TF_BATCH_SIZE)

# Training input pipeline: shuffle (reshuffled on every iteration) -> augment -> optimize.
# NOTE(review): optimize_dataset is handed TF_BATCH_SIZE, so it presumably batches the
# dataset - confirm in src.model.tensorflow_utils.
train_ds = optimize_dataset(
    apply_augmentation_to_dataset(
        train_ds.shuffle(TF_SHUFFLE_SIZE, reshuffle_each_iteration=True)
    ),
    TF_BATCH_SIZE,
)

# Validation data is neither shuffled nor augmented.
val_ds = optimize_dataset(val_ds, TF_BATCH_SIZE)

Counting samples: 108109 samples [01:09, 1545.77 samples/s] 
Counting samples: 15391 samples [00:17, 905.04 samples/s]


In [5]:
# Setup Model Deps

# --- Loggers and monitoring callbacks ---
# Both monitors are given the same resume settings.
resume_options = {'resume_training': resume_training, 'initial_epoch': initial_epoch}

logger            = setup_logger()
training_logger   = setup_training_logger(logger, TF_BATCH_SIZE, 100)
metrics_monitor   = setup_metrics_monitor(MODELS_PATH, model_name, logger, **resume_options)
loss_monitor      = setup_loss_monitor(MODELS_PATH, model_name, logger, val_ds, **resume_options)
garbage_collector = setup_garbage_collector(logger)
metrics           = get_metrics()

# --- Compile arguments ---
# NOTE(review): despite its name, `focal_loss` is a plain BinaryCrossentropy with label
# smoothing, not BinaryFocalCrossentropy. The name is kept because later cells use it.
focal_loss = BinaryCrossentropy(label_smoothing=0.01, from_logits=False)

# Everything below tracks validation F1, where higher is better.
f1_tracking = {'monitor': "val_f1_score", 'mode': "max"}

# Halve the learning rate after 3 epochs without improvement, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(factor=0.5, patience=3, min_lr=1e-6, verbose=1, **f1_tracking)

# --- Checkpoints ---
# Per-epoch checkpoint: the `{epoch:04d}` placeholder is left unformatted here so that
# Keras can substitute the epoch number when it saves.
epoch_mode           = 'cp-{epoch:04d}'
save_checkpoint_path = f"{MODELS_PATH}/{model_name}/checkpoints/" + epoch_mode + ".keras"
checkpoint           = ModelCheckpoint(save_checkpoint_path, save_best_only=False, **f1_tracking)

# Best-model checkpoint: a single file, rewritten only when validation F1 improves.
# NOTE(review): this callback is created fresh on every run, so it has no record of the
# best score reached before a resume; Keras offers `initial_value_threshold` for that
# case - confirm whether an earlier best model could be overwritten here.
model_path      = f"{MODELS_PATH}/{model_name}.keras"
best_checkpoint = ModelCheckpoint(model_path, save_best_only=True, **f1_tracking)

2025-03-22 19:30:12 - INFO - Resuming from existing metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v1/train_metrics.csv
2025-03-22 19:30:12 - INFO - Resuming from existing validation metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v1/val_metrics.csv
2025-03-22 19:30:12 - INFO - Cleaned training metrics file, kept 91206 records before epoch 28
2025-03-22 19:30:12 - INFO - Cleaned validation metrics file, kept 27 records before epoch 28
2025-03-22 19:30:12 - INFO - Found 91206 existing training records
2025-03-22 19:30:12 - INFO - Found 27 existing validation records
2025-03-22 19:30:12 - INFO - Cleaned loss analysis metrics file, kept 420 records before epoch 28
2025-03-22 19:30:12 - INFO - Resuming from existing loss analysis file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v1/loss_analysis_metrics.csv


In [6]:
# Model Training
model = build_densenet121(NUM_CLASSES)

# Arguments for compiling the model: Adam with gradient-norm clipping, the loss object
# and the metric list prepared earlier in the notebook.
optimizer      = Adam(learning_rate=1e-4, clipnorm=1.0)
compile_kwargs = dict(optimizer=optimizer, loss=focal_loss, metrics=metrics)

# NOTE(review): the fifth positional argument appears to be the final epoch number
# (the run logs "Epoch N/30") - confirm against start_or_resume_training's signature.
epochs = 30

training_callbacks = [
    checkpoint,
    best_checkpoint,
    reduce_lr,
    training_logger,
    metrics_monitor,
    loss_monitor,
    garbage_collector,
]

# The returned model replaces the freshly built one.
history, model = start_or_resume_training(
    model,
    compile_kwargs,
    train_ds,
    val_ds,
    epochs,
    steps_per_epoch,
    validation_steps,
    class_weights=class_weights,
    callbacks=training_callbacks,
    checkpoint_path=checkpoint_path,
    initial_epoch=initial_epoch,
    output_dir=MODELS_PATH,
    model_name=model_name,
    logger=logger,
)

2025-03-22 19:30:13 - INFO - Resuming from existing metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v1/train_metrics.csv
2025-03-22 19:30:13 - INFO - Resuming from existing validation metrics file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v1/val_metrics.csv
2025-03-22 19:30:13 - INFO - Cleaned training metrics file, kept 91206 records before epoch 28
2025-03-22 19:30:13 - INFO - Cleaned validation metrics file, kept 27 records before epoch 28
2025-03-22 19:30:13 - INFO - Found 91206 existing training records
2025-03-22 19:30:13 - INFO - Found 27 existing validation records
2025-03-22 19:30:13 - INFO - Cleaned loss analysis metrics file, kept 420 records before epoch 28
2025-03-22 19:30:13 - INFO - Resuming from existing loss analysis file: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v1/loss_analysis_metrics.csv


Loading full model from checkpoint: /Users/piotr.r/Projects/codebook/studies/bachelor-thesis/models/DenseNet121_v1/checkpoints/cp-0027.keras


2025-03-22 19:30:15 - INFO - 
=== Training Started ===

2025-03-22 19:30:15 - INFO - Batch Size: 32
2025-03-22 19:30:15 - INFO - Optimizer: Adam
2025-03-22 19:30:15 - INFO - 

2025-03-22 19:30:15 - INFO - 
=== Starting Epoch 28 ===



Epoch 28/30
[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 547ms/step - accuracy: 0.9083 - auc: 0.7881 - f1_score: 0.1457 - loss: 0.1746 - precision: 0.5065 - recall: 0.1082   

2025-03-22 20:03:13 - INFO - 
=== Epoch 28 Summary ===
2025-03-22 20:03:13 - INFO - Time: 1977.75s
2025-03-22 20:03:13 - INFO - Training   - accuracy: 0.9083 - auc: 0.7867 - f1_score: 0.1416 - loss: 0.1756 - precision: 0.5040 - recall: 0.1044 - learning_rate: 0.0001
2025-03-22 20:03:13 - INFO - Validation - accuracy: 0.9103 - auc: 0.7436 - f1_score: 0.0743 - loss: 0.2727 - precision: 0.3346 - recall: 0.0496



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-03-22 20:04:42 - INFO - 
Loss Analysis - Atelectasis
2025-03-22 20:04:42 - INFO - Confidence Distribution:
2025-03-22 20:04:42 - INFO - -- High (>0.9): 0.00%
2025-03-22 20:04:42 - INFO - -- Medium (0.6-0.9): 0.25%
2025-03-22 20:04:42 - INFO - -- Uncertain (0.4-0.6): 3.03%
2025-03-22 20:04:42 - INFO - -- Low (<0.4): 96.72%
2025-03-22 20:04:42 - INFO - Performance:
2025-03-22 20:04:42 - INFO - -- True Positives: 60
2025-03-22 20:04:42 - INFO - -- False Positives: 103
2025-03-22 20:04:42 - INFO - -- Loss Contribution: 0.3204
2025-03-22 20:04:42 - INFO - Average Confidence:
2025-03-22 20:04:42 - INFO - -- Correct Predictions: 10.86%
2025-03-22 20:04:42 - INFO - -- Incorrect Predictions: 20.74%
2025-03-22 20:04:42 - INFO - 
Loss Analysis - Cardiomegaly
2025-03-22 20:04:42 - INFO - Confidence Distribution:
2025-03-22 20:04:42 - INFO - -- High (>0.9): 0.08%
2025-03-22 20:04:42 - INFO - -- Medium (0.6-0.9): 1.02%
2025-03-22 20:04:42 - INFO - -- Uncertain (0.4-0.6): 1.08%
2025-03-22 20:04:

[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2073s[0m 597ms/step - accuracy: 0.9083 - auc: 0.7881 - f1_score: 0.1457 - loss: 0.1746 - precision: 0.5065 - recall: 0.1082 - val_accuracy: 0.9103 - val_auc: 0.7436 - val_f1_score: 0.0743 - val_loss: 0.2727 - val_precision: 0.3346 - val_recall: 0.0496 - learning_rate: 1.0000e-04


2025-03-22 20:04:48 - INFO - 
=== Starting Epoch 29 ===



Epoch 29/30
[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 726ms/step - accuracy: 0.9083 - auc: 0.7857 - f1_score: 0.1414 - loss: 0.1761 - precision: 0.5083 - recall: 0.1044       

2025-03-22 20:47:33 - INFO - 
=== Epoch 29 Summary ===
2025-03-22 20:47:33 - INFO - Time: 2565.39s
2025-03-22 20:47:33 - INFO - Training   - accuracy: 0.9083 - auc: 0.7880 - f1_score: 0.1432 - loss: 0.1755 - precision: 0.5046 - recall: 0.1054 - learning_rate: 0.0001
2025-03-22 20:47:33 - INFO - Validation - accuracy: 0.8976 - auc: 0.7177 - f1_score: 0.0932 - loss: 0.3173 - precision: 0.2365 - recall: 0.0886



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-03-22 20:49:00 - INFO - 
Loss Analysis - Atelectasis
2025-03-22 20:49:00 - INFO - Confidence Distribution:
2025-03-22 20:49:00 - INFO - -- High (>0.9): 0.00%
2025-03-22 20:49:00 - INFO - -- Medium (0.6-0.9): 0.05%
2025-03-22 20:49:00 - INFO - -- Uncertain (0.4-0.6): 0.92%
2025-03-22 20:49:00 - INFO - -- Low (<0.4): 99.04%
2025-03-22 20:49:00 - INFO - Performance:
2025-03-22 20:49:00 - INFO - -- True Positives: 21
2025-03-22 20:49:00 - INFO - -- False Positives: 19
2025-03-22 20:49:00 - INFO - -- Loss Contribution: 0.3406
2025-03-22 20:49:00 - INFO - Average Confidence:
2025-03-22 20:49:00 - INFO - -- Correct Predictions: 5.97%
2025-03-22 20:49:00 - INFO - -- Incorrect Predictions: 12.06%
2025-03-22 20:49:00 - INFO - 
Loss Analysis - Cardiomegaly
2025-03-22 20:49:00 - INFO - Confidence Distribution:
2025-03-22 20:49:00 - INFO - -- High (>0.9): 1.48%
2025-03-22 20:49:00 - INFO - -- Medium (0.6-0.9): 4.09%
2025-03-22 20:49:00 - INFO - -- Uncertain (0.4-0.6): 3.62%
2025-03-22 20:49:00

[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2658s[0m 778ms/step - accuracy: 0.9083 - auc: 0.7857 - f1_score: 0.1414 - loss: 0.1761 - precision: 0.5083 - recall: 0.1044 - val_accuracy: 0.8976 - val_auc: 0.7177 - val_f1_score: 0.0932 - val_loss: 0.3173 - val_precision: 0.2365 - val_recall: 0.0886 - learning_rate: 1.0000e-04


2025-03-22 20:49:06 - INFO - 
=== Starting Epoch 30 ===



Epoch 30/30
[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 566ms/step - accuracy: 0.9084 - auc: 0.7877 - f1_score: 0.1472 - loss: 0.1750 - precision: 0.5108 - recall: 0.1087   

2025-03-22 21:22:34 - INFO - 
=== Epoch 30 Summary ===
2025-03-22 21:22:34 - INFO - Time: 2007.88s
2025-03-22 21:22:34 - INFO - Training   - accuracy: 0.9086 - auc: 0.7906 - f1_score: 0.1487 - loss: 0.1747 - precision: 0.5107 - recall: 0.1098 - learning_rate: 0.0001
2025-03-22 21:22:34 - INFO - Validation - accuracy: 0.9094 - auc: 0.7423 - f1_score: 0.1018 - loss: 0.2810 - precision: 0.3631 - recall: 0.0785



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-03-22 21:24:01 - INFO - 
Loss Analysis - Atelectasis
2025-03-22 21:24:01 - INFO - Confidence Distribution:
2025-03-22 21:24:01 - INFO - -- High (>0.9): 0.00%
2025-03-22 21:24:01 - INFO - -- Medium (0.6-0.9): 0.07%
2025-03-22 21:24:01 - INFO - -- Uncertain (0.4-0.6): 1.67%
2025-03-22 21:24:01 - INFO - -- Low (<0.4): 98.26%
2025-03-22 21:24:01 - INFO - Performance:
2025-03-22 21:24:01 - INFO - -- True Positives: 35
2025-03-22 21:24:01 - INFO - -- False Positives: 53
2025-03-22 21:24:01 - INFO - -- Loss Contribution: 0.3243
2025-03-22 21:24:01 - INFO - Average Confidence:
2025-03-22 21:24:01 - INFO - -- Correct Predictions: 9.71%
2025-03-22 21:24:01 - INFO - -- Incorrect Predictions: 17.00%
2025-03-22 21:24:01 - INFO - 
Loss Analysis - Cardiomegaly
2025-03-22 21:24:01 - INFO - Confidence Distribution:
2025-03-22 21:24:01 - INFO - -- High (>0.9): 0.55%
2025-03-22 21:24:01 - INFO - -- Medium (0.6-0.9): 1.71%
2025-03-22 21:24:01 - INFO - -- Uncertain (0.4-0.6): 1.40%
2025-03-22 21:24:01

[1m3378/3378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2100s[0m 614ms/step - accuracy: 0.9084 - auc: 0.7877 - f1_score: 0.1472 - loss: 0.1750 - precision: 0.5108 - recall: 0.1087 - val_accuracy: 0.9094 - val_auc: 0.7423 - val_f1_score: 0.1018 - val_loss: 0.2810 - val_precision: 0.3631 - val_recall: 0.0785 - learning_rate: 1.0000e-04


2025-03-22 21:24:06 - INFO - 
=== Training Completed! ===

2025-03-22 21:24:06 - INFO - Final Metrics: accuracy: 0.9086 - auc: 0.7906 - f1_score: 0.1487 - loss: 0.1747 - precision: 0.5107 - recall: 0.1098 - val_accuracy: 0.9094 - val_auc: 0.7423 - val_f1_score: 0.1018 - val_loss: 0.2810 - val_precision: 0.3631 - val_recall: 0.0785

