In [1]:
# Import Deps
import os
os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import pandas as pd
import numpy as np
import tensorflow as tf
import logging
from tqdm import tqdm

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, BinaryFocalCrossentropy
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Import Locals
from src.utils.consts import TF_RECORD_DATASET, MODELS_PATH, TF_BUFFER_SIZE, NUM_CLASSES
from src.model.tensorflow_utils import load_and_split_dataset, apply_augmentation_to_dataset, optimize_dataset
from src.model.tensorflow_utils import setup_logger, setup_training_logger, setup_metrics_monitor, setup_loss_monitor, get_metrics
from src.model.tensorflow_utils import calculate_class_weights, show_class_weights
from src.model.tensorflow_utils import show_augmented_sample, show_sample_record

# Input data configuration.
# model_name is used further down to build the checkpoint path and to label the monitors/evaluator.
model_name = "Simplified_DensNet_v2"
# Passed to load_and_split_dataset as the dataset size
# (presumably the total number of records in the TFRecord file — TODO confirm).
DATASET_SIZE = 102_697
tfrecord_path = "{}/chest_xray_data.tfrecord".format(TF_RECORD_DATASET)

In [2]:
# Load the TFRecord file and split it into train / validation / test datasets
batch_size = 32  # not used by the loader; consumed later by optimize_dataset and the training logger
dataset_size = DATASET_SIZE
tfrecord_buffer_size = TF_BUFFER_SIZE
shuffle_buffer_size = 1000

dataset_splits = load_and_split_dataset(
    tfrecord_path,
    shuffle_buffer_size,
    tfrecord_buffer_size,
    dataset_size,
)
train_ds, val_ds, test_ds = dataset_splits

I0000 00:00:1740473345.298153 13419610 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1740473345.298196 13419610 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Sanity check: show one record from the training split (the output includes its one-hot label vector)
show_sample_record(train_ds)


One-hot encoded labels:
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


In [4]:
# Build the network and compile it with the selected loss
from src.model.tensorflow_densnet_basic import build_densenet121

metrics = get_metrics()

# Model V1 loss: plain binary cross-entropy.
# It is defined here but not passed to model.compile below.
binary_loss = BinaryCrossentropy(from_logits=False, label_smoothing=0.01)

# Model V2 loss: binary focal cross-entropy (the one actually used for training).
# NOTE(review): per the Keras documentation, `alpha` is only applied when
# `apply_class_balancing=True`; with the default it appears to have no effect — confirm intent.
focal_loss = BinaryFocalCrossentropy(
    alpha=0.25,
    gamma=2.0,
    label_smoothing=0.01,
    from_logits=False,
)

model = build_densenet121(NUM_CLASSES)
model.compile(
    loss=focal_loss,
    metrics=metrics,
    optimizer=Adam(learning_rate=1e-4),
)

In [5]:
# Compute per-class weights from the training split (intended to compensate for rare classes)
# and display them. This runs before the training cell re-binds train_ds to the
# augmented/optimized pipeline.
class_weights = calculate_class_weights(train_ds, NUM_CLASSES)
show_class_weights(class_weights)

Class Weights:
Class 0: 0.60
Class 1: 2.50
Class 2: 1.49
Class 3: 3.01
Class 4: 0.52
Class 5: 2.69
Class 6: 4.11
Class 7: 28.19
Class 8: 0.35
Class 9: 1.18
Class 10: 0.13
Class 11: 1.09
Class 12: 2.04
Class 13: 5.04
Class 14: 1.28


In [None]:
# Model training
#
# NOTE: this cell re-binds train_ds / val_ds / test_ds. Re-running it without first
# re-running the dataset loading cell would apply the transformations below a second time.

# Sample counts of the train / validation splits, hard-coded so the datasets do not
# have to be iterated just to be counted.
# NOTE(review): presumably these match the split produced by load_and_split_dataset
# for DATASET_SIZE — confirm whenever the dataset or the split ratios change.
train_sample_count = 71887
val_sample_count = 15404

# Derive the step counts from batch_size (rather than a second hard-coded 32) so they
# stay consistent with the batch size handed to optimize_dataset below. Floor division
# drops the final partial batch, matching the previous int(count / 32) result.
steps_per_epoch = train_sample_count // batch_size
validation_steps = val_sample_count // batch_size

# Optimize Data: augmentation is applied to the training split only
train_ds = apply_augmentation_to_dataset(train_ds)
train_ds = optimize_dataset(train_ds, batch_size)
val_ds = optimize_dataset(val_ds, batch_size)
test_ds = optimize_dataset(test_ds, batch_size)

# Setup Loggers
logger = setup_logger()
training_logger = setup_training_logger(logger, batch_size, 100)
metrics_monitor = setup_metrics_monitor(MODELS_PATH, model_name, logger)
loss_monitor = setup_loss_monitor(MODELS_PATH, model_name, logger, val_ds)

# Both callbacks track the validation F1 score (higher is better, hence mode="max"):
# the learning rate is halved after 3 epochs without improvement, and only the best
# model is written to model_path.
model_path = f"{MODELS_PATH}/{model_name}.keras"
reduce_lr = ReduceLROnPlateau(monitor="val_f1_score", factor=0.5,  patience=3, min_lr=1e-6, mode="max", verbose=1)
checkpoint = ModelCheckpoint(model_path, monitor="val_f1_score", save_best_only=True, mode="max")

# The datasets are repeated indefinitely, so the explicit step counts define where
# each training epoch and each validation pass ends.
# NOTE(review): the labels look multi-hot (several positives per sample); verify how
# Keras maps `class_weight` onto such targets before relying on the weighting.
history = model.fit(
    train_ds.repeat(),
    validation_data=val_ds.repeat(),
    class_weight=class_weights,
    epochs=20,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[checkpoint, reduce_lr, training_logger, loss_monitor, metrics_monitor]
)

2025-02-25 09:50:17 - INFO - 
=== Training Started ===

2025-02-25 09:50:17 - INFO - Batch Size: 32
2025-02-25 09:50:17 - INFO - Optimizer: Adam
2025-02-25 09:50:17 - INFO - 

2025-02-25 09:50:17 - INFO - 
=== Starting Epoch 1 ===



Epoch 1/20
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step - accuracy: 0.8890 - auc: 0.5388 - f1_score: 0.0518 - loss: 0.4931 - precision: 0.1213 - recall: 0.0391  

2025-02-25 10:02:12 - INFO - 
=== Epoch 1 Summary ===
2025-02-25 10:02:12 - INFO - Time: 714.02s
2025-02-25 10:02:12 - INFO - Training   - accuracy: 0.9057 - auc: 0.5527 - f1_score: 0.0316 - loss: 0.3653 - precision: 0.1348 - recall: 0.0189 - learning_rate: 0.0001
2025-02-25 10:02:12 - INFO - Validation - accuracy: 0.9142 - auc: 0.6269 - f1_score: 0.0005 - loss: 0.1961 - precision: 0.2778 - recall: 0.0003



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms

2025-02-25 10:05:39 - INFO - 
Loss Analysis - Atelectasis
2025-02-25 10:05:39 - INFO - Confidence Distribution:
2025-02-25 10:05:39 - INFO - -- High (>0.9): 0.00%
2025-02-25 10:05:39 - INFO - -- Medium (0.6-0.9): 0.00%
2025-02-25 10:05:39 - INFO - -- Uncertain (0.4-0.6): 0.00%
2025-02-25 10:05:39 - INFO - -- Low (<0.4): 100.00%
2025-02-25 10:05:39 - INFO - Performance:
2025-02-25 10:05:39 - INFO - -- True Positives: 0
2025-02-25 10:05:39 - INFO - -- False Positives: 0
2025-02-25 10:05:39 - INFO - -- Loss Contribution: 0.0389
2025-02-25 10:05:39 - INFO - Average Confidence:
2025-02-25 10:05:39 - INFO - -- Correct Predictions: 30.93%
2025-02-25 10:05:39 - INFO - -- Incorrect Predictions: 31.68%
2025-02-25 10:05:39 - INFO - 
Loss Analysis - Cardiomegaly
2025-02-25 10:05:39 - INFO - Confidence Distribution:
2025-02-25 10:05:39 - INFO - -- High (>0.9): 0.00%
2025-02-25 10:05:39 - INFO - -- Medium (0.6-0.9): 0.00%
2025-02-25 10:05:39 - INFO - -- Uncertain (0.4-0.6): 0.01%
2025-02-25 10:05:39

[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m922s[0m 403ms/step - accuracy: 0.8890 - auc: 0.5388 - f1_score: 0.0518 - loss: 0.4931 - precision: 0.1213 - recall: 0.0391 - val_accuracy: 0.9142 - val_auc: 0.6269 - val_f1_score: 4.8811e-04 - val_loss: 0.1961 - val_precision: 0.2778 - val_recall: 2.5247e-04 - learning_rate: 1.0000e-04


2025-02-25 10:05:39 - INFO - 
=== Starting Epoch 2 ===



Epoch 2/20
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step - accuracy: 0.9124 - auc: 0.5740 - f1_score: 0.0118 - loss: 0.1386 - precision: 0.1745 - recall: 0.0064     

2025-02-25 10:17:37 - INFO - 
=== Epoch 2 Summary ===
2025-02-25 10:17:37 - INFO - Time: 717.82s
2025-02-25 10:17:37 - INFO - Training   - accuracy: 0.9125 - auc: 0.5815 - f1_score: 0.0114 - loss: 0.1158 - precision: 0.1737 - recall: 0.0062 - learning_rate: 0.0001
2025-02-25 10:17:37 - INFO - Validation - accuracy: 0.9142 - auc: 0.6405 - f1_score: 0.0000 - loss: 0.1024 - precision: 0.0000 - recall: 0.0000



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7

2025-02-25 10:21:05 - INFO - 
Loss Analysis - Atelectasis
2025-02-25 10:21:05 - INFO - Confidence Distribution:
2025-02-25 10:21:05 - INFO - -- High (>0.9): 0.00%
2025-02-25 10:21:05 - INFO - -- Medium (0.6-0.9): 0.00%
2025-02-25 10:21:05 - INFO - -- Uncertain (0.4-0.6): 0.00%
2025-02-25 10:21:05 - INFO - -- Low (<0.4): 100.00%
2025-02-25 10:21:05 - INFO - Performance:
2025-02-25 10:21:05 - INFO - -- True Positives: 0
2025-02-25 10:21:05 - INFO - -- False Positives: 0
2025-02-25 10:21:05 - INFO - -- Loss Contribution: 0.0335
2025-02-25 10:21:05 - INFO - Average Confidence:
2025-02-25 10:21:05 - INFO - -- Correct Predictions: 26.02%
2025-02-25 10:21:05 - INFO - -- Incorrect Predictions: 27.24%
2025-02-25 10:21:05 - INFO - 
Loss Analysis - Cardiomegaly
2025-02-25 10:21:05 - INFO - Confidence Distribution:
2025-02-25 10:21:05 - INFO - -- High (>0.9): 0.00%
2025-02-25 10:21:05 - INFO - -- Medium (0.6-0.9): 0.00%
2025-02-25 10:21:05 - INFO - -- Uncertain (0.4-0.6): 0.03%
2025-02-25 10:21:05

[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m926s[0m 411ms/step - accuracy: 0.9124 - auc: 0.5740 - f1_score: 0.0118 - loss: 0.1386 - precision: 0.1745 - recall: 0.0064 - val_accuracy: 0.9142 - val_auc: 0.6405 - val_f1_score: 0.0000e+00 - val_loss: 0.1024 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-04


2025-02-25 10:21:05 - INFO - 
=== Starting Epoch 3 ===



Epoch 3/20
[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275ms/step - accuracy: 0.9130 - auc: 0.5804 - f1_score: 0.0095 - loss: 0.0788 - precision: 0.1883 - recall: 0.0051  

2025-02-25 10:33:21 - INFO - 
=== Epoch 3 Summary ===
2025-02-25 10:33:21 - INFO - Time: 735.98s
2025-02-25 10:33:21 - INFO - Training   - accuracy: 0.9130 - auc: 0.5884 - f1_score: 0.0095 - loss: 0.0753 - precision: 0.1853 - recall: 0.0052 - learning_rate: 0.0001
2025-02-25 10:33:21 - INFO - Validation - accuracy: 0.9142 - auc: 0.6418 - f1_score: 0.0000 - loss: 0.0891 - precision: 0.0000 - recall: 0.0000



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

2025-02-25 10:37:09 - INFO - 
Loss Analysis - Atelectasis
2025-02-25 10:37:09 - INFO - Confidence Distribution:
2025-02-25 10:37:09 - INFO - -- High (>0.9): 0.00%
2025-02-25 10:37:09 - INFO - -- Medium (0.6-0.9): 0.00%
2025-02-25 10:37:09 - INFO - -- Uncertain (0.4-0.6): 0.00%
2025-02-25 10:37:09 - INFO - -- Low (<0.4): 100.00%
2025-02-25 10:37:09 - INFO - Performance:
2025-02-25 10:37:09 - INFO - -- True Positives: 0
2025-02-25 10:37:09 - INFO - -- False Positives: 0
2025-02-25 10:37:09 - INFO - -- Loss Contribution: 0.0365
2025-02-25 10:37:09 - INFO - Average Confidence:
2025-02-25 10:37:09 - INFO - -- Correct Predictions: 29.31%
2025-02-25 10:37:09 - INFO - -- Incorrect Predictions: 30.40%
2025-02-25 10:37:09 - INFO - 
Loss Analysis - Cardiomegaly
2025-02-25 10:37:09 - INFO - Confidence Distribution:
2025-02-25 10:37:09 - INFO - -- High (>0.9): 0.00%
2025-02-25 10:37:09 - INFO - -- Medium (0.6-0.9): 0.00%
2025-02-25 10:37:09 - INFO - -- Uncertain (0.4-0.6): 0.00%
2025-02-25 10:37:09

[1m2246/2246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m964s[0m 429ms/step - accuracy: 0.9130 - auc: 0.5804 - f1_score: 0.0095 - loss: 0.0788 - precision: 0.1883 - recall: 0.0051 - val_accuracy: 0.9142 - val_auc: 0.6418 - val_f1_score: 0.0000e+00 - val_loss: 0.0891 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-04


2025-02-25 10:37:09 - INFO - 
=== Starting Epoch 4 ===



Epoch 4/20
[1m  77/2246[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9:19[0m 258ms/step - accuracy: 0.9125 - auc: 0.5860 - f1_score: 0.0074 - loss: 0.0684 - precision: 0.1697 - recall: 0.0040             

In [None]:
# Set up the evaluator for the trained model on the test split
from src.model.tensorflow_model_evaluation import ModelEvaluation

mappings_path = f"{TF_RECORD_DATASET}/label_mappings.csv"
evaluator = ModelEvaluation(
    model_name=model_name,
    model=model,
    label_mappings_path=mappings_path,
    test_dataset=test_ds,
    output_dir=MODELS_PATH,
)

# Run the evaluation and keep its result.
# Note: this re-binds `metrics`, which previously held the metric list passed to model.compile.
metrics = evaluator.evaluate()

In [None]:
# Produce the classification report through the evaluator
# (presumably based on the results of evaluator.evaluate() — TODO confirm).
evaluator.generate_classification_report()

In [None]:
# Plot the confusion matrices through the evaluator
# (presumably one per class, given the multi-label targets — TODO confirm).
evaluator.plot_confusion_matrices()

In [None]:
# Visualize the model's prediction for a single test example
for image, labels in test_ds.take(1):
    # A rank-4 image tensor means the element is a whole batch: keep only its first example
    is_batched = len(image.shape) == 4
    if is_batched:
        image, labels = image[0], labels[0]

    evaluator.visualize_prediction(true_labels=labels, image=image)