 # Chest X-Ray Classification with DenseNet121

 ## Hyperparameter Tuning Notebook
    
This notebook performs hyperparameter optimization for a DenseNet121 model on chest X-ray classification.

In [4]:
# Print the GPU model, driver/CUDA versions and current memory usage visible to this machine.
!nvidia-smi

Wed Apr  2 13:02:56 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.03             Driver Version: 550.144.03     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       On  |   00000000:00:1E.0 Off |                    0 |
| N/A   23C    P8             10W /   70W |       1MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## Install Required Packages

In [1]:
!pip install -r requirements.txt

Collecting keras_tuner>=1.1.0 (from -r requirements.txt (line 2))
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras_tuner>=1.1.0->-r requirements.txt (line 2))
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.7 kt-legacy-1.0.5


In [6]:
!unzip balanced_dataset.zip -d ./balanced_dataset

Archive:  balanced_dataset.zip
   creating: ./balanced_dataset/balanced_dataset/
   creating: ./balanced_dataset/balanced_dataset/train/
   creating: ./balanced_dataset/balanced_dataset/train/Atelectasis/
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00007922_003.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00004822_025.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00004915_006.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00028410_000.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00013373_000.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00016898_000.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00005348_006.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00002491_003.png  
  inflating: ./balanced_dataset/balanced_dataset/train/Atelectasis/00008191_006.png  
  inflating: ./balanc

## 1. Import Libraries

In [2]:
import datetime
import os

import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2025-04-02 13:02:27.627378: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-02 13:02:27.866492: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-02 13:02:27.918109: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-02 13:02:27.933929: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-02 13:02:28.173649: I tensorflow/core/platform/cpu_feature_guar

## 2. Directory Setup & Parameters

In [7]:
# List the working directory to confirm the dataset archive and the extracted folder are present.
!ls -l

total 575692
-rw-r--r-- 1 sagemaker-user users      5091 Apr  2 13:01 balance_dataset.py
drwxrwxr-x 5 sagemaker-user users        42 Mar 23 09:35 balanced_dataset
-rw-r--r-- 1 sagemaker-user users 588914598 Apr  2 13:10 balanced_dataset.zip
-rw-r--r-- 1 sagemaker-user users      1646 Apr  2 13:01 data_spliting.py
-rw-r--r-- 1 sagemaker-user users      5394 Apr  2 13:01 densenet121.py
-rw-r--r-- 1 sagemaker-user users      9000 Apr  2 13:01 hyperparameter_tuning.py
-rw-r--r-- 1 sagemaker-user users    553272 Apr  2 13:11 hyperparameter_tunning.ipynb
drwxr-xr-x 4 sagemaker-user users        52 Apr  2 13:01 logs_1
-rw-r--r-- 1 sagemaker-user users       129 Apr  2 13:01 requirements.txt


In [8]:
# -----------------------------
# A) Directory Setup & Hyperparams
# -----------------------------
# Root of the balanced dataset; the three split folders live directly underneath it.
OUTPUT_DIR = './balanced_dataset'
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(OUTPUT_DIR, split_name) for split_name in ('train', 'val', 'test')
)

# Labels the classifier distinguishes between.
CLASSES = ['Atelectasis', 'Cardiomegaly', 'No Finding', 'Nodule', 'Pneumothorax']
NUM_CLASSES = len(CLASSES)

# Spatial size images are resized to; used as the generators' `target_size`
# and as the first two dimensions of the model input shape.
IMG_SIZE = (512, 512)


## 3. Data Generators

In [9]:
# -----------------------------
# B) Data Generators
# -----------------------------
def _random_contrast(image):
    """Scale the contrast of one image by a random factor between 0.8 and 1.2."""
    return tf.image.random_contrast(image, lower=0.8, upper=1.2)


# Augmentation settings for the training split only.
# NOTE(review): `channel_shift_range` and the contrast function appear to run on the raw
# pixel values before `rescale` is applied (presumably 0-255), so a shift of 0.1 is likely
# a near no-op -- confirm the intended strength.
_train_augmentation = dict(
    # geometric transforms
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='reflect',
    # photometric transforms
    brightness_range=[0.8, 1.2],
    channel_shift_range=0.1,
    preprocessing_function=_random_contrast,
    # final scaling of pixel values
    rescale=1.0/255,
)
train_datagen = ImageDataGenerator(**_train_augmentation)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1.0/255)

## 4. Model Building Function for Keras Tuner

In [10]:
# -----------------------------
# C) Model Building Function for Keras Tuner
# -----------------------------
def build_model(hp):
    """Build and compile a DenseNet121-based classifier for one tuner trial.

    Args:
        hp: Keras Tuner hyperparameter container. The search space is declared on it
            here: `l2_reg`, `dropout_rate`, `dense_units_1`..`dense_units_3` and
            `learning_rate`.

    Returns:
        A compiled Sequential model: ImageNet-initialised DenseNet121 backbone, global
        average pooling, three Dense/BatchNorm/Dropout stages and a `NUM_CLASSES`-way
        softmax output, trained with Adam on categorical cross-entropy.
    """
    # Search space -- declared in the same fixed order on every call.
    l2_reg = hp.Float('l2_reg', min_value=1e-6, max_value=1e-2, sampling='log')
    dropout_rate = hp.Float('dropout_rate', min_value=0.1, max_value=0.7, step=0.1)
    head_widths = [
        hp.Int('dense_units_1', min_value=512, max_value=2048, step=256),
        hp.Int('dense_units_2', min_value=256, max_value=1024, step=256),
        hp.Int('dense_units_3', min_value=128, max_value=512, step=128),
    ]
    learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-3, sampling='log')

    def l2_penalty():
        # A fresh regularizer instance per layer, all sharing the sampled strength.
        return tf.keras.regularizers.l2(l2_reg)

    # Backbone without its ImageNet classification head; every layer stays trainable.
    img_height, img_width = IMG_SIZE[0], IMG_SIZE[1]
    backbone = DenseNet121(
        weights='imagenet',
        include_top=False,
        input_shape=(img_height, img_width, 3),
    )
    backbone.trainable = True

    # Classification head: one Dense -> BatchNorm -> Dropout stage per sampled width.
    stack = [backbone, layers.GlobalAveragePooling2D()]
    for width in head_widths:
        stack.extend([
            layers.Dense(width, activation='relu', kernel_regularizer=l2_penalty()),
            layers.BatchNormalization(),
            layers.Dropout(dropout_rate),
        ])
    stack.append(
        layers.Dense(NUM_CLASSES, activation='softmax', kernel_regularizer=l2_penalty())
    )
    model = models.Sequential(stack)

    model.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

## 5. Tuner Setup

In [11]:
# -----------------------------
# D) Tuner Setup
# -----------------------------
# Random search over the space declared in build_model: up to 20 trials, each ranked by
# its validation accuracy. Results go under hyperparameter_tuning/densenet121_tuning.
_tuner_settings = dict(
    objective='val_accuracy',
    max_trials=20,
    directory='hyperparameter_tuning',
    project_name='densenet121_tuning',
)
tuner = kt.RandomSearch(hypermodel=build_model, **_tuner_settings)


I0000 00:00:1743599576.788936    1622 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1743599577.234188    1622 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1743599577.237552    1622 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1743599577.242341    1622 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step 


## 6. Callbacks Setup

In [12]:
# -----------------------------
# E) Callbacks
# -----------------------------
def _plateau_callbacks():
    """Return fresh early-stopping and LR-reduction callbacks, both monitoring `val_loss`."""
    return [
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7),
    ]


# One timestamp per run keeps the log directory and checkpoint file name unique.
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join('logs', timestamp)

# TensorBoard logging: per-epoch histograms, per-batch updates, the model graph,
# weight images and a profile of batch 2.
tensorboard_cb = TensorBoard(
    log_dir=log_dir,
    profile_batch=2,
    write_images=True,
    write_graph=True,
    update_freq='batch',
    histogram_freq=1,
)

# The checkpoint callback writes into ./checkpoints, so make sure the folder exists.
os.makedirs('checkpoints', exist_ok=True)
checkpoint_cb = ModelCheckpoint(
    filepath=os.path.join('checkpoints', f'best_model_{timestamp}.h5'),
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Hyperparameter search: no checkpointing.
search_callbacks = [*_plateau_callbacks(), tensorboard_cb]

# Final training: same callbacks plus the checkpoint with the highest validation accuracy.
final_callbacks = [*_plateau_callbacks(), tensorboard_cb, checkpoint_cb]

2025-04-02 13:13:24.004750: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:103] Profiler session initializing.
2025-04-02 13:13:24.004784: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:118] Profiler session started.
2025-04-02 13:13:24.006662: I external/local_xla/xla/backends/profiler/gpu/cupti_tracer.cc:892] Profiler found 1 GPUs
2025-04-02 13:13:24.079139: W external/local_xla/xla/backends/profiler/gpu/cupti_tracer.cc:999] Fail to use per-thread activity buffer, cupti trace overhead may be big. CUPTI ERROR CODE:1
2025-04-02 13:13:24.079366: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:130] Profiler session tear down.
2025-04-02 13:13:24.079499: I external/local_xla/xla/backends/profiler/gpu/cupti_tracer.cc:1036] CUPTI activity buffer flushed


## 7. Run Hyperparameter Search

In [None]:
# -----------------------------
# F) Run Hyperparameter Search
# -----------------------------
print(f"\nStarting hyperparameter search. TensorBoard logs will be saved to: {log_dir}")
print(f"To view TensorBoard, run: tensorboard --logdir {log_dir}")

# Both iterators resize to IMG_SIZE and yield batches of 8 images with one-hot labels.
_search_flow_options = dict(target_size=IMG_SIZE, batch_size=8, class_mode='categorical')
search_train_flow = train_datagen.flow_from_directory(directory=TRAIN_DIR, **_search_flow_options)
search_val_flow = val_datagen.flow_from_directory(directory=VAL_DIR, **_search_flow_options)

# Each trial is given up to 100 epochs and uses the search callbacks (no checkpoint).
tuner.search(
    search_train_flow,
    validation_data=search_val_flow,
    epochs=100,
    callbacks=search_callbacks,
)


Starting hyperparameter search. TensorBoard logs will be saved to: logs/20250402-131324
To view TensorBoard, run: tensorboard --logdir logs/20250402-131324
Found 4370 images belonging to 5 classes.
Found 545 images belonging to 5 classes.

Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
2.7335e-05        |2.7335e-05        |l2_reg
0.2               |0.2               |dropout_rate
2048              |2048              |dense_units_1
256               |256               |dense_units_2
384               |384               |dense_units_3
0.00097217        |0.00097217        |learning_rate



  self._warn_if_super_not_called()


Epoch 1/100


I0000 00:00:1743599676.854692    6446 service.cc:146] XLA service 0x7f9c300591c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743599676.854729    6446 service.cc:154]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2025-04-02 13:14:38.858259: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-04-02 13:14:45.664170: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90701
2025-04-02 13:17:36.619231: E external/local_xla/xla/service/slow_operation_alarm.cc:65] 
********************************
[Compiling module a_inference_one_step_on_data_87794__.56235] Very slow compile? If you want to file a bug, run with envvar XLA_FLAGS=--xla_dump_to=/tmp/foo and attach the results.
********************************

2025-04-02 13:17:40.301046: E external/local_xla/xla/service/slow_operatio

[1m  1/547[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m36:46:17[0m 242s/step - accuracy: 0.2500 - loss: 3.0178

2025-04-02 13:17:40.835958: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:103] Profiler session initializing.
2025-04-02 13:17:40.835996: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:118] Profiler session started.
2025-04-02 13:17:40.867677: W external/local_xla/xla/backends/profiler/gpu/cupti_tracer.cc:999] Fail to use per-thread activity buffer, cupti trace overhead may be big. CUPTI ERROR CODE:1
2025-04-02 13:17:41.194127: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:68] Profiler session collecting data.
2025-04-02 13:17:41.353274: I external/local_xla/xla/backends/profiler/gpu/cupti_tracer.cc:1036] CUPTI activity buffer flushed


[1m  2/547[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:34[0m 724ms/step - accuracy: 0.2812 - loss: 2.7723   

2025-04-02 13:17:41.498279: I external/local_xla/xla/backends/profiler/gpu/cupti_collector.cc:534]  GpuTracer has collected 2542 callback api events and 3085 activity events. 
2025-04-02 13:17:41.526043: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:130] Profiler session tear down.
2025-04-02 13:17:41.532998: I external/local_tsl/tsl/profiler/rpc/client/save_profile.cc:147] Collecting XSpace to repository: logs/20250402-131324/00/execution0/train/plugins/profile/2025_04_02_13_17_41/default.xplane.pb


[1m 11/547[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:45[0m 756ms/step - accuracy: 0.2944 - loss: 2.5485

## 8. Get Best Hyperparameters

In [None]:
# -----------------------------
# G) Get Best Hyperparameters
# -----------------------------
# Take the single top-ranked hyperparameter set from the finished search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# (display label, hyperparameter name) pairs, in the order they are reported.
_reported_hps = (
    ("L2 Regularization", 'l2_reg'),
    ("Dropout Rate", 'dropout_rate'),
    ("Dense Units 1", 'dense_units_1'),
    ("Dense Units 2", 'dense_units_2'),
    ("Dense Units 3", 'dense_units_3'),
    ("Learning Rate", 'learning_rate'),
)

print("\nBest Hyperparameters:")
for _label, _hp_name in _reported_hps:
    print(f"{_label}: {best_hps.get(_hp_name)}")

## 9. Train Final Model with Best Hyperparameters

In [None]:
# -----------------------------
# H) Train Final Model with Best Hyperparameters
# -----------------------------
print("\nTraining final model with best hyperparameters...")

# Rebuild a fresh model from the winning hyperparameters.
model = tuner.hypermodel.build(best_hps)

# Same input pipeline as the search: IMG_SIZE images, batches of 8, one-hot labels.
_final_flow_options = dict(target_size=IMG_SIZE, batch_size=8, class_mode='categorical')
final_train_flow = train_datagen.flow_from_directory(directory=TRAIN_DIR, **_final_flow_options)
final_val_flow = val_datagen.flow_from_directory(directory=VAL_DIR, **_final_flow_options)

# Up to 50 epochs, this time with the checkpointing callbacks attached.
history = model.fit(
    final_train_flow,
    validation_data=final_val_flow,
    epochs=50,
    callbacks=final_callbacks,
)

# Write the trained model to a single HDF5 file.
model.save('best_model_tuned.h5')
print("\nTraining complete! Model saved as 'best_model_tuned.h5'")
print("To view training metrics in TensorBoard, run: tensorboard --logdir logs")

## 10. Plot Training History

In [None]:
# Training vs. validation curves for the final run: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(16, 6))

for ax, metric, title in ((acc_ax, 'accuracy', 'Accuracy'), (loss_ax, 'loss', 'Loss')):
    ax.plot(history.history[metric], label=f'Training {title}')
    ax.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    ax.legend()

fig.tight_layout()
fig.savefig('training_history.png')
plt.show()

## 11. Evaluate on Test Set and Plot Confusion Matrix

In [None]:
# -----------------------------
# I) Evaluate on Test Set and Plot Confusion Matrix
# -----------------------------
print("\nEvaluating model on test set...")
test_datagen = ImageDataGenerator(rescale=1.0/255)
# shuffle=False keeps the batch order aligned with `test_generator.classes`, which the
# confusion matrix and classification report below depend on.
test_generator = test_datagen.flow_from_directory(
    directory=TEST_DIR,
    target_size=IMG_SIZE,
    batch_size=8,  # Fixed batch size
    class_mode='categorical',
    shuffle=False
)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_generator)
print(f"\nTest Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# Get predictions over exactly one pass of the test set.
# len(test_generator) is the true number of batches; `samples // batch_size + 1` requested
# one batch too many whenever the sample count was an exact multiple of the batch size.
test_generator.reset()  # start from the first batch after evaluate() consumed the iterator
predictions = model.predict(test_generator, steps=len(test_generator))
predicted_classes = np.argmax(predictions, axis=1)
true_classes = test_generator.classes
assert len(predicted_classes) == len(true_classes), (
    f"got {len(predicted_classes)} predictions for {len(true_classes)} test images"
)

# class_indices maps class name -> integer id; its keys are listed in id order.
class_labels = list(test_generator.class_indices.keys())
# Pin the label set so the matrix/report keep one row per class even if a class
# never occurs in the test labels or predictions.
label_ids = list(range(len(class_labels)))

# Compute and plot confusion matrix.
# The axes are passed to disp.plot() explicitly: without `ax`, it creates its own
# default-sized figure and the requested 10x8 figure is left empty.
cm = confusion_matrix(true_classes, predicted_classes, labels=label_ids)
fig, ax = plt.subplots(figsize=(10, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.set_title("Confusion Matrix - Best Model")
fig.tight_layout()
fig.savefig('confusion_matrix_best_model.png')
plt.show()  # display in the notebook as well as saving, like the training-history plot
plt.close(fig)

# Print classification report
print("\nClassification Report:")
print(classification_report(true_classes, predicted_classes, labels=label_ids, target_names=class_labels))