In [3]:
#imports
import os
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight


2025-12-22 09:17:45.572043: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766395065.764495      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766395065.821140      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766395066.272418      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766395066.272454      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1766395066.272457      55 computation_placer.cc:177] computation placer alr

In [4]:
#Paths
DATASET_DIR = "/kaggle/input/chest-xray-images-guangzhou-women-and-childrens/chest_xray/"
TRAIN_DIR = os.path.join(DATASET_DIR, "train")
TEST_DIR  = os.path.join(DATASET_DIR, "test")
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [None]:
len_normal = len(os.listdir(os.path.join(TRAIN_DIR, 'NORMAL')))
len_pneumonia = len(os.listdir(os.path.join(TRAIN_DIR, 'PNEUMONIA')))

plt.figure(figsize=(6,4))
plt.bar(['Normal', 'Pneumonia'], [len_normal, len_pneumonia], color=['green', 'red'])
plt.title('Class Imbalance')
plt.show()

In [5]:
# Data generators
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    zoom_range=0.1,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

In [6]:
#datasets
train_gen = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"
)

val_gen = test_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"
)

test_gen = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",
    shuffle=False
)

Found 5232 images belonging to 2 classes.
Found 5232 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [7]:
#handling class imbalance
class_weights_array = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_gen.classes),
    y=train_gen.classes
)

class_weights = {
    0: class_weights_array[0],
    1: class_weights_array[1]
}


In [8]:
#load the model
base_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3)
)

base_model.trainable = False


I0000 00:00:1766395082.045260      55 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1766395082.049176      55 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [None]:
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])

In [10]:
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [12]:
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    class_weight=class_weights
)


Epoch 1/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 831ms/step - accuracy: 0.9398 - loss: 0.1559 - val_accuracy: 0.9289 - val_loss: 0.1816
Epoch 2/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 835ms/step - accuracy: 0.9309 - loss: 0.1733 - val_accuracy: 0.9268 - val_loss: 0.1908
Epoch 3/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 846ms/step - accuracy: 0.9438 - loss: 0.1563 - val_accuracy: 0.9308 - val_loss: 0.1756
Epoch 4/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 834ms/step - accuracy: 0.9425 - loss: 0.1537 - val_accuracy: 0.9281 - val_loss: 0.1832
Epoch 5/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 841ms/step - accuracy: 0.9358 - loss: 0.1611 - val_accuracy: 0.9253 - val_loss: 0.1943
Epoch 6/10
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 829ms/step - accuracy: 0.9392 - loss: 0.1533 - val_accuracy: 0.9190 - val_loss: 0.2037
Epoc

In [16]:
#Fine-tuning 

#Unfreeze last convolution block only
for layer in base_model.layers[-4:]:
    layer.trainable = True

#Recompile
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [17]:
fine_tune_history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=5,
    class_weight=class_weights
)

Epoch 1/5
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 847ms/step - accuracy: 0.9480 - loss: 0.1277 - val_accuracy: 0.9539 - val_loss: 0.1205
Epoch 2/5
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 843ms/step - accuracy: 0.9652 - loss: 0.0935 - val_accuracy: 0.9650 - val_loss: 0.0907
Epoch 3/5
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 827ms/step - accuracy: 0.9662 - loss: 0.0837 - val_accuracy: 0.9515 - val_loss: 0.1358
Epoch 4/5
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 829ms/step - accuracy: 0.9675 - loss: 0.0887 - val_accuracy: 0.9402 - val_loss: 0.1694
Epoch 5/5
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 837ms/step - accuracy: 0.9770 - loss: 0.0533 - val_accuracy: 0.9774 - val_loss: 0.0613


In [40]:
#Final evaluation (true test set)
test_gen.reset()
pred_probs = model.predict(test_gen)
pred_labels = (pred_probs > 0.85).astype(int).ravel()
true_labels = test_gen.classes

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 226ms/step


In [41]:
print(
    classification_report(
        true_labels,
        pred_labels,
        target_names=["Normal", "Pneumonia"]
    )
)


              precision    recall  f1-score   support

      Normal       0.95      0.81      0.87       234
   Pneumonia       0.89      0.97      0.93       390

    accuracy                           0.91       624
   macro avg       0.92      0.89      0.90       624
weighted avg       0.91      0.91      0.91       624

