In [1]:
import kagglehub
# Download the chest X-ray pneumonia dataset through kagglehub. The call is the
# cell's last expression, so its return value is what the notebook displays.
kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")

'/kaggle/input/chest-xray-pneumonia'

**Test and Train have the same number of images, so we transfer some data from Test to Train**

In [1]:
import shutil
import os
import random

# Build a writable working copy of the dataset and move part of the test split
# into the training split.

base = "/kaggle/working/chest"
original = "/kaggle/input/chest-xray-pneumonia/chest_xray"

SPLITS = ["train", "val", "test"]
CLASS_NAMES = ["NORMAL", "PNEUMONIA"]
SPLIT_SEED = 42  # fixed seed so the same files are moved on every run

# Delete leftovers from earlier runs. The working copy itself (`base`) must be
# removed as well: otherwise a re-run would move another batch of test images
# into train while the re-copied originals stay in test, leaving the same
# images in both splits.
shutil.rmtree("/kaggle/working/chest_split", ignore_errors=True)
shutil.rmtree(base, ignore_errors=True)

folders = [f"{base}/{split}/{cls}" for split in SPLITS for cls in CLASS_NAMES]

for f in folders:
    os.makedirs(f, exist_ok=True)

# Copy every image from the read-only input dataset into the working copy.
for split in SPLITS:
    for cls in CLASS_NAMES:
        src = f"{original}/{split}/{cls}"
        dst = f"{base}/{split}/{cls}"
        for file in os.listdir(src):
            shutil.copy(os.path.join(src, file), os.path.join(dst, file))

# Move 350 images in total from test to train: 175 from each class.
move_count = 350
num_to_move = move_count // 2

rng = random.Random(SPLIT_SEED)

for cls in CLASS_NAMES:
    src_folder = f"{base}/test/{cls}"
    dst_folder = f"{base}/train/{cls}"

    # os.listdir returns names in arbitrary order, so sort before the seeded
    # shuffle to make the selection reproducible.
    files = sorted(os.listdir(src_folder))
    rng.shuffle(files)

    for file in files[:num_to_move]:
        shutil.move(os.path.join(src_folder, file), os.path.join(dst_folder, file))

# Report the resulting per-class sizes of the train and test splits.
for cls in CLASS_NAMES:
    print(f"{cls}: train={len(os.listdir(f'{base}/train/{cls}'))}, "
          f"test={len(os.listdir(f'{base}/test/{cls}'))}")


NORMAL: train=1516, test=59
PNEUMONIA: train=4050, test=215


In [2]:
import tensorflow as tf

classes = ["NORMAL", "PNEUMONIA"]


def _load_split(split):
    """Build the batched image dataset for one split of the working copy.

    Args:
        split: Sub-directory name under /kaggle/working/chest
            ("train", "val" or "test").

    Returns:
        The dataset created by `image_dataset_from_directory`, with one-hot
        ('categorical') labels ordered as in `classes`, images resized to
        256x256, batches of 32, and seeded shuffling.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        f"/kaggle/working/chest/{split}",
        labels='inferred',
        label_mode='categorical',
        class_names=classes,
        image_size=(256, 256),
        batch_size=32,
        shuffle=True,
        seed=42,
    )


# One loader call per split instead of three copy-pasted argument lists, so the
# settings cannot drift apart between splits.
train_set = _load_split("train")
val_set = _load_split("val")
test_set = _load_split("test")

2025-11-11 14:31:54.832603: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762871515.021037      48 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762871515.073266      48 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Found 5566 files belonging to 2 classes.


I0000 00:00:1762871529.936727      48 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Found 16 files belonging to 2 classes.
Found 274 files belonging to 2 classes.


1

In [3]:
from tensorflow.keras import layers

# Random augmentations, applied to training batches only.
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(0.05, 0.05),
    layers.RandomContrast(0.1),
])

# Order matters: cache the un-augmented batches first, then shuffle, then
# augment. cache() replays exactly the same elements on every pass, so caching
# after the random map would freeze the first epoch's augmentations and every
# later epoch would train on the identical images.
# NOTE: `train_set` is reassigned in place, so re-running this cell without
# re-creating the datasets stacks the pipeline on top of itself.
train_set = (
    train_set
    .cache()
    .shuffle(1000)  # the dataset is already batched, so this shuffles whole batches
    .map(
        lambda x, y: (data_augmentation(x, training=True), y),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    .prefetch(tf.data.AUTOTUNE)
)

# Validation and test data are not augmented: cache and prefetch only.
val_set = val_set.cache().prefetch(tf.data.AUTOTUNE)
test_set = test_set.cache().prefetch(tf.data.AUTOTUNE)


In [7]:
from tensorflow.keras.applications import EfficientNetB0


# EfficientNetB0 with ImageNet weights and without its classification top,
# taking 256x256 RGB inputs.
base = EfficientNetB0(include_top=False, weights="imagenet", input_shape=(256, 256, 3))

# Freeze the backbone so that only layers added on top of it are trained.
base.trainable = False


In [9]:
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras import models, Model

# Classification head: pool the backbone's feature maps, apply dropout, and
# finish with a 2-unit softmax layer.
head = models.Sequential()
head.add(GlobalAveragePooling2D())
head.add(Dropout(0.3))
head.add(Dense(2, activation="softmax"))

# Attach the head to the backbone's output to form the end-to-end model.
output = head(base.output)
model = Model(inputs=base.input, outputs=output)


In [10]:
# Categorical cross-entropy goes with the 2-unit softmax output; accuracy is
# tracked as the reported metric.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [11]:
from tensorflow.keras.callbacks import EarlyStopping  # stops training once the monitored metric stops improving
from tensorflow.keras.callbacks import ReduceLROnPlateau  # lowers (never raises) the learning rate when the metric plateaus

# Both callbacks watch the validation loss (their default, spelled out here).
# EarlyStopping restores the best weights seen once it triggers.
callbacks = [
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor="val_loss", patience=2),
]

# Keep the History object so the per-epoch metrics stay available for
# inspection or plotting after training.
history = model.fit(
    train_set,
    validation_data=val_set,
    epochs=15,
    callbacks=callbacks,
)


Epoch 1/15


I0000 00:00:1762871661.668622     115 service.cc:148] XLA service 0x78dba4002d60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1762871661.669470     115 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1762871663.441556     115 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  5/174[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6s[0m 38ms/step - accuracy: 0.6235 - loss: 0.6739   

I0000 00:00:1762871672.156131     115 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 131ms/step - accuracy: 0.8186 - loss: 0.3899 - val_accuracy: 0.9375 - val_loss: 0.2802 - learning_rate: 0.0010
Epoch 2/15
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 36ms/step - accuracy: 0.9198 - loss: 0.2028 - val_accuracy: 0.9375 - val_loss: 0.2385 - learning_rate: 0.0010
Epoch 3/15
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.9239 - loss: 0.1848 - val_accuracy: 0.9375 - val_loss: 0.2191 - learning_rate: 0.0010
Epoch 4/15
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.9357 - loss: 0.1753 - val_accuracy: 0.8750 - val_loss: 0.2658 - learning_rate: 0.0010
Epoch 5/15
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.9405 - loss: 0.1578 - val_accuracy: 0.9375 - val_loss: 0.2233 - learning_rate: 0.0010
Epoch 6/15
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[

<keras.src.callbacks.history.History at 0x78dbd636f910>

In [12]:
# Evaluate the trained model on the test split; the cell displays the returned
# loss and accuracy values.
model.evaluate(test_set)
# Recorded run: roughly 91% accuracy on the 274 images left in the test split.

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 607ms/step - accuracy: 0.9046 - loss: 0.2690


[0.23401737213134766, 0.9124087691307068]

In [13]:
# Save the trained model to a .keras file in the current working directory.
model.save("pneumonia_efficientnet_tf.keras")

In [29]:
import os

import tensorflow as tf
import numpy as np

# Reload the saved model to check that the file on disk works on its own.
model = tf.keras.models.load_model("pneumonia_efficientnet_tf.keras")

# Pick an image that is actually present in the test split. A hard-coded file
# name is fragile here because part of the test split was moved to train at
# random, so any particular file may no longer be in this folder.
test_normal_dir = "/kaggle/working/chest/test/NORMAL"
img_path = os.path.join(test_normal_dir, sorted(os.listdir(test_normal_dir))[0])
print(f"Image: {img_path}")

# Load at the same 256x256 size the training datasets used and add a batch axis.
img = tf.keras.preprocessing.image.load_img(img_path, target_size=(256, 256))
x = tf.keras.preprocessing.image.img_to_array(img)
x = np.expand_dims(x, axis=0)

# The model ends in a 2-unit softmax and was trained with classes ordered
# ["NORMAL", "PNEUMONIA"], so index 0 is the probability of NORMAL.
probs = model.predict(x)[0]
pred = probs[0]
print("Normal" if pred > 0.5 else "Pneumonia")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
Normal
