In [5]:
# Imports & Paths
import os

import kagglehub
import matplotlib.pyplot as plt
import tensorflow as tf
import torch
import torch.nn as nn
import torchvision
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Seed every RNG this notebook can draw from. The model and data generators
# below are Keras/TensorFlow objects, so seeding torch alone leaves weight
# initialisation, shuffling and augmentation unseeded.
SEED = 42
tf.keras.utils.set_random_seed(SEED)  # seeds Python `random`, NumPy and TensorFlow
torch.manual_seed(SEED)               # kept last so the cell still displays the torch Generator

<torch._C.Generator at 0x1084b15d0>

In [6]:
# Download the dataset through kagglehub and keep the directory it returns.
path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")

# The image folders live under "<path>/chest_xray"; derive the three split
# directories from that single root.
base_dir = os.path.join(path, "chest_xray")
train_dir, val_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'val', 'test')
)

Resuming download from 293601280 bytes (2169764155 bytes left)...
Resuming download from https://www.kaggle.com/api/v1/datasets/download/paultimothymooney/chest-xray-pneumonia?dataset_version_number=2 (293601280/2463365435) bytes left.


100%|██████████| 2.29G/2.29G [03:27<00:00, 10.4MB/s]

Extracting files...





In [8]:
# Sanity check: show where each split directory points, then list what is
# directly inside it.
for dir_label, dir_path in (
    ("Train dir:", train_dir),
    ("Val dir:", val_dir),
    ("Test dir:", test_dir),
):
    print(dir_label, dir_path)

for contents_label, dir_path in (
    ("Train dir contents:", train_dir),
    ("Val dir contents:", val_dir),
    ("Test dir contents:", test_dir),
):
    print(contents_label, os.listdir(dir_path))

Train dir: /Users/daniel/.cache/kagglehub/datasets/paultimothymooney/chest-xray-pneumonia/versions/2/chest_xray/train
Val dir: /Users/daniel/.cache/kagglehub/datasets/paultimothymooney/chest-xray-pneumonia/versions/2/chest_xray/val
Test dir: /Users/daniel/.cache/kagglehub/datasets/paultimothymooney/chest-xray-pneumonia/versions/2/chest_xray/test
Train dir contents: ['.DS_Store', 'PNEUMONIA', 'NORMAL']
Val dir contents: ['PNEUMONIA', 'NORMAL']
Test dir contents: ['PNEUMONIA', 'NORMAL']


In [None]:
# Data Generators & Augmentation
# Input settings shared by all three generators, named once so the splits
# cannot drift apart. IMG_SIZE must stay in sync with the model's input_shape.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
# Passed as `seed=` to the shuffled generators so batch order (and, per the
# Keras docs, the random transformations) repeat from run to run.
DATA_SEED = 42

# Training images are rescaled to [0, 1] and randomly perturbed on the fly.
# NOTE(review): horizontal_flip mirrors left/right anatomy in chest X-rays —
# confirm this augmentation is intended.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)
# Validation and test images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    seed=DATA_SEED,
)
val_gen = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    seed=DATA_SEED,
)
# shuffle=False keeps the test batches in directory order, so predictions can
# be lined up with the generator's file/label order.
test_gen = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False,
)

In [None]:
# Define the CNN Model
# Three Conv -> BatchNorm -> MaxPool stages (32, 64, 128 filters) feed a
# dense head that ends in a single sigmoid unit for the binary label.
model = Sequential()

# First stage also declares the input shape: 224x224 images with 3 channels.
model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(224, 224, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Remaining stages differ only in the number of filters.
for n_filters in (64, 128):
    model.add(Conv2D(n_filters, (3, 3), padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))

# Classifier head: flatten the feature maps, one hidden layer with dropout,
# then the sigmoid output.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Adam with its default settings; binary cross-entropy pairs with the sigmoid
# output, and accuracy is the only tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train the Model
# Fit for 20 epochs on the augmented training generator, with val_gen supplied
# as validation data. The returned History object is kept because the plotting
# cell reads its per-epoch metrics.
history = model.fit(x=train_gen, validation_data=val_gen, epochs=20)

In [None]:
# Evaluate on the Test Set
# The model was compiled with one metric ('accuracy'), so evaluate() yields
# exactly two values: the loss followed by the accuracy.
test_scores = model.evaluate(test_gen)
test_loss, test_acc = test_scores
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
# Plot Training & Validation Accuracy
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

fig, ax = plt.subplots()
# Use explicit 1-based x values so the first epoch is drawn at x=1 rather than
# at the list index 0. Each curve gets its own range in case the two series
# have different lengths.
ax.plot(range(1, len(train_acc) + 1), train_acc, label='Train Accuracy')
ax.plot(range(1, len(val_acc) + 1), val_acc, label='Validation Accuracy')
ax.set(
    title='Training vs. Validation Accuracy',
    xlabel='Epoch',
    ylabel='Accuracy',
)
ax.legend()
plt.show()