In [1]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import os
import sys

In [2]:
# Define Paths
base_dir = '../datasets/chest_xray'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

In [3]:
# Check if paths are correct
if not all([os.path.exists(train_dir), os.path.exists(val_dir), os.path.exists(test_dir)]):
    print(f"Error: Could not find 'train', 'val', or 'test' directories in {base_dir}.")
    print("Please make sure the 'chest_xray' dataset is correctly placed.")
    # In a notebook, we'd stop here, but we'll set a flag for subsequent cells
    data_exists = False
else:
    print("Dataset directories found.")
    data_exists = True

Dataset directories found.


In [4]:
# Image Preprocessing Constants
IMG_WIDTH, IMG_HEIGHT = 150, 150
BATCH_SIZE = 32

In [5]:
# Generator for TRAINING data (with augmentation)
if data_exists:
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    # Generator for VALIDATION and TEST data (ONLY rescale)
    val_test_datagen = ImageDataGenerator(rescale=1./255)

In [6]:
# Create the generators from directories
if data_exists:
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(IMG_WIDTH, IMG_HEIGHT),
        batch_size=BATCH_SIZE,
        class_mode='binary' # NORMAL vs PNEUMONIA
    )

    validation_generator = val_test_datagen.flow_from_directory(
        val_dir,
        target_size=(IMG_WIDTH, IMG_HEIGHT),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=False 
    )

    test_generator = val_test_datagen.flow_from_directory(
        test_dir,
        target_size=(IMG_WIDTH, IMG_HEIGHT),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=False
    )

Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [7]:
# Build the CNN Model
if data_exists:
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid') # 1 output neuron, 0 = NORMAL, 1 = PNEUMONIA
    ])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Compile the Model
if data_exists:
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    model.summary()

In [9]:
# Train the Model
if data_exists:
    print("\nTraining the pneumonia model...")
    EPOCHS = 5 # Note: 5 epochs is low for real accuracy, but good for testing setup

    history = model.fit(
        train_generator,
        steps_per_epoch=max(1, train_generator.samples // BATCH_SIZE),
        epochs=EPOCHS,
        validation_data=validation_generator,
        validation_steps=max(1, validation_generator.samples // BATCH_SIZE)
    )


Training the pneumonia model...
Epoch 1/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 2s/step - accuracy: 0.7770 - loss: 0.5090 - val_accuracy: 0.8125 - val_loss: 0.5377
Epoch 2/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 1s/step - accuracy: 0.8848 - loss: 0.2697 - val_accuracy: 0.7500 - val_loss: 1.1013
Epoch 3/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 566ms/step - accuracy: 0.9120 - loss: 0.2245 - val_accuracy: 0.8125 - val_loss: 0.5866
Epoch 4/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 1s/step - accuracy: 0.9155 - loss: 0.2155 - val_accuracy: 0.8750 - val_loss: 0.4855
Epoch 5/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 1s/step - accuracy: 0.9241 - loss: 0.1878 - val_accuracy: 0.7500 - val_loss: 0.7006


In [10]:
# Evaluate the Model on the UNSEEN Test Set
if data_exists:
    print("\nEvaluating model on the unseen test set...")
    test_loss, test_accuracy = model.evaluate(
        test_generator,
        steps=max(1, test_generator.samples // BATCH_SIZE)
    )
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Evaluating model on the unseen test set...
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 748ms/step - accuracy: 0.8553 - loss: 0.3909
Test Loss: 0.3909
Test Accuracy: 85.53%


In [11]:
# Save the Keras Model
if data_exists:
    MODEL_DIR = '../models'
    os.makedirs(MODEL_DIR, exist_ok=True)
    model_filename = os.path.join(MODEL_DIR, 'pneumonia_model.keras')
    
    model.save(model_filename)

    print(f"\nModel training complete.")
    print(f"Model saved to {model_filename}")


Model training complete.
Model saved to ../models\pneumonia_model.keras


In [12]:
# Convert to TensorFlow Lite 
if data_exists:
    print("\nConverting model to TensorFlow Lite...")
    try:
        converter = tf.lite.TFLiteConverter.from_keras_model(model)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        
        tflite_model = converter.convert()
        
        tflite_model_filename = os.path.join(MODEL_DIR, 'pneumonia_model.tflite')
        with open(tflite_model_filename, 'wb') as f:
            f.write(tflite_model)
            
        print(f"Successfully converted and saved TFLite model to {tflite_model_filename}")

        # Compare file sizes
        keras_size = os.path.getsize(model_filename) / (1024 * 1024) # in MB
        tflite_size = os.path.getsize(tflite_model_filename) / (1024 * 1024) # in MB
        print(f"\nOriginal .keras model size: {keras_size:.2f} MB")
        print(f"New .tflite model size: {tflite_size:.2f} MB")

    except Exception as e:
        print(f"Error during TFLite conversion: {e}")


Converting model to TensorFlow Lite...
INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpsh55inrv\assets


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpsh55inrv\assets


Saved artifact at 'C:\Users\Asus\AppData\Local\Temp\tmpsh55inrv'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 150, 150, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  1907403853840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403854368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403861408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403860000: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403861056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403858416: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403859648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403861232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403857536: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1907403863344: TensorSpec(shape=(), dtype=tf.resource, name=None)
Successf