In [None]:
# Library imports

In [5]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# For image data loading and augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# For building the CNN model (we'll use MobileNetV2 as a good starting point)
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model

# For model callbacks during training
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# For evaluation metrics and plotting
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import seaborn as sns

# Reproducibility: pin both the NumPy and the TensorFlow global random
# generators to the same fixed seed, so that re-running the notebook from a
# fresh kernel starts from the same random state (initial splits and model
# initialization).
np.random.seed(42)
tf.random.set_seed(42)

In [6]:
# define globals

In [8]:
# --- Image Parameters ---
IMG_HEIGHT = 224  # Standard height for many pre-trained models
IMG_WIDTH = 224   # Standard width for many pre-trained models
CHANNELS = 3      # Pre-trained models expect 3 channels (RGB).
                  # Ultrasound images are often grayscale (1 channel),
                  # so they are expanded to 3 channels when loaded.

# --- Training Parameters ---
BATCH_SIZE = 32         # Number of samples per gradient update
VALIDATION_SPLIT = 0.2  # Fraction of the data to reserve for validation

# --- Data Directory ---
# !! IMPORTANT: Replace this with the actual path to your image dataset !!
# This directory should contain one subfolder per class, named as listed in
# CLASS_SUBDIRS below.
data_dir = './images'  # Example: make sure this path is correct

# Class subfolders expected directly under data_dir. Keeping the names in one
# place guarantees that the existence check and the warning text below always
# refer to the same folder.
CLASS_SUBDIRS = ('appendicitis_images', 'no_appendicitis')

# Sanity-check the dataset layout up front. This only prints diagnostics; it
# does not stop the notebook.
if not os.path.exists(data_dir):
    print(f"Error: Data directory not found at '{data_dir}'")
    print("Please update the 'data_dir' variable with the correct path to your image data.")
else:
    print(f"Data directory found: {data_dir}")
    # Warn about each expected class subfolder that is missing, naming the
    # exact folder that was looked for.
    for class_subdir in CLASS_SUBDIRS:
        if not os.path.exists(os.path.join(data_dir, class_subdir)):
            print(f"Warning: '{class_subdir}' subfolder not found. Ensure class folders exist.")

Data directory found: ./images


In [None]:
# data augmentation

In [None]:
print("Setting up ImageDataGenerators with augmentation...")

# Settings common to both generators: scale pixel values from [0, 255] to
# [0, 1] and reserve the same fraction of the data for validation.
_shared_datagen_kwargs = dict(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
)

# Random transformations applied to training images only.
_augmentation_kwargs = dict(
    rotation_range=20,        # rotate by up to 20 degrees
    width_shift_range=0.2,    # horizontal shift
    height_shift_range=0.2,   # vertical shift
    shear_range=0.2,          # shear mapping
    zoom_range=0.2,           # zoom in/out
    horizontal_flip=True,     # random left/right flip
    fill_mode='nearest',      # how pixels exposed by a transformation are filled
)

# --- Training Data Generator ---
# Rescaling plus augmentation.
train_datagen = ImageDataGenerator(**_shared_datagen_kwargs, **_augmentation_kwargs)

# --- Validation Data Generator ---
# Rescaling only: validation images must not be augmented.
validation_datagen = ImageDataGenerator(**_shared_datagen_kwargs)

# --- Load Images from Directories ---
# Both iterators read the same directory tree; the `subset` argument selects
# which side of the validation split each one yields.
train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',               # Ensure images are treated as RGB (3 channels)
                                    # Even if they are grayscale, ImageDataGenerator will convert them.
    batch_size=BATCH_SIZE,
    class_mode='binary',            # 'binary' for 2 classes (0 or 1)
    subset='training',              # Specify this is the training subset
    shuffle=True                    # Shuffle training data
)

validation_generator = validation_datagen.flow_from_directory(
    data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',            # Specify this is the validation subset
    shuffle=False                   # Do not shuffle validation data for consistent evaluation
)

# Summarize what was found so a wrong path or an unexpected class layout is
# visible immediately.
print("\nData generators created successfully!")
print(f"Found {train_generator.samples} training images belonging to {train_generator.num_classes} classes.")
print(f"Found {validation_generator.samples} validation images belonging to {validation_generator.num_classes} classes.")
print(f"Class indices: {train_generator.class_indices}")