<a href="https://colab.research.google.com/github/EricSiq/Understanding-DeepLearning/blob/main/Experiment_No7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import core libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
import matplotlib.pyplot as plt
import os
import shutil

# Install Keras Tuner for hyperparameter tuning
!pip install -q keras-tuner

# Import Keras Tuner library
import keras_tuner as kt

# --- Data Download and Preparation ---
# This block automates downloading the dataset from Kaggle.
# It requires a 'kaggle.json' file with your API credentials.
try:
    # Attempt to install the Kaggle library
    !pip install -q kaggle

    # Check if kaggle.json is present
    if not os.path.exists('/root/.kaggle/kaggle.json'):
        print("Kaggle credentials not found. Please upload your kaggle.json file.")
        from google.colab import files
        files.upload() # This will prompt you to upload the file
        !mkdir -p ~/.kaggle
        !mv kaggle.json ~/.kaggle/
        !chmod 600 ~/.kaggle/kaggle.json

    # Download the dataset if it's not already downloaded
    dataset_name = 'paultimothymooney/chest-xray-pneumonia'
    zip_file = 'chest-xray-pneumonia.zip'
    dataset_dir = 'chest_xray'

    if not os.path.exists(dataset_dir):
        print(f"Downloading dataset: {dataset_name}...")
        !kaggle datasets download -d {dataset_name}
        print(f"Unzipping {zip_file}...")
        !unzip -q {zip_file}
        print("Dataset successfully downloaded and unzipped.")
    else:
        print("Dataset already exists. Skipping download.")

except Exception as e:
    print(f"An error occurred during data setup: {e}")

In [None]:
# --- Define Data Generators with Augmentation ---
# Creates three directory-backed image generators (train / validation / test)
# and the IMG_SIZE / BATCH_SIZE constants used by the rest of the notebook.
try:
    # Locate the train / val / test folders inside the unzipped dataset
    _split_root = os.path.join(dataset_dir, 'chest_xray')
    train_dir = os.path.join(_split_root, 'train')
    val_dir = os.path.join(_split_root, 'val')
    test_dir = os.path.join(_split_root, 'test')

    # Fail early if any one of the three folders is missing
    if any(not os.path.exists(folder) for folder in (train_dir, val_dir, test_dir)):
        raise FileNotFoundError("Dataset directories not found. Please check paths.")

    # Random transformations applied to training images only,
    # to prevent overfitting
    _augmentation = dict(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )
    train_datagen = ImageDataGenerator(rescale=1./255, **_augmentation)

    # Validation and test images are only rescaled to [0, 1]
    test_datagen = ImageDataGenerator(rescale=1./255)

    # Image side length (pixels) and batch size shared by all generators
    IMG_SIZE = 150
    BATCH_SIZE = 32

    # Options common to every flow_from_directory call
    _flow_options = dict(
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
    )

    train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)

    validation_generator = test_datagen.flow_from_directory(val_dir, **_flow_options)

    # The test generator is the only one created with shuffling disabled
    test_generator = test_datagen.flow_from_directory(
        test_dir,
        shuffle=False,
        **_flow_options
    )

except Exception as e:
    print(f"An error occurred during data preprocessing: {e}")

In [None]:
# --- Define a Hypermodel Building Function ---
def build_model(hp):
    """
    Builds a CNN model with tunable hyperparameters.

    Architecture: an input of shape (IMG_SIZE, IMG_SIZE, 3), followed by
    2-4 blocks of Conv2D(3x3, ReLU) -> BatchNormalization -> MaxPooling2D(2x2),
    a Flatten layer, 1-2 blocks of Dense(ReLU) -> Dropout, and a single
    sigmoid output unit. The model is compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric.

    Tuned hyperparameters: 'num_conv_layers', 'filters_{i}',
    'num_dense_layers', 'units_{i}', 'dropout_{i}' and 'learning_rate'.

    Relies on the module-level constant IMG_SIZE.

    Args:
      hp: A KerasTuner HyperParameters instance.

    Returns:
      A compiled Keras Sequential model.
    """
    model = Sequential()

    # Declare the input shape once with an explicit Input layer instead of
    # passing `input_shape` to Conv2D. This way no conv layer ever receives
    # `input_shape=None`, which Keras versions that call tuple() on that
    # argument reject with a TypeError.
    model.add(tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)))

    # Tune the number of convolutional layers
    num_conv_layers = hp.Int('num_conv_layers', min_value=2, max_value=4, step=1)

    for i in range(num_conv_layers):
        # Tune the number of filters in each conv layer
        filters = hp.Int(f'filters_{i}', min_value=32, max_value=128, step=32)
        model.add(Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            activation='relu'
        ))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # Flatten the output for the dense layers
    model.add(Flatten())

    # Tune the number of dense layers
    num_dense_layers = hp.Int('num_dense_layers', min_value=1, max_value=2, step=1)
    for i in range(num_dense_layers):
        # Tune the number of units in each dense layer
        units = hp.Int(f'units_{i}', min_value=128, max_value=512, step=128)
        model.add(Dense(units=units, activation='relu'))
        # Tune the dropout rate
        dropout_rate = hp.Float(f'dropout_{i}', min_value=0.2, max_value=0.6, step=0.1)
        model.add(Dropout(rate=dropout_rate))

    # Final output layer: one sigmoid unit for the binary label
    model.add(Dense(1, activation='sigmoid'))

    # Tune the learning rate for the Adam optimizer (sampled on a log scale)
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

# --- Instantiate and Run the Tuner ---
# Searches the build_model hyperparameter space with Hyperband, then reports
# the best configuration and loads the best model.
try:
    # Hyperband settings: optimise validation accuracy; results are stored
    # under my_dir/pneumonia_tuning
    _hyperband_settings = {
        'objective': 'val_accuracy',
        'max_epochs': 20,  # Max number of epochs for a single trial
        'factor': 3,       # Reduction factor for Hyperband
        'directory': 'my_dir',
        'project_name': 'pneumonia_tuning',
    }
    tuner = kt.Hyperband(build_model, **_hyperband_settings)

    print("Starting hyperparameter search...")

    # Stop a trial once val_loss has not improved for 3 epochs, to prevent
    # long training on poor models
    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

    # Whole batches per pass over the training set (any remainder is dropped)
    _batches_per_epoch = train_generator.samples // BATCH_SIZE

    # NOTE(review): Hyperband appears to assign each trial its own epoch
    # budget (bounded by max_epochs above), so epochs=50 is likely
    # overridden - confirm against the KerasTuner documentation.
    tuner.search(
        train_generator,
        epochs=50,
        validation_data=validation_generator,
        callbacks=[stop_early],
        steps_per_epoch=_batches_per_epoch,
    )

    print("Hyperparameter search complete.")

    # Retrieve the top-ranked hyperparameter set and the matching model
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_model = tuner.get_best_models(num_models=1)[0]

    print("\n--- Best Hyperparameters Found ---")
    print(f"Number of Conv Layers: {best_hps.get('num_conv_layers')}")
    print(f"Number of Dense Layers: {best_hps.get('num_dense_layers')}")
    print(f"Optimal learning rate: {best_hps.get('learning_rate'):.5f}")

except Exception as e:
    print(f"An error occurred during hyperparameter tuning: {e}")