In [1]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the libraries imported below; consider narrowing the filter while debugging.
import warnings
warnings.filterwarnings('ignore')

In [2]:
import os
import shutil
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from PIL import Image
from itertools import cycle
from skimage import color

from tensorflow import keras
from tensorflow.keras import layers
from scipy.signal import wiener

from sklearn.metrics import (
    confusion_matrix, classification_report, roc_curve, auc,
    precision_recall_curve, matthews_corrcoef
)
from sklearn.preprocessing import label_binarize
from sklearn.manifold import TSNE
from umap import UMAP

from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import (
    VGG16, VGG19, ResNet50, InceptionV3, Xception, MobileNetV2, 
    DenseNet121, EfficientNetB0, InceptionResNetV2
)
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input, Average
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import Precision

2025-08-05 11:03:09.276544: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754391789.449128      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754391789.503202      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# --- 1. CONFIGURATION AND SETUP ---

# --- Dataset and Output Paths ---

In [3]:
# Root of the original dataset; setup_directories() reads its 'train' and
# 'val' sub-folders.
SOURCE_DIR = '/kaggle/input/multicancer-dataset/Cancer_Dataset'
# Working copy of the dataset. setup_directories() deletes and rebuilds this
# directory with fresh train/val/test splits on every run.
BASE_DIR = '/kaggle/working/cervix_cancer_dataset'
# Presumably where plots and model artefacts are written — not used in the
# cells shown here; TODO confirm against the training loop.
OUTPUT_DIR = '/kaggle/working/model_outputs'

# --- Model and Training Parameters ---

In [4]:
# Target image size used when loading and resizing images for Grad-CAM.
IMG_HEIGHT = 224
IMG_WIDTH = 224
# Batch size; also used as the GAN training batch size and as the minimum
# number of images a class needs before a GAN is trained on it.
BATCH_SIZE = 32
# Presumably the classifier's epoch budget and early-stopping patience — the
# training loop is not part of the cells shown here; TODO confirm.
EPOCHS = 25
EARLY_STOPPING_PATIENCE = 10


In [5]:
# Maps each source folder name (under SOURCE_DIR/<split>/) to the readable
# class folder name created by setup_directories() under BASE_DIR/<split>/.
CLASS_MAP = {
    "cervix_dyk": "Dyskeratosis",
    "cervix_koc": "Koilocytes",
    "cervix_mep": "Metaplastic_cells",
    "cervix_pab": "Parabasal_cells",
    "cervix_sfi": "Superficial_Intermediate_cells"
}
# Placeholder only: NUM_CLASSES is overwritten after setup_directories() runs,
# based on the class folders actually found in the training directory.
NUM_CLASSES = 0 

# --- Preprocessing Toggles ---

In [None]:
# Each flag below switches one stage of the master preprocessor built by
# get_preprocessing_function(). Stages run in the fixed order coded there,
# not in the order the flags are listed here.
APPLY_SEGMENTATION = False
APPLY_CLAHE = False
APPLY_GAUSSIAN_BLUR = False 
APPLY_MEDIAN_FILTER = False
APPLY_WIENER_FILTER = False
APPLY_HISTOGRAM_EQUALIZATION = False
APPLY_LAPLACIAN_FILTER = False
APPLY_AVERAGE_FILTER = False
APPLY_SOBEL_FILTER = False
APPLY_CANNY_FILTER = False
APPLY_STAIN_NORMALIZATION = False

# --- Advanced Pre-processing & Augmentation Flags ---
APPLY_HAIR_REMOVAL = False
APPLY_BILATERAL_FILTER = False
# Applied only when the preprocessor is built with is_training=True.
APPLY_RANDOM_ERASING = False
# Master switch for GAN: gates the GAN configuration cell and the per-class
# GAN training that follows dataset setup.
APPLY_GAN_AUGMENTATION = False # Master switch for GAN

In [7]:
# --- GAN-Specific Configuration ---
# These names are defined unconditionally so that the GAN helpers can be
# called (or inspected) even while APPLY_GAN_AUGMENTATION is False; the flag
# itself still decides whether any GAN is actually trained.

# Class folder names as created by setup_directories() (the CLASS_MAP values).
# Previously referenced but never defined, which raised NameError as soon as
# the GAN switch was turned on.
TARGET_CLASSES = list(CLASS_MAP.values())

# Classes that receive synthetic images; defaults to every target class.
CLASSES_TO_AUGMENT = TARGET_CLASSES
# Number of synthetic images written per augmented class.
NUM_IMAGES_TO_GENERATE = 100
# Side length of GAN images; 224 matches the classifier input size.
GAN_IMG_SIZE = 224
# Length of the random noise vector fed to the generator.
LATENT_DIM = 100
# Number of passes over each class's images during GAN training.
GAN_EPOCHS = 75

In [8]:
def setup_directories(seed=42):
    """
    Rebuild BASE_DIR with a fresh 70/15/15 train/val/test split per class.

    For every (source_folder, dest_folder) pair in CLASS_MAP, images are
    collected from both SOURCE_DIR/train/<source_folder> and
    SOURCE_DIR/val/<source_folder>, shuffled, split, and copied into
    BASE_DIR/<split>/<dest_folder>. Any existing BASE_DIR is deleted first.

    Args:
        seed: Seed for the shuffle so the split is reproducible across runs.
            Pass None for a non-deterministic split.
    """
    # 1. Clean up old directory and create the new structure.
    if os.path.exists(BASE_DIR):
        shutil.rmtree(BASE_DIR)

    train_dir = os.path.join(BASE_DIR, 'train')
    val_dir = os.path.join(BASE_DIR, 'val')
    test_dir = os.path.join(BASE_DIR, 'test')
    for split_dir in (train_dir, val_dir, test_dir):
        os.makedirs(split_dir)

    print(f"New dataset will be created at: {BASE_DIR}")

    rng = np.random.default_rng(seed)
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp')

    # 2. Iterate through the class mapping.
    for source_folder, dest_folder in CLASS_MAP.items():
        print(f"\nProcessing: '{source_folder}' -> '{dest_folder}'")

        # Each entry is (source split name, absolute file path).
        all_files = []

        # 3. Collect all files from BOTH the train and val directories.
        for subfolder in ['train', 'val']:
            src_path = os.path.join(SOURCE_DIR, subfolder, source_folder)

            if os.path.exists(src_path):
                # Sorted so the seeded shuffle starts from a stable order;
                # os.listdir() makes no ordering guarantee.
                files_found = [(subfolder, os.path.join(src_path, f))
                               for f in sorted(os.listdir(src_path))
                               if f.lower().endswith(image_exts)]
                all_files.extend(files_found)
                print(f"  - Found {len(files_found)} images in '{subfolder}' directory.")
            else:
                print(f"  - WARNING: Directory not found, skipping: {src_path}")

        if not all_files:
            print(f"  - No images found for this class. Skipping.")
            continue

        # 4. Create the new destination folders (e.g., 'train/Dyskeratosis').
        for d in [train_dir, val_dir, test_dir]:
            os.makedirs(os.path.join(d, dest_folder), exist_ok=True)

        # 5. Shuffle and split the combined list of files.
        rng.shuffle(all_files)
        train_end = int(len(all_files) * 0.7)
        val_end = train_end + int(len(all_files) * 0.15)

        splits = (
            (train_dir, all_files[:train_end]),
            (val_dir, all_files[train_end:val_end]),
            (test_dir, all_files[val_end:]),
        )

        # 6. Copy the files into their new homes. A file name may exist in
        # both source splits; give later duplicates a distinct name so one
        # image does not silently overwrite another.
        used_names = set()
        for split_dir, entries in splits:
            for subfolder, f_path in entries:
                name = os.path.basename(f_path)
                if name in used_names:
                    name = f"{subfolder}_{name}"
                stem, ext = os.path.splitext(name)
                counter = 1
                while name in used_names:
                    name = f"{stem}_{counter}{ext}"
                    counter += 1
                used_names.add(name)
                shutil.copy(f_path, os.path.join(split_dir, dest_folder, name))

    print("\nDataset setup and file copying complete!")



# --- 2. GAN AUGMENTATION AND PREPROCESSING FUNCTIONS ---

In [9]:
def build_generator(latent_dim):
    """
    Assemble the DCGAN generator.

    A noise vector of length `latent_dim` is projected to a 7x7x256 tensor and
    then passed through five stride-2 transposed convolutions
    (7 -> 14 -> 28 -> 56 -> 112 -> 224). The last one emits 3 channels through
    a tanh activation, giving a (224, 224, 3) output.
    """
    # Projection of the latent vector onto the initial 7x7 feature map.
    stack = [
        layers.Dense(7 * 7 * 256, use_bias=False, input_shape=(latent_dim,)),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((7, 7, 256)),
    ]

    # Four identical upsampling blocks, each doubling the spatial size.
    for filters in (128, 64, 32, 16):
        stack += [
            layers.Conv2DTranspose(filters, (5, 5), strides=(2, 2), padding='same', use_bias=False),
            layers.BatchNormalization(),
            layers.LeakyReLU(),
        ]

    # Final upsampling (112x112 -> 224x224) straight to 3 channels.
    stack.append(
        layers.Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh')
    )

    return keras.Sequential(stack, name="generator")

def build_discriminator(input_shape):
    """
    Assemble the DCGAN discriminator.

    Three stride-2 5x5 convolutions (64, 128, 256 filters), each followed by
    LeakyReLU and 30% dropout, then a flatten and a single-unit dense layer
    that outputs an unbounded logit (no sigmoid).
    """
    stack = [layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same', input_shape=input_shape)]
    stack += [layers.LeakyReLU(), layers.Dropout(0.3)]

    for filters in (128, 256):
        stack += [
            layers.Conv2D(filters, (5, 5), strides=(2, 2), padding='same'),
            layers.LeakyReLU(),
            layers.Dropout(0.3),
        ]

    stack += [
        layers.Flatten(),
        layers.Dense(1),  # Logit output
    ]
    return keras.Sequential(stack, name="discriminator")

def train_gan_and_generate_images(class_name, num_to_generate, base_train_dir):
    """
    Train a DCGAN on one class and write synthetic images into its train folder.

    Args:
        class_name: Name of the class sub-folder inside `base_train_dir`.
        num_to_generate: Number of synthetic images to create.
        base_train_dir: Training directory containing one folder per class.

    The function returns early (after printing a message) when the class
    folder is missing or holds fewer than BATCH_SIZE usable images. Generated
    images are saved as 'synthetic_gan_<i>.png' next to the real images.
    """
    print(f"\n--- Starting GAN Augmentation for class: {class_name} ---")
    target_dir = os.path.join(base_train_dir, class_name)
    if not os.path.exists(target_dir):
        print(f"ERROR: Cannot find source directory for GAN training: {target_dir}")
        return

    # 1. Load Data, resizing to GAN_IMG_SIZE.
    real_images = []
    for filename in os.listdir(target_dir):
        # Images produced by an earlier run of this function must not be fed
        # back in as "real" examples.
        if filename.startswith('synthetic_gan_'):
            continue
        # Same extension set as setup_directories(), so copied .bmp files are
        # not silently ignored.
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            img = load_img(os.path.join(target_dir, filename), target_size=(GAN_IMG_SIZE, GAN_IMG_SIZE))
            real_images.append(img_to_array(img))

    if len(real_images) < BATCH_SIZE:
        print(f"WARNING: Not enough images ({len(real_images)}) in {target_dir} to train GAN with batch size {BATCH_SIZE}. Skipping.")
        return

    # Scale pixels to [-1, 1] to match the generator's tanh output range.
    real_images = (np.array(real_images) - 127.5) / 127.5
    # drop_remainder=True keeps every batch the same size, which train_step
    # relies on when it reads images.shape[0].
    train_dataset = tf.data.Dataset.from_tensor_slices(real_images).shuffle(len(real_images)).batch(BATCH_SIZE, drop_remainder=True)

    # 2. Build Models with the correct input shape
    generator = build_generator(LATENT_DIM)
    discriminator = build_discriminator((GAN_IMG_SIZE, GAN_IMG_SIZE, 3))
    # The discriminator ends in a raw logit, hence from_logits=True.
    cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    def discriminator_loss(real_output, fake_output):
        """Real images should score 1, generated images 0."""
        return cross_entropy(tf.ones_like(real_output), real_output) + cross_entropy(tf.zeros_like(fake_output), fake_output)

    def generator_loss(fake_output):
        """The generator is rewarded when its images are scored as real (1)."""
        return cross_entropy(tf.ones_like(fake_output), fake_output)

    gen_optimizer = tf.keras.optimizers.Adam(1.5e-4, beta_1=0.5)
    disc_optimizer = tf.keras.optimizers.Adam(1.5e-4, beta_1=0.5)

    @tf.function
    def train_step(images):
        """One simultaneous update of generator and discriminator."""
        noise = tf.random.normal([images.shape[0], LATENT_DIM])
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_images = generator(noise, training=True)
            real_output = discriminator(images, training=True)
            fake_output = discriminator(generated_images, training=True)
            gen_loss = generator_loss(fake_output)
            disc_loss = discriminator_loss(real_output, fake_output)

        grads_gen = gen_tape.gradient(gen_loss, generator.trainable_variables)
        grads_disc = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
        gen_optimizer.apply_gradients(zip(grads_gen, generator.trainable_variables))
        disc_optimizer.apply_gradients(zip(grads_disc, discriminator.trainable_variables))

    # 3. Training Loop
    print(f"Training GAN for {GAN_EPOCHS} epochs on {GAN_IMG_SIZE}x{GAN_IMG_SIZE} images...")
    for epoch in range(GAN_EPOCHS):
        for image_batch in train_dataset:
            train_step(image_batch)
        if (epoch + 1) % 10 == 0:
            print(f"  - GAN Epoch {epoch + 1}/{GAN_EPOCHS} completed.")

    # 4. Generate and Save Images
    print("Generating synthetic images...")
    noise = tf.random.normal([num_to_generate, LATENT_DIM])
    generated_images = generator(noise, training=False)
    # Map [-1, 1] back to [0, 255]; clip first so rounding can never wrap
    # around in the uint8 cast.
    generated_images = np.clip(generated_images.numpy() * 127.5 + 127.5, 0, 255).astype(np.uint8)

    for i, img_array in enumerate(generated_images):
        # The array is already uint8 RGB, so PIL can wrap it directly. The
        # previous array_to_img call referenced a name that was never imported.
        img = Image.fromarray(img_array)
        img.save(os.path.join(target_dir, f'synthetic_gan_{i+1}.png'))

    print(f"Successfully added {num_to_generate} synthetic images to '{target_dir}'")

In [10]:
def apply_stain_normalization(image):
    """
    Reinhard-style colour normalisation performed in LAB space.

    Every LAB channel of the input is standardised with its own mean and
    standard deviation and then rescaled to fixed target statistics, so that
    all images share one colour profile.

    Args:
        image (numpy.ndarray): RGB image (uint8).

    Returns:
        numpy.ndarray: Normalised RGB image (uint8).
    """
    # Target (mean, std) per channel, in the order L*, a*, b*.
    target_stats = ((62.4, 31.0), (-0.0001, 1.0), (0.0001, 1.0))

    # rgb2lab expects floats in [0, 1].
    lab = color.rgb2lab(image.astype(np.float32) / 255.0)

    matched = np.zeros_like(lab)
    for channel, (target_mean, target_std) in enumerate(target_stats):
        plane = lab[:, :, channel]
        source_mean = np.mean(plane)
        source_std = np.std(plane)
        # The small epsilon guards against a perfectly flat channel.
        matched[:, :, channel] = (plane - source_mean) / (source_std + 1e-8) * target_std + target_mean

    # Back to RGB, clamp to [0, 1], then rescale to 8-bit.
    rgb = np.clip(color.lab2rgb(matched), 0, 1)
    return (rgb * 255).astype(np.uint8)

def apply_hair_removal(image):
    """
    Inpaint thin dark structures found by a black-hat transform.

    A 17x17 rectangular black-hat on the grayscale image highlights dark
    details; responses above 10 become the mask that is filled in with
    Telea inpainting (radius 3).
    """
    structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (17, 17))
    dark_details = cv2.morphologyEx(
        cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), cv2.MORPH_BLACKHAT, structuring_element
    )
    inpaint_mask = cv2.threshold(dark_details, 10, 255, cv2.THRESH_BINARY)[1]
    return cv2.inpaint(image, inpaint_mask, 3, cv2.INPAINT_TELEA)

def apply_bilateral_filter(image):
    """Edge-preserving smoothing: bilateral filter, diameter 9, both sigmas 75."""
    smoothed = cv2.bilateralFilter(image, 9, 75, 75)
    return smoothed

def random_erasing(img):
    """
    Random-erasing augmentation.

    With probability 0.5 the image is returned untouched. Otherwise a
    rectangle whose sides are 5-20% of the image height and width, placed at
    a random top-left corner, is filled with one random intensity in
    [0, 255]. The part of the rectangle that falls outside the image is
    simply cut off by the slice.

    Args:
        img (numpy.ndarray): Image of shape (height, width, channels).

    Returns:
        numpy.ndarray: The input itself when nothing is erased, otherwise a
        modified copy. The caller's array is never altered.
    """
    if np.random.rand() > 0.5:
        return img
    h, w, _ = img.shape
    x = np.random.randint(0, w)
    y = np.random.randint(0, h)
    h_erase = int(h * np.random.uniform(0.05, 0.2))
    w_erase = int(w * np.random.uniform(0.05, 0.2))
    # Work on a copy: writing into `img` would silently corrupt the array the
    # caller still holds.
    erased = img.copy()
    # randint's upper bound is exclusive, so 256 is needed to reach 255.
    erased[y:y+h_erase, x:x+w_erase] = np.random.randint(0, 256)
    return erased


def apply_segmentation(image, threshold=240):
    """
    Black out the bright background of an image using a fixed gray threshold.

    Pixels whose 8-bit grayscale value is at or below `threshold` are kept;
    brighter pixels (treated as background) are set to zero.

    Args:
        image (numpy.ndarray): An input image in RGB format (uint8).
        threshold (int): Grayscale cut-off on the 0-255 scale. Defaults to
            240, which removes only very light/white areas.

    Returns:
        numpy.ndarray: The image with background pixels set to black.
    """
    # Use OpenCV's 8-bit grayscale conversion. skimage's rgb2gray returns
    # floats in [0, 1], which made a 0-255 threshold meaningless (every pixel
    # passed) and produced a float mask that cv2.bitwise_and cannot accept.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # THRESH_BINARY_INV: values <= threshold become 255 (keep), others 0.
    _, mask = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

    # Use the 8-bit mask to black out the background.
    segmented_image = cv2.bitwise_and(image, image, mask=mask)

    return segmented_image

def apply_clahe(image):
    """
    Contrast-limited adaptive histogram equalisation on the lightness channel.

    The RGB image is converted to LAB, CLAHE (clip limit 3.0, 8x8 tiles) is
    applied to L only, and the result is converted back to RGB.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_RGB2LAB))
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    enhanced_lab = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)

def apply_gaussian_blur(image):
    """Gaussian blur with a 5x5 kernel; sigma 0 lets OpenCV derive it from the kernel size."""
    kernel_size = (5, 5)
    return cv2.GaussianBlur(image, kernel_size, 0)

def apply_median_blur(image):
    """Median filter with aperture size 5."""
    aperture = 5
    return cv2.medianBlur(image, aperture)

def apply_wiener_filter(image):
    """
    Channel-wise Wiener denoising.

    The image is scaled to [0, 1] as float64, each channel is filtered
    independently with scipy's `wiener`, and the result is clipped and
    converted back to uint8.
    """
    scaled = image.astype(np.float64) / 255.0
    denoised = cv2.merge([wiener(plane) for plane in cv2.split(scaled)])
    clipped = np.clip(denoised, 0, 1)
    return (clipped * 255).astype(np.uint8)

def apply_histogram_equalization(image):
    """
    Global histogram equalisation of the luma channel.

    The RGB image is converted to YCrCb, only Y is equalised, and the image
    is converted back to RGB.
    """
    luma, chroma_r, chroma_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb))
    equalized = cv2.merge([cv2.equalizeHist(luma), chroma_r, chroma_b])
    return cv2.cvtColor(equalized, cv2.COLOR_YCrCb2RGB)

def apply_laplacian_filter(image):
    """
    Absolute Laplacian response of the image, returned as uint8.

    The Laplacian is computed in float64 so negative responses are kept, its
    magnitude is taken, and values are saturated to the 0-255 range.

    Args:
        image (numpy.ndarray): Input image (uint8 in the preprocessing pipeline).

    Returns:
        numpy.ndarray: uint8 array with the same shape as `image`.
    """
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    abs_laplacian = np.absolute(laplacian)
    # Saturate before casting: the magnitude can exceed 255, and a bare
    # np.uint8() cast wraps around, turning the strongest edges dark.
    return np.clip(abs_laplacian, 0, 255).astype(np.uint8)

def apply_average_filter(image):
    """Box (mean) filter with a 5x5 window."""
    window = (5, 5)
    return cv2.blur(image, window)

def apply_sobel_filter(image):
    """
    Sobel gradient magnitude rendered as a 3-channel image.

    Horizontal and vertical 5x5 Sobel derivatives of the grayscale image are
    combined into a magnitude, min-max normalised to 0-255, cast to uint8 and
    replicated to three channels.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5)
    magnitude = np.sqrt(grad_x * grad_x + grad_y * grad_y)
    magnitude_8bit = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    return cv2.cvtColor(magnitude_8bit, cv2.COLOR_GRAY2RGB)

def apply_canny_filter(image):
    """Canny edge map (thresholds 100 and 200) returned as a 3-channel image."""
    edge_map = cv2.Canny(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), 100, 200)
    return cv2.cvtColor(edge_map, cv2.COLOR_GRAY2RGB)

def get_preprocessing_function(model_specific_preprocess_input, is_training=False):
    """
    Build the per-image preprocessing callable.

    The returned function casts the image to uint8, runs every filter whose
    APPLY_* flag is set (in a fixed order), casts to float32, optionally
    applies random erasing (training only), and finally applies either
    `model_specific_preprocess_input` or, when that is falsy, a division by 255.
    """
    def master_preprocessor(image):
        # astype() yields a new uint8 array, which the OpenCV filters expect.
        frame = image.astype('uint8')

        # Ordered (flag, filter) table; the flags are read on every call so
        # later changes to the toggles are honoured.
        stages = (
            (APPLY_STAIN_NORMALIZATION, apply_stain_normalization),
            (APPLY_HAIR_REMOVAL, apply_hair_removal),
            (APPLY_BILATERAL_FILTER, apply_bilateral_filter),
            (APPLY_SEGMENTATION, apply_segmentation),
            (APPLY_WIENER_FILTER, apply_wiener_filter),
            (APPLY_MEDIAN_FILTER, apply_median_blur),
            (APPLY_AVERAGE_FILTER, apply_average_filter),
            (APPLY_GAUSSIAN_BLUR, apply_gaussian_blur),
            (APPLY_CLAHE, apply_clahe),
            (APPLY_HISTOGRAM_EQUALIZATION, apply_histogram_equalization),
            (APPLY_LAPLACIAN_FILTER, apply_laplacian_filter),
            (APPLY_SOBEL_FILTER, apply_sobel_filter),
            (APPLY_CANNY_FILTER, apply_canny_filter),
        )
        for enabled, stage in stages:
            if enabled:
                frame = stage(frame)

        # Float32 from here on, for augmentation and model input.
        frame = frame.astype('float32')

        # Augmentation that must only touch training images.
        if is_training and APPLY_RANDOM_ERASING:
            frame = random_erasing(frame)

        # Last step: plain [0, 1] rescale when no model-specific function is
        # supplied, otherwise defer to that function.
        if not model_specific_preprocess_input:
            return frame / 255.0
        return model_specific_preprocess_input(frame)

    return master_preprocessor

# --- Function to Create the Custom CNN Model ---

In [11]:
def create_custom_cnn(input_shape, num_output_units, last_layer_activation):
    """
    Build the custom CNN, print its summary and return it.

    The network alternates 1x1 and larger ReLU convolutions with batch
    normalisation and three 3x3 max-pooling steps, ends in a global average
    pooling layer named 'feature_extractor_layer', and finishes with a dense
    layer of `num_output_units` units using `last_layer_activation`.
    """
    def relu_conv(filters, size):
        """Square ReLU convolution with the given filter count and kernel size."""
        return layers.Conv2D(filters, (size, size), activation='relu')

    network = keras.Sequential()
    network.add(layers.Input(shape=input_shape))

    # Stage 1
    network.add(relu_conv(16, 1))
    network.add(layers.BatchNormalization())
    network.add(layers.MaxPooling2D(3, 3))

    # Stage 2
    network.add(relu_conv(32, 1))
    network.add(relu_conv(32, 3))
    network.add(layers.BatchNormalization())
    network.add(layers.MaxPooling2D(3, 3))

    # Stage 3
    network.add(relu_conv(64, 1))
    network.add(layers.BatchNormalization())
    network.add(relu_conv(64, 5))
    network.add(layers.BatchNormalization())
    network.add(layers.MaxPooling2D(3, 3))

    # Stage 4
    network.add(relu_conv(16, 1))
    network.add(layers.BatchNormalization())
    network.add(relu_conv(16, 5))
    network.add(layers.BatchNormalization())

    # Named pooling layer so later code can look it up by name.
    network.add(layers.GlobalAveragePooling2D(name='feature_extractor_layer'))
    network.add(layers.Dense(num_output_units, activation=last_layer_activation))

    print("--- Custom CNN Model Summary ---")
    network.summary()
    return network

# --- 3. VISUALIZATION AND REPORTING FUNCTIONS ---

In [12]:
def plot_training_history(history, model_name, save_dir):
    """
    Plot training and validation curves for accuracy, loss and precision.

    Args:
        history: Object with a `.history` dict mapping metric names to
            per-epoch value lists (as returned by Keras `fit`).
        model_name: Used in panel titles and in the output file name.
        save_dir: Directory where '<model_name>_training_history.png' is saved.

    Keras may append a numeric suffix to a metric name when several metric
    objects are created in one session (e.g. 'precision_1'). Each metric is
    therefore matched by prefix when the exact key is absent, and a panel
    whose metric was not recorded is labelled instead of raising KeyError.
    """
    logs = history.history
    metrics = ['accuracy', 'loss', 'precision']
    fig, axes = plt.subplots(1, len(metrics), figsize=(20, 5))

    for ax, metric in zip(axes, metrics):
        ax.set_title(f'{model_name} - {metric.capitalize()}')

        # Exact name first, then the first key sharing the prefix. Validation
        # keys start with 'val_' and so can never match the prefix.
        if metric in logs:
            train_key = metric
        else:
            train_key = next((key for key in logs if key.startswith(metric)), None)

        if train_key is None:
            ax.text(0.5, 0.5, f"'{metric}' was not recorded",
                    ha='center', va='center', transform=ax.transAxes)
            continue

        ax.plot(logs[train_key], label=f'Train {metric.capitalize()}')
        val_key = f'val_{train_key}'
        if val_key in logs:
            ax.plot(logs[val_key], label=f'Validation {metric.capitalize()}')
        ax.set_xlabel('Epoch')
        ax.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, f'{model_name}_training_history.png'))
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def plot_confusion_matrix(y_true, y_pred, class_names, model_name, save_dir):
    """
    Plot and save a confusion-matrix heatmap.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        class_names: Class names; position i labels class index i.
        model_name: Used in the title and in the output file name.
        save_dir: Directory where '<model_name>_confusion_matrix.png' is saved.

    NOTE(review): assumes labels are integer indices into `class_names`, as
    they are used elsewhere in this notebook — confirm against the caller.
    """
    # Fix the label set explicitly. Otherwise a class that never occurs in
    # y_true/y_pred is dropped from the matrix and the tick labels no longer
    # line up with the rows and columns.
    label_ids = list(range(len(class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    fig = plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title(f'{model_name} - Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig(os.path.join(save_dir, f'{model_name}_confusion_matrix.png'))
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def plot_roc_pr_curves(y_true, y_pred_prob, class_names, model_name, save_dir):
    """
    Plot ROC and precision-recall curves side by side and save the figure.

    For two classes a single curve is drawn per panel. For more classes one
    curve per class is drawn using the One-vs-Rest strategy. Legends report
    the trapezoidal area under each curve.

    Args:
        y_true: True class indices.
        y_pred_prob: Predicted scores. Binary case: shape (n,), (n, 1) or
            (n, 2), where the last column is taken as the positive-class
            score. Multiclass case: shape (n, n_classes).
        class_names: Class names; position i labels class index i.
        model_name: Used in titles and in the output file name.
        save_dir: Directory where '<model_name>_roc_pr_curves.png' is saved.
    """
    n_classes = len(class_names)
    y_true = np.asarray(y_true)
    y_pred_prob = np.asarray(y_pred_prob)
    fig, axes = plt.subplots(1, 2, figsize=(18, 7))
    roc_ax, pr_ax = axes

    if n_classes == 2:
        # roc_curve / precision_recall_curve need a 1-D score vector; accept
        # a sigmoid column (n, 1) or softmax pair (n, 2) as well.
        if y_pred_prob.ndim == 2:
            y_pred_prob = y_pred_prob[:, -1]

        fpr, tpr, _ = roc_curve(y_true, y_pred_prob)
        roc_ax.plot(fpr, tpr, color='darkorange', lw=2,
                    label=f'ROC curve (AUC = {auc(fpr, tpr):.2f})')

        precision, recall, _ = precision_recall_curve(y_true, y_pred_prob)
        # The value is the area under the PR curve, not average precision,
        # so it is labelled AUC.
        pr_ax.plot(recall, precision, color='blue', lw=2,
                   label=f'PR curve (AUC = {auc(recall, precision):.2f})')
    else:
        # Multiclass case (One-vs-Rest): one binary problem per class.
        y_true_bin = label_binarize(y_true, classes=range(n_classes))
        palette = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green', 'red', 'purple'])

        # `curve_color` rather than `color`, which would shadow the skimage
        # module imported at file level.
        for i, curve_color in zip(range(n_classes), palette):
            fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_pred_prob[:, i])
            roc_ax.plot(fpr, tpr, color=curve_color, lw=2,
                        label=f'ROC curve of {class_names[i]} (AUC = {auc(fpr, tpr):.2f})')

            precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_pred_prob[:, i])
            pr_ax.plot(recall, precision, color=curve_color, lw=2,
                       label=f'PR curve of {class_names[i]} (AUC = {auc(recall, precision):.2f})')

    # Chance diagonal and cosmetics for the ROC panel.
    roc_ax.plot([0, 1], [0, 1], 'k--', lw=2)
    roc_ax.set_xlim([0.0, 1.0])
    roc_ax.set_ylim([0.0, 1.05])
    roc_ax.set_xlabel('False Positive Rate')
    roc_ax.set_ylabel('True Positive Rate')
    roc_ax.set_title(f'{model_name} - Receiver Operating Characteristic')
    roc_ax.legend(loc="lower right")

    pr_ax.set_xlim([0.0, 1.0])
    pr_ax.set_ylim([0.0, 1.05])
    pr_ax.set_xlabel('Recall')
    pr_ax.set_ylabel('Precision')
    pr_ax.set_title(f'{model_name} - Precision-Recall Curve')
    pr_ax.legend(loc="lower left")

    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, f'{model_name}_roc_pr_curves.png'))
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def plot_projections(features, labels, class_names, model_name, save_dir):
    """
    Draw 2-D t-SNE and UMAP embeddings of `features`, coloured by class.

    The two scatter plots share one figure, which is saved as
    '<model_name>_projections.png' in `save_dir` and then shown.
    """
    fig, (tsne_ax, umap_ax) = plt.subplots(1, 2, figsize=(18, 7))

    def scatter_embedding(coords, ax, method):
        """Scatter a 2-column embedding on `ax`, using class names as hue."""
        frame = pd.DataFrame({
            'x': coords[:, 0],
            'y': coords[:, 1],
            'label': [class_names[l] for l in labels],
        })
        plot = sns.scatterplot(data=frame, x='x', y='y', hue='label', ax=ax, palette='viridis')
        plot.set_title(f'{model_name} - {method}')

    # Perplexity must stay below the number of samples.
    tsne_model = TSNE(n_components=2, random_state=42, perplexity=min(30, len(features)-1))
    scatter_embedding(tsne_model.fit_transform(features), tsne_ax, 't-SNE')

    umap_model = UMAP(n_neighbors=15, min_dist=0.1, random_state=42)
    scatter_embedding(umap_model.fit_transform(features), umap_ax, 'UMAP')

    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, f'{model_name}_projections.png'))
    plt.show()

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """
    Compute a Grad-CAM heatmap for one preprocessed image batch.

    Args:
        img_array: Model input holding a single image (batch dimension first).
        model: Keras model to explain.
        last_conv_layer_name: Name of the layer whose activations are weighted.
        pred_index: Output unit to explain. When None, the highest-scoring
            unit is used. Ignored for single-unit (sigmoid) outputs.

    Returns:
        numpy.ndarray: 2-D heatmap with values in [0, 1].
    """
    grad_model = Model([model.inputs], [model.get_layer(last_conv_layer_name).output, model.output])
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if preds.shape[-1] == 1:
            # Binary model with a single sigmoid unit: explain that unit.
            class_channel = preds[:, 0]
        else:
            # Multiclass: explain ONE class score. Differentiating the whole
            # output vector sums every class score, and for a softmax that
            # sum is constant, so the gradients (and the heatmap) vanish.
            if pred_index is None:
                pred_index = tf.argmax(preds[0])
            class_channel = preds[:, pred_index]

    grads = tape.gradient(class_channel, last_conv_layer_output)
    # One importance weight per feature-map channel.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    heatmap = last_conv_layer_output[0] @ pooled_grads[..., tf.newaxis]
    heatmap = tf.maximum(tf.squeeze(heatmap), 0)
    # divide_no_nan returns 0 instead of NaN when the map is all zeros.
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

def save_and_display_gradcam(img_path, heatmap, cam_path, alpha=0.4):
    """
    Overlay a Grad-CAM heatmap on the source image and write it to `cam_path`.

    The image is read with OpenCV and resized to (IMG_WIDTH, IMG_HEIGHT); the
    heatmap is resized to match, JET-coloured, scaled by `alpha` and added to
    the image, with the sum clipped to 0-255.
    """
    base = cv2.resize(cv2.imread(img_path), (IMG_WIDTH, IMG_HEIGHT))
    rows, cols = base.shape[0], base.shape[1]

    # cv2.resize takes (width, height).
    heatmap_8bit = np.uint8(255 * cv2.resize(heatmap, (cols, rows)))
    colored = cv2.applyColorMap(heatmap_8bit, cv2.COLORMAP_JET)

    overlay = np.clip(colored * alpha + base, 0, 255).astype('uint8')
    cv2.imwrite(cam_path, overlay)

def visualize_class_maps(model, last_conv_layer_name, preprocessor, model_name, save_dir, test_dir_path):
    """
    Show a Grad-CAM overlay for the first listed file of every class folder.

    Class folders are the sorted entries of `test_dir_path`. Each overlay is
    written to '<save_dir>/<model_name>_gradcam_<class>.png' and displayed in
    a single row of subplots.
    """
    class_names = sorted(os.listdir(test_dir_path))
    n_panels = len(class_names)
    plt.figure(figsize=(12, 6))

    for panel, class_name in enumerate(class_names, start=1):
        class_dir = os.path.join(test_dir_path, class_name)
        img_path = os.path.join(class_dir, os.listdir(class_dir)[0])

        raw = img_to_array(load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH)))
        # Fall back to a plain [0, 1] rescale when no preprocessor is given.
        if preprocessor:
            prepared = preprocessor(raw.copy())
        else:
            prepared = raw / 255.0
        batch = np.expand_dims(prepared, axis=0)

        heatmap = make_gradcam_heatmap(batch, model, last_conv_layer_name)
        cam_path = os.path.join(save_dir, f'{model_name}_gradcam_{class_name}.png')
        save_and_display_gradcam(img_path, heatmap, cam_path)

        ax = plt.subplot(1, n_panels, panel)
        # The overlay was written by OpenCV in BGR order; convert for matplotlib.
        ax.imshow(cv2.cvtColor(cv2.imread(cam_path), cv2.COLOR_BGR2RGB))
        ax.set_title(f'Grad-CAM: {class_name}')
        ax.axis("off")

    plt.tight_layout()
    plt.show()

def visualize_predictions(y_true, y_pred, test_generator, class_names, model_name, save_dir, num_examples_per_class=2):
    """Plot a grid of test images titled with their true and predicted labels.

    The grid has one column per class and ``num_examples_per_class`` rows. The
    generator's file list is scanned in order and the first samples of each
    true class fill that class's column from top to bottom. Titles are green
    for correct predictions and red for incorrect ones. The figure is saved as
    '<model_name>_prediction_samples.png' inside ``save_dir`` and then shown.
    """
    rel_paths = test_generator.filenames
    shown_per_class = dict.fromkeys(class_names, 0)
    fig, grid = plt.subplots(
        nrows=num_examples_per_class,
        ncols=len(class_names),
        figsize=(18, 5 * num_examples_per_class),
        squeeze=False,
    )
    fig.suptitle(f'{model_name} - Prediction Samples', fontsize=20)

    for sample_idx, rel_path in enumerate(rel_paths):
        # Stop scanning as soon as every column is full.
        if all(count >= num_examples_per_class for count in shown_per_class.values()):
            break

        label_idx = y_true[sample_idx]
        label_name = class_names[label_idx]
        row = shown_per_class[label_name]
        if row >= num_examples_per_class:
            # This class already has all its examples.
            continue

        img_path = os.path.join(test_generator.directory, rel_path)
        cell = grid[row, label_idx]
        cell.imshow(load_img(img_path))
        cell.axis('off')

        predicted_idx = y_pred[sample_idx]
        title_color = 'green' if predicted_idx == label_idx else 'red'
        cell.set_title(f"True: {label_name}\nPred: {class_names[predicted_idx]}", color=title_color)
        shown_per_class[label_name] = row + 1

    # Leave room at the top for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(os.path.join(save_dir, f'{model_name}_prediction_samples.png'))
    plt.show()

# --- 4. MAIN TRAINING & EVALUATION LOOP ---

# --- Dataset Setup, Training, Evaluation & Visualization ---

In [None]:
# --- Run the Setup ---
setup_directories()

# Split directories of the newly created dataset; the data generators below read from these.
train_dir = os.path.join(BASE_DIR, 'train')
val_dir = os.path.join(BASE_DIR, 'val')
test_dir = os.path.join(BASE_DIR, 'test')

# Count only sub-directories: flow_from_directory derives its classes from folders,
# so stray files must not inflate the count (the output layer size depends on it).
# A missing train directory yields 0 so the "skipping" branches below are taken
# instead of raising.
NUM_CLASSES = (
    sum(os.path.isdir(os.path.join(train_dir, entry)) for entry in os.listdir(train_dir))
    if os.path.isdir(train_dir) else 0
)

# Optional GAN-based augmentation, restricted to classes that are part of the dataset.
if APPLY_GAN_AUGMENTATION and NUM_CLASSES >= 2:
    for class_name in CLASSES_TO_AUGMENT:
        if class_name in TARGET_CLASSES:
            train_gan_and_generate_images(class_name, NUM_IMAGES_TO_GENERATE, train_dir)
        else:
            print(f"WARNING: Class '{class_name}' for GAN is not in TARGET_CLASSES. Skipping.")


# --- Model Registry ---
# Maps a model name to (backbone constructor, matching preprocess_input function).
# 'CustomCNN' carries (None, None): it has neither a pretrained constructor nor a
# model-specific preprocessing function.
MODELS = dict(
    CustomCNN=(None, None),
    VGG16=(VGG16, tf.keras.applications.vgg16.preprocess_input),
    VGG19=(VGG19, tf.keras.applications.vgg19.preprocess_input),
    ResNet50=(ResNet50, tf.keras.applications.resnet50.preprocess_input),
    InceptionV3=(InceptionV3, tf.keras.applications.inception_v3.preprocess_input),
    Xception=(Xception, tf.keras.applications.xception.preprocess_input),
    MobileNetV2=(MobileNetV2, tf.keras.applications.mobilenet_v2.preprocess_input),
    DenseNet121=(DenseNet121, tf.keras.applications.densenet.preprocess_input),
    EfficientNetB0=(EfficientNetB0, tf.keras.applications.efficientnet.preprocess_input),
    InceptionResNetV2=(InceptionResNetV2, tf.keras.applications.inception_resnet_v2.preprocess_input),
)


# --- Main Training and Evaluation Loop ---
# For every entry in MODELS: build the data generators, train, reload the best
# checkpoint, then print metrics and save plots under OUTPUT_DIR/<model_name>.
if NUM_CLASSES >= 2:
    # --- DYNAMIC CONFIGURATION BASED ON CLASS COUNT ---
    if NUM_CLASSES == 2:
        class_mode = 'binary'
        loss_function = 'binary_crossentropy'
        last_layer_activation = 'sigmoid'
        num_output_units = 1
    else:  # Multi-class
        class_mode = 'categorical'
        loss_function = 'categorical_crossentropy'
        last_layer_activation = 'softmax'
        num_output_units = NUM_CLASSES

    print(f"\n--- Setting up training for {NUM_CLASSES} classes ---")
    print(f"Class Mode: {class_mode}, Loss: {loss_function}, Activation: {last_layer_activation}")

    for model_name, (model_constructor, preprocess_input) in MODELS.items():
        # Release the previous model's Keras global state before building the next
        # one; otherwise memory use grows with every backbone in this loop.
        tf.keras.backend.clear_session()

        print(f"\n{'='*25} Training and Evaluating: {model_name} {'='*25}")
        model_save_dir = os.path.join(OUTPUT_DIR, model_name)
        os.makedirs(model_save_dir, exist_ok=True)

        # --- Data Generators ---
        print(f"Instantiating preprocessor for {model_name}...")

        # For CustomCNN, pass `None`. get_preprocessing_function handles this.
        model_specific_preprocessing = None if model_name == 'CustomCNN' else preprocess_input

        # Preprocessor for training data (with augmentations)
        train_preprocessor = get_preprocessing_function(
            model_specific_preprocess_input=model_specific_preprocessing,
            is_training=True  # Enables training-only augmentations
        )

        # Preprocessor for validation/test data (no augmentations)
        val_test_preprocessor = get_preprocessing_function(
            model_specific_preprocess_input=model_specific_preprocessing,
            is_training=False  # Disables augmentations for consistent evaluation
        )

        train_datagen = ImageDataGenerator(preprocessing_function=train_preprocessor)
        val_test_datagen = ImageDataGenerator(preprocessing_function=val_test_preprocessor)

        train_generator = train_datagen.flow_from_directory(
            train_dir, target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=BATCH_SIZE, class_mode=class_mode
        )
        validation_generator = val_test_datagen.flow_from_directory(
            val_dir, target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=BATCH_SIZE, class_mode=class_mode
        )
        # shuffle=False keeps predictions aligned with test_generator.classes/filenames.
        test_generator = val_test_datagen.flow_from_directory(
            test_dir, target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=BATCH_SIZE, class_mode=class_mode, shuffle=False
        )

        # --- Model Building ---
        if model_name == 'CustomCNN':
            model = create_custom_cnn((IMG_HEIGHT, IMG_WIDTH, 3), num_output_units, last_layer_activation)
        else:
            # Frozen ImageNet backbone with a small trainable classification head.
            base_model = model_constructor(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
            base_model.trainable = False
            x = GlobalAveragePooling2D(name='feature_extractor_layer')(base_model.output)
            x = Dense(128, activation='relu')(x)
            x = Dropout(0.5)(x)
            predictions = Dense(num_output_units, activation=last_layer_activation)(x)
            model = Model(inputs=base_model.input, outputs=predictions)

        # --- Model Training ---
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss=loss_function, metrics=['accuracy', Precision(name='precision')])
        best_model_path = os.path.join(model_save_dir, f'{model_name}_best.keras')
        callbacks = [
            EarlyStopping(monitor='val_accuracy', patience=EARLY_STOPPING_PATIENCE, restore_best_weights=True),
            ModelCheckpoint(filepath=best_model_path, save_best_only=True, monitor='val_accuracy')
        ]
        history = model.fit(train_generator, epochs=EPOCHS, validation_data=validation_generator, callbacks=callbacks)

        # --- Evaluation & Visualization ---
        print(f"\n--- Loading best model from '{best_model_path}' for evaluation ---")
        model = tf.keras.models.load_model(best_model_path)
        plot_training_history(history, model_name, model_save_dir)

        y_pred_prob = model.predict(test_generator)
        y_true = test_generator.classes
        class_names = list(test_generator.class_indices.keys())
        # Explicit label ids keep the report and the confusion matrix aligned with
        # class_names even when a class occurs in neither y_true nor y_pred.
        label_ids = list(range(len(class_names)))

        if class_mode == 'binary':
            y_pred = (y_pred_prob.flatten() > 0.5).astype(int)
        else:  # categorical
            y_pred = np.argmax(y_pred_prob, axis=1)

        print(f'\nClassification Report for {model_name}:\n')
        print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names, digits=2))

        mcc = matthews_corrcoef(y_true, y_pred)
        print(f"Overall Matthew's Correlation Coefficient (MCC): {mcc:.2f}\n")

        cm = confusion_matrix(y_true, y_pred, labels=label_ids)
        report_dict = classification_report(y_true, y_pred, labels=label_ids, target_names=class_names, output_dict=True)

        # One-vs-rest counts per class, read off the confusion matrix
        # (rows = true labels, columns = predicted labels).
        metrics_data = []
        for i, class_name in enumerate(class_names):
            TP = cm[i, i]
            FP = cm[:, i].sum() - TP
            FN = cm[i, :].sum() - TP
            TN = cm.sum() - (TP + FP + FN)
            specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

            metrics_data.append({
                "Class": class_name,
                "Precision": report_dict[class_name]['precision'],
                "Recall (Sensitivity)": report_dict[class_name]['recall'],
                "F1-Score": report_dict[class_name]['f1-score'],
                "Specificity": specificity
            })

        metrics_df = pd.DataFrame(metrics_data)
        print("Detailed Per-Class Metrics Summary:")
        print(metrics_df.to_string(index=False, float_format="%.4f"))

        avg_specificity = metrics_df['Specificity'].mean()
        print(f"\nAverage Specificity: {avg_specificity:.4f}")

        plot_confusion_matrix(y_true, y_pred, class_names, model_name, model_save_dir)

        y_pred_for_curves = y_pred_prob.flatten() if class_mode == 'binary' else y_pred_prob
        plot_roc_pr_curves(y_true, y_pred_for_curves, class_names, model_name, model_save_dir)

        # Pooled features from the layer named 'feature_extractor_layer' feed the 2-D projections.
        feature_extractor = Model(inputs=model.inputs, outputs=model.get_layer('feature_extractor_layer').output)
        test_features = feature_extractor.predict(test_generator)
        plot_projections(test_features, y_true, class_names, model_name, model_save_dir)

        # Grad-CAM target: the last layer (closest to the output) whose name contains 'conv'.
        last_conv_layer_name = next((layer.name for layer in reversed(model.layers) if 'conv' in layer.name.lower()), None)
        # For Grad-CAM, use the preprocessor without training-time augmentations.
        grad_cam_preprocessor = get_preprocessing_function(model_specific_preprocessing, is_training=False)
        if last_conv_layer_name and model_name != 'CustomCNN':
            print(f"Generating Grad-CAM for {model_name} using layer: {last_conv_layer_name}")
            visualize_class_maps(model, last_conv_layer_name, grad_cam_preprocessor, model_name, model_save_dir, test_dir)
        else:
            print(f"Skipping Grad-CAM visualization for {model_name}.")

        visualize_predictions(y_true, y_pred, test_generator, class_names, model_name, model_save_dir)
        print(f"\nFinished processing {model_name}. Results saved to {model_save_dir}")

    print("\nAll models have been trained and evaluated.")

else:
    print("\nSkipping model training because a valid dataset with 2 or more classes was not created.")

New dataset will be created at: /kaggle/working/cervix_cancer_dataset

Processing: 'cervix_dyk' -> 'Dyskeratosis'
  - Found 4000 images in 'train' directory.
  - Found 1000 images in 'val' directory.

Processing: 'cervix_koc' -> 'Koilocytes'
  - Found 4000 images in 'train' directory.
  - Found 1000 images in 'val' directory.

Processing: 'cervix_mep' -> 'Metaplastic_cells'
  - Found 4000 images in 'train' directory.
  - Found 1000 images in 'val' directory.

Processing: 'cervix_pab' -> 'Parabasal_cells'
  - Found 4000 images in 'train' directory.
  - Found 1000 images in 'val' directory.

Processing: 'cervix_sfi' -> 'Superficial_Intermediate_cells'
  - Found 4000 images in 'train' directory.
  - Found 1000 images in 'val' directory.

Dataset setup and file copying complete!

--- Setting up training for 5 classes ---
Class Mode: categorical, Loss: categorical_crossentropy, Activation: softmax

Found 17500 images belonging to 5 classes.
Found 3750 images belonging to 5 classes.
Found 37

I0000 00:00:1754392014.178994      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


--- Custom CNN Model Summary ---


I0000 00:00:1754392022.174158     119 service.cc:148] XLA service 0x7f5f50003820 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1754392022.175639     119 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1754392022.829116     119 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  3/547[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m26s[0m 49ms/step - accuracy: 0.1267 - loss: 1.8409 - precision: 0.1865   

I0000 00:00:1754392027.202190     119 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m113/547[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m27s[0m 64ms/step - accuracy: 0.4920 - loss: 1.2682 - precision: 0.6711