In [None]:
import tensorflow as tf
import cv2
import numpy as np
import os
import random
from pathlib import Path

# Path configuration for images
# DATASET_PATH is the tree that is read; OUTPUT_PATH is where filtered copies are written.
DATASET_PATH = '/content/drive/MyDrive/Pre_processed_data-20260110T003759Z-1-001'
OUTPUT_PATH = '/content/drive/MyDrive/final_data'
IMG_SIZE = (224, 224)  # target size of every processed image
BATCH_SIZE = 32

# Splitting configuration
# Classes listed here get part of their 'train' images redirected to 'val'.
CREATE_VAL_SPLIT_FOR = ['Fractured']
VAL_SPLIT_RATIO = 0.2  # 20% of training data will be moved to validation

# --- 1. CORE MEDICAL FILTERING LOGIC (OpenCV) ---
def apply_medical_filters_and_resize(image_array):
    """
    Enhance an image at full resolution, then shrink it to IMG_SIZE.

    Pipeline:
    1. Validate the input and coerce it to a 3-channel uint8 RGB array.
    2. Apply CLAHE to the L channel (LAB space) followed by a 3x3 Gaussian
       blur, BEFORE resizing, so the filters see the high-resolution detail.
    3. Resize to IMG_SIZE.

    Args:
        image_array: array-like image with values on a 0-255 scale, shaped
            (H, W), (H, W, 1), (H, W, 3) RGB or (H, W, 4) RGBA.

    Returns:
        np.ndarray of dtype float32 and shape (IMG_SIZE[0], IMG_SIZE[1], 3)
        with values in 0-255. An all-zero image is returned when the input
        is None, empty, or has an unsupported shape.
    """
    target_h, target_w = IMG_SIZE[0], IMG_SIZE[1]
    blank = np.zeros((target_h, target_w, 3), dtype=np.float32)

    # Validate BEFORE touching the array: calling .astype() on None would
    # raise, which made the original None check unreachable.
    if image_array is None:
        return blank
    img = np.asarray(image_array)
    if img.size == 0:
        return blank

    # 1. Convert to uint8. Clip first so out-of-range floats saturate
    #    instead of wrapping around modulo 256.
    if img.dtype != np.uint8:
        img = np.clip(np.nan_to_num(img), 0, 255).astype(np.uint8)

    # Normalise the channel layout so every return path yields 3 channels
    # (downstream code pins the shape to (H, W, 3)).
    if img.ndim == 2:
        img = np.stack((img,) * 3, axis=-1)
    elif img.ndim == 3 and img.shape[2] == 1:
        img = np.repeat(img, 3, axis=2)
    elif img.ndim == 3 and img.shape[2] == 4:
        img = np.ascontiguousarray(img[:, :, :3])  # drop alpha
    elif img.ndim != 3 or img.shape[2] != 3:
        return blank

    # 2. Medical filtering (high resolution)
    try:
        lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)

        # CLAHE (contrast enhancement) on the lightness channel only
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)

        # Merge back and return to RGB
        lab = cv2.merge((l, a, b))
        img_clahe = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

        # Gaussian blur (denoising)
        img_filtered = cv2.GaussianBlur(img_clahe, (3, 3), 0)
    except cv2.error:
        # Best-effort fallback: keep the unfiltered (already 3-channel) image
        img_filtered = img

    # 3. Resize to target size. cv2.resize takes (width, height), whereas
    #    IMG_SIZE is used as (height, width) elsewhere in this file.
    img_resized = cv2.resize(img_filtered, (target_w, target_h))

    return img_resized.astype(np.float32)

# --- 2. TENSORFLOW WRAPPER (tf.data <-> OpenCV bridge) ---
def tf_preprocess_wrapper(image, label):
    """
    tf.data map function: run the OpenCV filter/resize step on one image and
    hand the result to the MobileNetV3 preprocessing hook.

    Args:
        image: a single image tensor (not a batch).
        label: the label tensor; returned untouched.

    Returns:
        (final_img, label) where final_img is float32 with static shape
        (IMG_SIZE[0], IMG_SIZE[1], 3).
    """
    height, width = IMG_SIZE[0], IMG_SIZE[1]

    # Bridge into plain Python/NumPy: OpenCV cannot run inside the TF graph.
    filtered = tf.numpy_function(
        func=apply_medical_filters_and_resize,
        inp=[image],
        Tout=tf.float32,
    )

    # numpy_function drops static shape information, so pin it again.
    filtered.set_shape([height, width, 3])

    # NOTE(review): in Keras, mobilenet_v3.preprocess_input is documented as a
    # pass-through (rescaling lives inside the model by default), so pixel
    # values are expected to stay in 0-255 here - confirm for your TF version.
    final_img = tf.keras.applications.mobilenet_v3.preprocess_input(filtered)

    return final_img, label

# --- 3. DATASET BUILDER (Updated for 4 Classes) ---
def get_preprocessed_dataset(subset_name='train'):
    """
    Build a batched tf.data pipeline that filters images at full resolution.

    The directory is OUTPUT_PATH/<subset_name>, falling back to
    DATASET_PATH/<subset_name> when the former does not exist. Each immediate
    sub-folder is one class; classes are indexed in sorted folder-name order.

    Images are decoded at their ORIGINAL size with tf.io so that
    tf_preprocess_wrapper can filter before resizing.
    (image_dataset_from_directory cannot be used for this: it requires a
    fixed image_size.)

    Args:
        subset_name: sub-folder to load, e.g. 'train', 'val' or 'test'.
            Only 'train' is shuffled.

    Returns:
        tf.data.Dataset yielding (images, one_hot_labels) batches of up to
        BATCH_SIZE elements.

    Raises:
        FileNotFoundError: neither candidate directory exists.
        ValueError: the directory has no class sub-folders or no images.
    """
    directory = os.path.join(OUTPUT_PATH, subset_name)
    if not os.path.exists(directory):
        # Fallback to source if output doesn't exist yet (for testing)
        directory = os.path.join(DATASET_PATH, subset_name)
        if not os.path.exists(directory):
            raise FileNotFoundError(f"Directory not found: {directory}")

    print(f"Loading {subset_name} data from: {directory}")

    # A. Index the files ourselves: one class per sub-folder, sorted by name.
    class_names = sorted(
        name for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    )
    if not class_names:
        raise ValueError(f"No class sub-folders found in: {directory}")

    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')
    file_paths, label_ids = [], []
    for label_id, class_name in enumerate(class_names):
        for root, dirs, files in os.walk(os.path.join(directory, class_name)):
            dirs.sort()  # deterministic traversal order
            for file_name in sorted(files):
                if file_name.lower().endswith(image_suffixes):
                    file_paths.append(os.path.join(root, file_name))
                    label_ids.append(label_id)
    if not file_paths:
        raise ValueError(f"No images found in: {directory}")

    num_classes = len(class_names)
    is_training = (subset_name == 'train')

    def _load_full_resolution(path, label_id):
        # Decode at native resolution; force 3 channels and a single frame.
        image = tf.io.decode_image(
            tf.io.read_file(path), channels=3, expand_animations=False
        )
        return image, tf.one_hot(label_id, num_classes)

    ds = tf.data.Dataset.from_tensor_slices((file_paths, label_ids))
    if is_training:
        # One fixed permutation of the (cheap) path list so the cached
        # sequence below is not grouped by class.
        ds = ds.shuffle(len(file_paths), reshuffle_each_iteration=False)

    # B. Decode -> Filter -> Resize -> Normalize
    ds = ds.map(_load_full_resolution, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.map(tf_preprocess_wrapper, num_parallel_calls=tf.data.AUTOTUNE)

    # C. Cache the expensive per-image work, THEN shuffle: shuffling before
    #    cache() would freeze the first epoch's order for all later epochs.
    ds = ds.cache()
    if is_training:
        ds = ds.shuffle(min(len(file_paths), 1024), reshuffle_each_iteration=True)

    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    return ds

# --- 4. SAVE TO DISK FUNCTION (With Auto-Split for Fractured) ---
def process_and_save_to_disk(output_dir=OUTPUT_PATH):
    """
    Filter every image under DATASET_PATH and write it below output_dir.

    For each .png/.jpg/.jpeg file (case-insensitive, found recursively):
    - anything whose relative path contains 'masks' is skipped;
    - the subset folder ('train'/'val'/'test') is located anywhere in the
      relative path, so a wrapper folder above it is tolerated. The folder
      right after the subset is taken as the class name. If no subset folder
      is found, the first two path components are used, as before;
    - images in 'train' whose class is listed in CREATE_VAL_SPLIT_FOR are
      redirected to 'val' with probability VAL_SPLIT_RATIO. The draw is
      seeded by the file's relative path, so re-running the function makes
      the same choice and never leaves one image in both train and val;
    - the image is passed through apply_medical_filters_and_resize and saved
      under the same relative path (with the subset possibly replaced).

    Args:
        output_dir: destination root. Defaults to OUTPUT_PATH.

    Returns:
        None. Progress and summary statistics are printed.
    """
    src_path = Path(DATASET_PATH)
    dst_path = Path(output_dir)

    print(f"\n--- Robust Processing & Saving to {dst_path} ---")

    if not src_path.exists():
        print(f"Error: Source {src_path} does not exist.")
        return

    # Select by suffix instead of a glob character class: the previous
    # pattern '*.[pjPJ][nNpP][gG]*' could not match '.jpeg'.
    image_suffixes = {'.png', '.jpg', '.jpeg'}
    known_subsets = {'train', 'val', 'test'}
    image_files = sorted(
        p for p in src_path.rglob('*')
        if p.is_file() and p.suffix.lower() in image_suffixes
    )
    print(f"Found {len(image_files)} images. Starting pipeline...")

    count = 0
    skipped = 0
    split_counts = {'train': 0, 'val': 0}

    for img_path in image_files:
        # 1. Identify structure
        # Expected: DATASET_PATH / [wrapper...] / subset / class / image.png
        relative_path = img_path.relative_to(src_path)
        parts = relative_path.parts

        # Skip 'masks' folder
        if 'masks' in str(relative_path).lower():
            continue

        # Skip files in root
        if len(parts) < 2:
            continue

        # Locate the subset folder. It must be followed by at least a class
        # folder and a file name, hence the [:-2] window.
        subset_idx = next(
            (i for i, part in enumerate(parts[:-2]) if part in known_subsets),
            0,  # legacy layout assumption: subset is the first component
        )
        subset = parts[subset_idx]          # e.g., 'train'
        class_name = parts[subset_idx + 1]  # e.g., 'Fractured'

        # 2. Split logic: deterministic per-file draw
        target_subset = subset
        is_split_candidate = subset == 'train' and class_name in CREATE_VAL_SPLIT_FOR
        if is_split_candidate:
            draw = random.Random(relative_path.as_posix()).random()
            if draw < VAL_SPLIT_RATIO:
                target_subset = 'val'

        # Construct new relative path with the (possibly replaced) subset
        new_parts = list(parts)
        new_parts[subset_idx] = target_subset
        save_path = dst_path / Path(*new_parts)

        # 3. Read image (high res); cv2.imread returns None on failure
        img = cv2.imread(str(img_path))
        if img is None:
            skipped += 1
            continue

        # 4. Convert BGR to RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # 5. Apply robust logic (filter high-res -> resize)
        processed_array = apply_medical_filters_and_resize(img_rgb)

        # 6. Save (create the folder only once there is something to write)
        save_img = np.clip(processed_array, 0, 255).astype(np.uint8)
        save_img_bgr = cv2.cvtColor(save_img, cv2.COLOR_RGB2BGR)

        save_path.parent.mkdir(parents=True, exist_ok=True)
        if not cv2.imwrite(str(save_path), save_img_bgr):
            skipped += 1
            continue

        count += 1
        if is_split_candidate:
            # Count only images that were actually written
            split_counts[target_subset] += 1

        if count % 200 == 0:
            print(f"Processed {count} images... (Fractured Split Stats: {split_counts})")

    print(f"\nSuccess! {count} images fully processed.")
    if skipped:
        print(f"Skipped {skipped} images that could not be read or written.")
    print(f"Auto-split statistics for {CREATE_VAL_SPLIT_FOR}: {split_counts}")
    print(f"Data saved to: {dst_path}")
    print("Folder structure is now ready for 4-class classification.")

# --- 5. EXECUTION ---
if __name__ == "__main__":
    # Run the full filter-and-save pass first.
    process_and_save_to_disk()

    # Optional sanity check: can Keras index the freshly written train folder?
    try:
        print("\nVerifying Dataset Loading...")
        test_ds = tf.keras.utils.image_dataset_from_directory(
            os.path.join(OUTPUT_PATH, 'train'),  # the NEW output location
            labels='inferred',
            label_mode='categorical',
        )
    except Exception as e:
        # Best-effort only: report and carry on
        print(f"Verification skipped (Data might not be ready): {e}")
    else:
        print("Classes detected:", test_ds.class_names)


--- Robust Processing & Saving to /content/drive/MyDrive/final_data ---
Found 7428 images. Starting pipeline...
Processed 200 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 400 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 600 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 800 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 1000 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 1200 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 1400 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 1600 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 1800 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 2000 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 2200 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 2400 images... (Fractured Split Stats: {'train': 0, 'val': 0})
Processed 2600 

In [None]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras import layers, models, regularizers, callbacks
from tensorflow.keras.applications import MobileNetV3Large

# --- 1. CONFIGURATION ---
# Ensure this path matches your drive structure.
# It must contain one sub-folder per subset ('train', 'val'), each holding
# one sub-folder per class.
LOCAL_DATASET_PATH = '/content/drive/MyDrive/final_data/Pre_processed_data'
INPUT_SHAPE = (224, 224, 3)  # model input shape
BATCH_SIZE = 32
LEARNING_RATE = 1e-4
EPOCHS =25

# --- 2. DATA LOADING WITH CRASH FIX ---
# Fail fast when the dataset root is missing (e.g. Drive not mounted).
if not os.path.exists(LOCAL_DATASET_PATH):
    raise FileNotFoundError(f"Dataset not found at {LOCAL_DATASET_PATH}")

def load_dataset(subset, explicit_class_names=None):
    """
    Load LOCAL_DATASET_PATH/<subset> as a batched, cached tf.data pipeline.

    Side effect: the subset directory, and every folder named in
    explicit_class_names, is created if missing. This lets a subset that
    lacks some classes (e.g. 'val') be loaded with the same class list and
    one-hot width as 'train'.

    Args:
        subset: sub-folder name, e.g. 'train' or 'val'. Only 'train' is
            shuffled.
        explicit_class_names: optional list fixing the class order. When
            None, classes are inferred from the folder names.

    Returns:
        (dataset, class_names): dataset yields (images, one_hot_labels)
        batches of up to BATCH_SIZE; class_names is the list reported by
        Keras for this directory.
    """
    directory = os.path.join(LOCAL_DATASET_PATH, subset)

    if not os.path.exists(directory):
        print(f"Creating directory: {directory}")
        os.makedirs(directory, exist_ok=True)

    # --- THE FIX FOR THE VALUE ERROR ---
    # Keras rejects class names that have no matching folder, so make sure
    # every expected class folder exists, even if it stays empty.
    if explicit_class_names:
        for class_name in explicit_class_names:
            class_path = os.path.join(directory, class_name)
            if not os.path.exists(class_path):
                print(f"‚ö†Ô∏è Fixing missing folder in {subset}: {class_name}")
                os.makedirs(class_path, exist_ok=True)
    # -----------------------------------

    is_training = (subset == 'train')

    # Load UNBATCHED so that shuffling after the cache works per image.
    raw_ds = tf.keras.utils.image_dataset_from_directory(
        directory,
        image_size=(INPUT_SHAPE[0], INPUT_SHAPE[1]),
        batch_size=None,
        label_mode='categorical',
        shuffle=is_training,
        class_names=explicit_class_names
    )
    class_names = raw_ds.class_names

    # NOTE(review): mobilenet_v3.preprocess_input is documented as a
    # pass-through (the model rescales internally by default), so pixels
    # are expected to stay in 0-255 here, not -1..1.
    norm_layer = tf.keras.applications.mobilenet_v3.preprocess_input
    ds = raw_ds.map(lambda x, y: (norm_layer(x), y), num_parallel_calls=tf.data.AUTOTUNE)

    # Cache first, shuffle afterwards: caching an already shuffled, batched
    # dataset replays the identical batches in the identical order each epoch.
    ds = ds.cache()
    if is_training:
        ds = ds.shuffle(buffer_size=1024, reshuffle_each_iteration=True)
    ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    return ds, class_names

# 1. Load Train FIRST to find the true classes
# (class_names returned here defines the label order for everything below)
print("--- Loading Training Data ---")
train_ds, class_names = load_dataset('train')
print(f" Training Classes Found: {class_names}")

# 2. Load Val using the EXACT SAME class list
# This prevents the mismatch error by forcing the same structure
# (same one-hot width and class order as the training set).
print("--- Loading Validation Data ---")
val_ds, _ = load_dataset('val', explicit_class_names=class_names)

# Width of the softmax output layer.
NUM_CLASSES = len(class_names)

# --- 3. MODEL ARCHITECTURE ---
# Random augmentation stack (flip, rotate, zoom) applied to the model input
# by build_model.
data_augmentation = models.Sequential(
    layers=[
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.15),
        layers.RandomZoom(0.1),
    ],
    name="augmentation",
)

def build_model(num_classes):
    """
    Assemble the classifier: augmentation -> MobileNetV3Large backbone ->
    pooled dense head with a softmax over num_classes.

    Args:
        num_classes: number of output classes (width of the softmax layer).

    Returns:
        An uncompiled Keras Model named "Medical_MobileNetV3".
    """
    model_input = layers.Input(shape=INPUT_SHAPE)
    augmented = data_augmentation(model_input)

    # ImageNet-pretrained backbone without its classification head
    backbone = MobileNetV3Large(
        input_shape=INPUT_SHAPE,
        include_top=False,
        weights='imagenet',
    )

    # Fine-tune only the last 40 layers; everything below stays frozen to
    # stabilize training.
    backbone.trainable = True
    for frozen_layer in backbone.layers[:-40]:
        frozen_layer.trainable = False

    # training=False: the backbone is always called in inference mode
    features = backbone(augmented, training=False)

    # Classification head
    head = layers.GlobalAveragePooling2D()(features)
    head = layers.BatchNormalization()(head)
    head = layers.Dense(
        256,
        activation='relu',
        kernel_regularizer=regularizers.l2(1e-4),
    )(head)
    head = layers.Dropout(0.4)(head)

    # Output dtype is set explicitly to float32
    class_probs = layers.Dense(num_classes, activation='softmax', dtype='float32')(head)
    return models.Model(model_input, class_probs, name="Medical_MobileNetV3")

model = build_model(NUM_CLASSES)

# --- 4. TRAINING ---
# categorical_crossentropy matches the one-hot labels produced by
# label_mode='categorical' in load_dataset.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Keep the checkpoint with the best validation accuracy, and stop after
# 5 epochs without improvement, restoring the best weights.
callbacks_list = [
    callbacks.ModelCheckpoint('best_model_1.keras', save_best_only=True, monitor='val_accuracy', mode='max'),
    callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
]

print(f"\n Starting training for classes: {class_names}")
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=callbacks_list
)

--- Loading Training Data ---
Found 4876 files belonging to 4 classes.
✅ Training Classes Found: ['Fractured', 'Normal', 'Pneumonia', 'Tuberculosis']
--- Loading Validation Data ---
Found 276 files belonging to 4 classes.

🚀 Starting training for classes: ['Fractured', 'Normal', 'Pneumonia', 'Tuberculosis']
Epoch 1/25
[1m153/153[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m39s[0m 184ms/step - accuracy: 0.4876 - loss: 1.4395 - val_accuracy: 0.7391 - val_loss: 0.6962
Epoch 2/25
[1m153/153[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m10s[0m 68ms/step - accuracy: 0.7214 - loss: 0.6804 - val_accuracy: 0.7319 - val_loss: 0.7287
Epoch 3/25
[1m153/153[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m10s[0m 68ms/step - accuracy: 0.7471 - loss: 0.6013 - val_accuracy: 0.7174 - val_loss: 0.8102
Epoch 4/25
[1m153/153[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î

In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# --- CONFIGURATION ---
MODEL_PATH = 'best_model_1.keras' # Ensure this matches the saved name
IMG_SIZE = (224, 224)

# CLASS LIST
# The training log reports 4 classes; this list must contain exactly those
# names, in the same order the model was trained with.
# The order MUST be alphabetical as per TensorFlow's default behavior.
CLASS_NAMES = ['Fractured', 'Normal', 'Pneumonia', 'Tuberculosis']

# --- LOAD MODEL ---
# Stop early when the checkpoint file is missing.
if not os.path.exists(MODEL_PATH):
    print(f" Model not found: {MODEL_PATH}")
    exit()

model = tf.keras.models.load_model(MODEL_PATH)

# --- PREDICTION FUNCTION ---
def predict_image(image_path, apply_training_filters=True):
    """
    Classify one image file and print the prediction with per-class scores.

    Args:
        image_path: path of the image to classify.
        apply_training_filters: when True (default), apply the same
            CLAHE + Gaussian-blur enhancement at full resolution, before
            resizing, that the preprocessing cell applied to the training
            images. NOTE(review): this assumes the model was trained on the
            output of that preprocessing step; pass False if it was trained
            on raw images.

    Returns:
        None. Results, or a short error message, are printed.
    """
    if not os.path.exists(image_path):
        print(" Image file not found.")
        return

    # 1. Load and preprocess
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for unreadable / non-image files
        print(" Could not read image file.")
        return

    # OpenCV loads BGR; the model was trained on RGB
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if apply_training_filters:
        # Mirror the training-time enhancement to avoid train/inference skew
        try:
            lab = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2LAB)
            l, a, b = cv2.split(lab)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            lab = cv2.merge((clahe.apply(l), a, b))
            img_rgb = cv2.GaussianBlur(cv2.cvtColor(lab, cv2.COLOR_LAB2RGB), (3, 3), 0)
        except cv2.error:
            # Best effort: fall back to the unfiltered image
            pass

    # Resize AFTER filtering, as in the training pipeline
    img_resized = cv2.resize(img_rgb, IMG_SIZE)

    # Preprocess for MobileNetV3 and add the batch dimension
    img_array = img_resized.astype(np.float32)
    img_preprocessed = tf.keras.applications.mobilenet_v3.preprocess_input(img_array)
    img_batch = np.expand_dims(img_preprocessed, axis=0)

    # 2. Predict
    predictions = model.predict(img_batch, verbose=0)[0]
    if len(predictions) != len(CLASS_NAMES):
        # Guard against a stale CLASS_NAMES list (would mislabel or IndexError)
        print(f" Model outputs {len(predictions)} classes but CLASS_NAMES has {len(CLASS_NAMES)}.")
        return

    predicted_index = int(np.argmax(predictions))
    confidence = predictions[predicted_index] * 100

    result_label = CLASS_NAMES[predicted_index]

    # 3. Output
    print("-" * 30)
    print(f"Input: {os.path.basename(image_path)}")
    if result_label == "Normal":
        print(f" Prediction: {result_label} ({confidence:.2f}%)")
    else:
        print(f" Prediction: {result_label} DETECTED ({confidence:.2f}%)")

    print("\nConfidence Breakdown:")
    for class_name, score in zip(CLASS_NAMES, predictions):
        print(f"  - {class_name}: {score*100:.2f}%")
    print("-" * 30)


if __name__ == "__main__":
    print(f"Loaded Model for classes: {CLASS_NAMES}")

    # Interactive prompt: classify paths until the user types 'q'.
    while True:
        raw_entry = input("\nEnter image path (or 'q' to quit): ")
        # Drop surrounding whitespace and quotes (e.g. from "Copy as path")
        path = raw_entry.strip().strip('"').strip("'")
        if path.lower() == 'q':
            break
        predict_image(path)