In [41]:
import os
import shutil

# PATHS (change only if your folder name is different)
base_dir = "/content/drive/MyDrive/Dental_Dataset/Caries"
images_dir = os.path.join(base_dir, "images")
masks_dir = os.path.join(base_dir, "masks")

# Create folders if they don't exist
os.makedirs(images_dir, exist_ok=True)
os.makedirs(masks_dir, exist_ok=True)

print(f"Initial contents of {base_dir} before sorting:")
print(os.listdir(base_dir))
print("-----------------------------------------")

# Loop through all files in Caries folder
for file_name in os.listdir(base_dir):
    file_path = os.path.join(base_dir, file_name)

    # Skip folders (including the newly created images/masks folders)
    if os.path.isdir(file_path):
        continue

    # Move mask images
    if "mask" in file_name.lower():
        shutil.move(file_path, os.path.join(masks_dir, file_name))
        print(f"Moved MASK: {file_name}")

    # Move normal images
    elif file_name.lower().endswith(".png"): # Ensure this matches your image file extensions
        shutil.move(file_path, os.path.join(images_dir, file_name))
        print(f"Moved IMAGE: {file_name}")

print("\n✅ Sorting Completed Successfully!")

Initial contents of /content/drive/MyDrive/Dental_Dataset/Caries before sorting:
['images', 'masks']
-----------------------------------------

✅ Sorting Completed Successfully!


In [42]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# --- Configuration ---
IMG_HEIGHT = 256
IMG_WIDTH = 256
IMG_CHANNELS = 1

# --- Custom Metrics (As requested in Deliverables) ---
def dice_coef(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    intersection = tf.keras.backend.sum(y_true_f * y_pred_f)
    return (2. * intersection + 1) / (tf.keras.backend.sum(y_true_f) + tf.keras.backend.sum(y_pred_f) + 1)

def iou_score(y_true, y_pred):
    y_true_f = tf.keras.backend.flatten(y_true)
    y_pred_f = tf.keras.backend.flatten(y_pred)
    intersection = tf.keras.backend.sum(y_true_f * y_pred_f)
    union = tf.keras.backend.sum(y_true_f) + tf.keras.backend.sum(y_pred_f) - intersection
    return (intersection + 1) / (union + 1)

# Combined Loss: Binary Cross Entropy + Dice Loss (Best for segmentation)
def dice_loss(y_true, y_pred):
    return 1 - dice_coef(y_true, y_pred)

def preprocess_image(path):
    # Load image in grayscale
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

    # Resize to standard dimensions
    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))

    # --- SOLUTION FOR LOW CONTRAST ---
    # Apply CLAHE to enhance carious regions
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    img = clahe.apply(img)

    # Normalize pixel values to [0, 1]
    img = img / 255.0
    return np.expand_dims(img, axis=-1) # Add channel dimension

def preprocess_mask(path):
    mask = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    mask = cv2.resize(mask, (IMG_WIDTH, IMG_HEIGHT))

    # Binarize the mask (ensure strictly 0 and 1)
    _, mask = cv2.threshold(mask, 127, 1, cv2.THRESH_BINARY)
    return np.expand_dims(mask, axis=-1)

# --- LOAD YOUR DATA ---
# IMPORTANT: Upload your dataset folders to Colab on the left sidebar
# Structure assumed: "dataset/images" and "dataset/masks"

# Example Loading (Uncomment and adjust paths when you have data uploaded)
# image_dir = 'dataset/images/'
# mask_dir = 'dataset/masks/'
# image_ids = os.listdir(image_dir)

# X = np.zeros((len(image_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32)
# Y = np.zeros((len(image_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32)

# for n, id_ in enumerate(image_ids):
#     X[n] = preprocess_image(image_dir + id_)
#     Y[n] = preprocess_mask(mask_dir + id_)

# Split into Train/Test
# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
def build_unet(input_shape):
    inputs = Input(input_shape)

    # --- Encoder (Contracting Path) ---
    c1 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(inputs)
    c1 = Dropout(0.1)(c1)
    c1 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c1)
    p1 = MaxPooling2D((2, 2))(c1)

    c2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p1)
    c2 = Dropout(0.1)(c2)
    c2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c2)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p2)
    c3 = Dropout(0.2)(c3)
    c3 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c3)
    p3 = MaxPooling2D((2, 2))(c3)

    # --- Bottleneck ---
    c4 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(p3)
    c4 = Dropout(0.2)(c4)
    c4 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c4)

    # --- Decoder (Expansive Path) ---
    u5 = UpSampling2D((2, 2))(c4)
    u5 = concatenate([u5, c3])
    c5 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u5)
    c5 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c5)

    u6 = UpSampling2D((2, 2))(c5)
    u6 = concatenate([u6, c2])
    c6 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u6)
    c6 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c6)

    u7 = UpSampling2D((2, 2))(c6)
    u7 = concatenate([u7, c1])
    c7 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(u7)
    c7 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(c7)

    # Output Layer
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c7)

    model = Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer=Adam(learning_rate=1e-4), loss=dice_loss, metrics=['accuracy', dice_coef, iou_score])
    return model

model = build_unet((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
model.summary()
def visualize_results(model, X_test, y_test, index=0):
    # Predict
    img = X_test[index]
    ground_truth = y_test[index]
    # Predict and Threshold (Probability > 0.5 is caries)
    pred_mask = model.predict(np.expand_dims(img, axis=0))[0]
    pred_mask = (pred_mask > 0.5).astype(np.uint8)

    plt.figure(figsize=(20, 5))

    # 1. Original X-Ray
    plt.subplot(1, 5, 1)
    plt.title("Original X-Ray (CLAHE)")
    plt.imshow(img.squeeze(), cmap='gray')

    # 2. Ground Truth Mask
    plt.subplot(1, 5, 2)
    plt.title("Ground Truth Mask")
    plt.imshow(ground_truth.squeeze(), cmap='gray')

    # 3. Predicted Output
    plt.subplot(1, 5, 3)
    plt.title("Predicted Segmentation")
    plt.imshow(pred_mask.squeeze(), cmap='gray')

    # 4. Overlay (Deliverable 1d)
    plt.subplot(1, 5, 4)
    plt.title("Overlay Visualization")
    plt.imshow(img.squeeze(), cmap='gray')
    plt.imshow(pred_mask.squeeze(), cmap='jet', alpha=0.5) # Alpha blends them

    # 5. Error Map (Deliverable 1f)
    # Difference between Truth and Prediction
    error_map = ground_truth.squeeze() - pred_mask.squeeze()
    plt.subplot(1, 5, 5)
    plt.title("Error Map (White=Missed, Black=False)")
    plt.imshow(error_map, cmap='coolwarm')

    plt.show()

# Run Visualization (Uncomment after training)
# visualize_results(model, X_test, y_test, index=5)

In [43]:
# /Dental_Dataset
#     /images
#         image_001.png
#         image_002.png
#         ...
#     /masks
#         image_001.png  <-- Filename must match the image exactly
#         image_002.png
#         ...

In [47]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm  # This gives you a progress bar

# --- SETTINGS ---
IMG_HEIGHT = 256
IMG_WIDTH = 256
# Change this to the actual path where your folder is located
DATA_PATH = '/content/drive/MyDrive/Dental_Dataset/Caries/' # Corrected to point to the folder containing 'images' and 'masks' subfolders

def load_data(path):
    image_dir = os.path.join(path, "images")
    mask_dir = os.path.join(path, "masks")

    # Check if image and mask directories exist
    if not os.path.exists(image_dir):
        print(f"Error: Image directory not found at {image_dir}. Please ensure sorting cell (ID: 8NYwIFf2z_EW) has been run successfully to create these folders and move files into them.")
        return np.array([]), np.array([])
    if not os.path.exists(mask_dir):
        print(f"Error: Mask directory not found at {mask_dir}. Please ensure sorting cell (ID: 8NYwIFf2z_EW) has been run successfully to create these folders and move files into them.")
        return np.array([]), np.array([])

    file_names = os.listdir(image_dir)
    if not file_names:
        print(f"Warning: No image files found in {image_dir}. Returning empty datasets.")
        return np.array([]), np.array([])

    loaded_images = []
    loaded_masks = []

    print("Loading and preprocessing images and masks...")
    for file_name in tqdm(file_names, desc="Processing files"):
        img_path = os.path.join(image_dir, file_name)

        # Correctly derive mask filename
        mask_file_name = file_name.replace('.png', '-mask.png')
        mask_path = os.path.join(mask_dir, mask_file_name)

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if img is None:
            print(f"Warning: Could not read image file {img_path}. Skipping.")
            continue
        if mask is None:
            print(f"Warning: Could not read mask file {mask_path}. Skipping this pair.")
            continue

        # Preprocess image
        img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        img = clahe.apply(img)
        img = img / 255.0
        img = np.expand_dims(img, axis=-1)

        # Preprocess mask
        mask = cv2.resize(mask, (IMG_WIDTH, IMG_HEIGHT))
        _, mask = cv2.threshold(mask, 127, 1, cv2.THRESH_BINARY)
        mask = np.expand_dims(mask, axis=-1)

        loaded_images.append(img)
        loaded_masks.append(mask)

    if not loaded_images:
        print("No valid image-mask pairs were loaded after processing. Check file integrity or preprocessing steps.")
        return np.array([]), np.array([])

    return np.array(loaded_images), np.array(loaded_masks)

# --- EXECUTE LOAD ---
# 1. Mount Google Drive (If using Colab)
from google.colab import drive
drive.mount('/content/drive')

# --- Verification step: List contents of MyDrive to find correct path ---
print("Contents of MyDrive:")
print(os.listdir('/content/drive/MyDrive'))

print(f"Contents of {DATA_PATH}:") # This will now show contents of the 'Caries' folder
# Adding a check for directory existence before listing
if os.path.exists(DATA_PATH):
    print(os.listdir(DATA_PATH))
else:
    print(f"Directory {DATA_PATH} does not exist.")

# 2. Run the loader
X, y = load_data(DATA_PATH)

print(f"Data Loaded Successfully!")
print(f"X Shape: {X.shape}") # Should be (1132, 256, 256, 1)
print(f"y Shape: {y.shape}") # Should be (1132, 256, 256, 1)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Contents of MyDrive:
['Maths Week 1 raded assignment.pdf', 'Electronic Systems Week 1 Graded assignment.jpg', 'Maths Graded assignment week 2.jpg', 'Classroom', 'IMG_9024.jpeg', 'WhatsApp Image 2025-02-05 at 01.38.55_1c9f06eb.jpg', 'ES.jpg', 'maths 4.jpg', 'WhatsApp Image 2025-02-19 at 11.41.09_804fb0ff.jpg', 'WhatsApp Image 2025-02-19 at 11.41.10_bc058b17.jpg', 'image (1).jpg', 'image.jpg', 'Shriyans Patro - Week 1 - Level 1 - 15 Practice Problems.pdf', 'Shriyans Patro - Week 1 - Level 1 - 10 Practice Problems.pdf', 'Difference between the needs of the Self (like happiness, respect, trust) and the needs of the Body (like food, clothes, shelter) (Sep 25, 2025 at 12:06 PM).jpg', 'ETicket for Chennai .pdf', 'Saved from the Google app', 'Colab Notebooks', 'Dental_Dataset']
Contents of /content/drive/MyDrive/Dental_Dataset/Caries/:
['images', 'masks']
Loading and

Processing files:  83%|████████▎ | 470/566 [00:16<00:02, 33.61it/s]



Processing files: 100%|██████████| 566/566 [00:19<00:00, 28.94it/s]


Data Loaded Successfully!
X Shape: (565, 256, 256, 1)
y Shape: (565, 256, 256, 1)


In [45]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
