In [11]:
from google.colab import drive
import os
import cv2
import numpy as np

# Mount Google Drive to access your dataset
drive.mount('/content/drive')

# Install any missing dependencies
!pip install albumentations opencv-python-headless numpy
import albumentations as A


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

# Path to the shortcut folder in your Google Drive
BASE_DIR = "/content/drive/My Drive/FYP_work/DecayDataSrc"

# Test if the shortcut folder is accessible
import os

print("DecayDataSrc directory exists:", os.path.exists(BASE_DIR))
print("Contents of DecayDataSrc:", os.listdir(BASE_DIR) if os.path.exists(BASE_DIR) else "Not found!")


Mounted at /content/drive
DecayDataSrc directory exists: True
Contents of DecayDataSrc: ['Teeth_Dataset', 'Decay_Dataset', 'src']


In [13]:
import os
import cv2

def create_directory(path_segments):
    """Creates a directory if it does not exist."""
    path = os.path.join(*path_segments)
    # Adding a check to see if path exists and if it's writable
    if not os.path.exists(path):
        try:
            os.makedirs(path, exist_ok=True)
        except OSError as e:
            if e.errno == 30:  # Read-only file system error
                print(f"Error: Cannot create directory '{path}'. File system is read-only.")

            else:
                raise
    elif os.path.isdir(path):
        print(f"Directory '{path}' already exists. Skipping creation.")
    else:
        print(f"Error: '{path}' exists but is not a directory.")
    return path


def interpolate_image(image, target_size):
    """Interpolates an image to the target size."""

    return cv2.resize(image, target_size)

BASE_DIR = "/content/drive/My Drive/FYP_work/DecayDataSrc/Teeth_Dataset"

train_images_dir = f"{BASE_DIR}/Train_Data/Images"
train_masks_dir = f"{BASE_DIR}/Train_Data/Masks"

# Output directories
augmented_images_dir = create_directory([BASE_DIR, "augmented/Images1"])
augmented_masks_dir = create_directory([BASE_DIR, "augmented/Masks1"])
compressed_dir = create_directory([BASE_DIR, "compressed1"])

Directory '/content/drive/My Drive/FYP_work/DecayDataSrc/Teeth_Dataset/augmented/Images1' already exists. Skipping creation.
Directory '/content/drive/My Drive/FYP_work/DecayDataSrc/Teeth_Dataset/augmented/Masks1' already exists. Skipping creation.
Directory '/content/drive/My Drive/FYP_work/DecayDataSrc/Teeth_Dataset/compressed1' already exists. Skipping creation.


In [14]:
import os

# Check existence of directories
print("Train images directory exists:", os.path.exists(train_images_dir))
print("Train masks directory exists:", os.path.exists(train_masks_dir))

# List files, sorted alphabetically
if os.path.exists(train_images_dir):
    train_images_files = sorted(os.listdir(train_images_dir)) # Sort the list of files
    print("Train Images:", train_images_files[:5])  # Show first 5 images

if os.path.exists(train_masks_dir):
    train_masks_files = sorted(os.listdir(train_masks_dir)) # Sort the list of files
    print("Train Masks:", train_masks_files[:5])  # Show first 5 masks

Train images directory exists: True
Train masks directory exists: True
Train Images: ['1.png', '10.png', '100.png', '101.png', '102.png']
Train Masks: ['1.png', '10.png', '100.png', '101.png', '102.png']


In [15]:
#Check alignment of images and masks
for i in range(5):
    print(f"Image: {train_images_files[i]} <-> Mask: {train_masks_files[i]}")

Image: 1.png <-> Mask: 1.png
Image: 10.png <-> Mask: 10.png
Image: 100.png <-> Mask: 100.png
Image: 101.png <-> Mask: 101.png
Image: 102.png <-> Mask: 102.png


In [16]:
input_image_size = 512
target_shape = (input_image_size, input_image_size)
import albumentations as A
transform_crop = A.Compose([
    A.RandomCrop(width=input_image_size, height=input_image_size, p=1)
], additional_targets={'mask': 'mask'})

transform_rotate90 = A.Compose([
    A.RandomRotate90(p=1)
], additional_targets={'mask': 'mask'})

transform_flip = A.Compose([
    A.HorizontalFlip(p=1)
], additional_targets={'mask': 'mask'})

transform_brightness = A.Compose([
    A.RandomBrightnessContrast(p=1)
], additional_targets={'mask': 'mask'})  # Even brightness transformation keeps mask aligned

transform_all = A.Compose([
    A.RandomCrop(width=input_image_size, height=input_image_size, p=1),
    A.HorizontalFlip(p=1),
    A.RandomRotate90(p=1),
    A.RandomBrightnessContrast(p=1)
], additional_targets={'mask': 'mask'})

transform_noise = A.Compose([
    A.RandomBrightnessContrast(p=1, always_apply=True)
], additional_targets={'mask': 'mask'})


  A.RandomBrightnessContrast(p=1, always_apply=True)


In [None]:
# Helper function for visualization
def visualize(image, mask, title="Image and Mask"):
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    axes[0].imshow(image, cmap='gray')
    axes[0].set_title("Image")
    axes[1].imshow(mask, cmap='gray')
    axes[1].set_title("Mask")
    plt.suptitle(title)
    plt.show()

Augmenting Images


In [17]:
# Augment training data
for file_name in train_images_files:
    try:
        # Load image and mask
        image = cv2.imread(os.path.join(train_images_dir, file_name), cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(os.path.join(train_masks_dir, file_name), cv2.IMREAD_GRAYSCALE)

        # Resize image and mask to the target shape
        image = cv2.resize(image, target_shape, interpolation=cv2.INTER_LINEAR)
        mask = cv2.resize(mask, target_shape, interpolation=cv2.INTER_NEAREST)

        # Convert mask to strictly binary values (Keeps it as 0 and 1)
        mask = (mask > 127).astype(np.uint8)

        # Save original data
        cv2.imwrite(f"{augmented_images_dir}/00_{file_name}", image)
        cv2.imwrite(f"{augmented_masks_dir}/00_{file_name}", (mask * 255).astype("uint8"))


        # Apply transformations and save
        transformed = transform_crop(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/01_{file_name}", transformed["image"])
        cv2.imwrite(f"{augmented_masks_dir}/01_{file_name}", (transformed["mask"] * 255).astype("uint8"))



        transformed = transform_rotate90(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/02_{file_name}", transformed["image"])
        cv2.imwrite(f"{augmented_masks_dir}/02_{file_name}", (transformed["mask"] * 255).astype("uint8"))



        transformed = transform_flip(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/03_{file_name}", transformed["image"])
        cv2.imwrite(f"{augmented_masks_dir}/03_{file_name}", (transformed["mask"] * 255).astype("uint8"))



        transformed = transform_all(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/04_{file_name}", transformed["image"])
        cv2.imwrite(f"{augmented_masks_dir}/04_{file_name}", (transformed["mask"] * 255).astype("uint8"))


    except Exception as e:
        print(f"Error processing {file_name}: {e}")


Seperate Augmentation for images (deprecated in latest notebook)

In [None]:
# Augment training data
for file_name in train_images_files:
    try:
        # Load image and mask
        image = cv2.imread(os.path.join(train_images_dir, file_name), cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(os.path.join(train_masks_dir, file_name), cv2.IMREAD_GRAYSCALE)

        # Resize image and mask to the target shape
        image = cv2.resize(image, target_shape)
        mask = cv2.resize(mask, target_shape)

        # Save original data
        cv2.imwrite(f"{augmented_images_dir}/00_{file_name}", image)


        # Apply transformations and save
        transformed = transform_crop(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/01_{file_name}", transformed["image"])


        transformed = transform_rotate90(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/02_{file_name}", transformed["image"])


        transformed = transform_flip(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/03_{file_name}", transformed["image"])


        transformed = transform_all(image=image, mask=mask)
        cv2.imwrite(f"{augmented_images_dir}/04_{file_name}", transformed["image"])

    except Exception as e:
        print(f"Error processing {file_name}: {e}")


In [None]:
import albumentations as A

# Define transform_noise
transform_noise = A.Compose([
    A.RandomBrightnessContrast(p=1, always_apply=True)
])


  A.RandomBrightnessContrast(p=1, always_apply=True)


Seperate Augmentation for masks (deprecated in latest notebook)

In [None]:
for file_name in train_images_files:
    try:
        # Load image and mask
        image = cv2.imread(os.path.join(train_images_dir, file_name), cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(os.path.join(train_masks_dir, file_name), cv2.IMREAD_GRAYSCALE)

        # Resize image and mask to the target shape
        image = cv2.resize(image, target_shape)
        mask = cv2.resize(mask, target_shape)

        if image is None or mask is None:
            print(f"Missing file: {file_name}")
            continue

        # Convert mask to strictly binary values
        mask = (mask > 127).astype(np.uint8)  # Ensures only 0 and 1

        # Debugging: Print unique values
        unique_values = np.unique(mask)
        print(f"Mask unique values for {file_name}: {unique_values}")

         # Debugging: Print unique values
        unique_values = np.unique(mask)
        print(f"Mask unique values for {file_name}: {unique_values}")

        # Save the original mask
        cv2.imwrite(f"{augmented_masks_dir}/00_{file_name}", (mask * 255).astype("uint8"))

        # Apply augmentations
        transformed = transform_crop(image=image, mask=mask)
        augmented_mask = transformed["mask"]
        cv2.imwrite(f"{augmented_masks_dir}/01_{file_name}", (augmented_mask * 255).astype("uint8"))

        transformed = transform_rotate90(image=image, mask=mask)
        augmented_mask = transformed["mask"]
        cv2.imwrite(f"{augmented_masks_dir}/02_{file_name}", (augmented_mask * 255).astype("uint8"))

        transformed = transform_noise(image=image, mask=mask)
        augmented_mask = transformed["mask"]
        cv2.imwrite(f"{augmented_masks_dir}/03_{file_name}", (augmented_mask * 255).astype("uint8"))

        transformed = transform_all(image=image, mask=mask)
        augmented_mask = transformed["mask"]
        cv2.imwrite(f"{augmented_masks_dir}/04_{file_name}", (augmented_mask * 255).astype("uint8"))

          # Debugging: Check unique values after augmentation
        unique_values_aug = np.unique(augmented_mask)
        print(f"Mask unique values for {file_name} AFTER augmentation: {unique_values_aug}")

        # Visualize one sample (randomly for 5% of images)
        if np.random.rand() < 0.05:
            visualize(image, mask, title=f"Original - {file_name}")
            visualize(transformed["image"], augmented_mask, title=f"Augmented - {file_name}")

    except Exception as e:
        print(f"Error processing file {file_name}: {e}")


Mask unique values for 1.png: [0 1]
Mask unique values for 1.png: [0 1]
Mask unique values for 1.png AFTER augmentation: [0 1]
Mask unique values for 10.png: [0 1]
Mask unique values for 10.png: [0 1]
Mask unique values for 10.png AFTER augmentation: [0 1]
Mask unique values for 100.png: [0 1]
Mask unique values for 100.png: [0 1]
Mask unique values for 100.png AFTER augmentation: [0 1]
Mask unique values for 101.png: [0 1]
Mask unique values for 101.png: [0 1]
Mask unique values for 101.png AFTER augmentation: [0 1]
Mask unique values for 102.png: [0 1]
Mask unique values for 102.png: [0 1]
Mask unique values for 102.png AFTER augmentation: [0 1]
Mask unique values for 103.png: [0 1]
Mask unique values for 103.png: [0 1]
Mask unique values for 103.png AFTER augmentation: [0 1]
Mask unique values for 104.png: [0 1]
Mask unique values for 104.png: [0 1]
Mask unique values for 104.png AFTER augmentation: [0 1]
Mask unique values for 105.png: [0 1]
Mask unique values for 105.png: [0 1]
Ma

Checking augmented images are sorted correctly

In [22]:
import os

# Assuming augmented_images_dir and augmented_masks_dir are defined as in your code
augmented_images_files = sorted(os.listdir(augmented_images_dir))
augmented_masks_files = sorted(os.listdir(augmented_masks_dir))

print("Augmented Images (first 5):")
for filename in augmented_images_files[:5]:
    print(filename)

print("\nAugmented Masks (first 5):")
for filename in augmented_masks_files[:5]:
    print(filename)

# Check alignment (first 5)
print("\nAlignment Check (first 5):")
for i in range(5):
    print(f"Image: {augmented_images_files[i]} <-> Mask: {augmented_masks_files[i]}")

Augmented Images (first 5):
00_1.png
00_10.png
00_100.png
00_101.png
00_102.png

Augmented Masks (first 5):
00_1.png
00_10.png
00_100.png
00_101.png
00_102.png

Alignment Check (first 5):
Image: 00_1.png <-> Mask: 00_1.png
Image: 00_10.png <-> Mask: 00_10.png
Image: 00_100.png <-> Mask: 00_100.png
Image: 00_101.png <-> Mask: 00_101.png
Image: 00_102.png <-> Mask: 00_102.png


Compressing images and masks


In [19]:
import random

# Compressing images and their masks together
compressed_dir = f"{BASE_DIR}/compressed1"
os.makedirs(compressed_dir, exist_ok=True)

augmented_images_files = sorted(os.listdir(augmented_images_dir))
augmented_masks_files = sorted(os.listdir(augmented_masks_dir))

#Check if number of images and masks are equal
if len(augmented_images_files) != len(augmented_masks_files):
    print("Error: Unequal number of images and masks!")
    #Handle error appropriately
    exit(1)


for i in range(len(augmented_images_files)):
    image_filename = augmented_images_files[i]
    mask_filename = augmented_masks_files[i]

    # Extract common prefix (e.g. "00_", "01_", etc.)
    image_prefix = image_filename.split('_')[0]
    mask_prefix = mask_filename.split('_')[0]

    if image_prefix != mask_prefix:
        print(f"Mismatch detected! Image prefix: {image_prefix}, Mask prefix: {mask_prefix} at index: {i}")
        continue  # Skip this pair if there is a mismatch

    try:
        # Load augmented image and mask ENSURING MATCHING NAMES
        image_path = os.path.join(augmented_images_dir, image_filename)
        mask_path = os.path.join(augmented_masks_dir, mask_filename)

        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE).astype("float32")
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE).astype("uint8")
        mask[mask == 255] = 1 # Normalize mask

        # Resize
        image = cv2.resize(image, (input_image_size, input_image_size))
        mask = cv2.resize(mask, (input_image_size, input_image_size))

        #Save compressed with matching name
        np.savez_compressed(
            os.path.join(compressed_dir, f"{image_filename}.npz"),
            **{
                f"image_{image_filename}": image,
                f"mask_{mask_filename}": mask
            }
        )

    except Exception as e:
        print(f"Failed to compress {image_filename} with {mask_filename}: {e}")


# Print 5 random compressed files for verification
random_indices = random.sample(range(len(os.listdir(compressed_dir))), 5)

for index in random_indices:
    compressed_file = os.listdir(compressed_dir)[index]
    print(f"Compressed file {index +1}: {compressed_file}")


Compressed file 210: 01_79.png.npz
Compressed file 464: 03_99.png.npz
Compressed file 42: 00_31.png.npz
Compressed file 260: 02_18.png.npz
Compressed file 115: 00_98.png.npz
