<a href="https://colab.research.google.com/github/AvellinaLeong/NHM-Nannofossil-Segmentation-Project/blob/main/02_Standardise_Images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Standardise Image Size



*   The cropped segmented instances vary in size and shape
*   The largest one is therefore found, and every other image is padded with black space so that all cropped segmented instances share the same size and shape



In [None]:
# Mount Google Drive into the Colab filesystem so the notebook can read and
# write files below /content/drive/
from google.colab import drive
drive.mount('/content/drive/')

# Make the personal development folder on Drive the current working directory
MY_DEVELOPMENT_SPACE = '/content/drive/MyDrive/development/avellina/'
import os
os.chdir(MY_DEVELOPMENT_SPACE)
# IPython shell escapes: show the working directory and list its contents
# to confirm that the chdir above took effect
!pwd
!ls

Mounted at /content/drive/
/content/drive/MyDrive/development/avellina
Binary_Classification_notebooks  Detectron2_notebooks  Morphometrics_notebooks
detectron2			 Mask-RCNN	       output


In [None]:
import os
from PIL import Image

In [None]:
# Input folders holding the cropped segmentation images (train split, val split)
train_dir, val_dir = (
    "/content/drive/MyDrive/data/species_53/Binary_Classification/cropped_segmentations/train",
    "/content/drive/MyDrive/data/species_53/Binary_Classification/cropped_segmentations/val",
)

## Find the Largest Cropped Image

In [None]:
# Function to find the largest image
def find_largest_image(directories):
    """Locate the image with the greatest pixel area under the given directories.

    Every directory is walked recursively. A file counts as an image when its
    extension, compared case-insensitively, is ``.png``, ``.jpg`` or ``.jpeg``.

    Args:
        directories: Iterable of directory paths to search.

    Returns:
        A tuple ``(largest_image_path, max_width, max_height)``: the path of the
        image with the largest ``width * height`` together with that image's own
        width and height. When several images tie on area, the first one
        encountered is kept. If no image is found the result is ``("", 0, 0)``.

    Note:
        The returned width and height belong to one single image. Another image
        with a smaller area can still be wider or taller than these values.
    """
    image_extensions = {".png", ".jpg", ".jpeg"}

    max_width = 0
    max_height = 0
    max_size = 0
    largest_image_path = ""

    for directory in directories:
        for root, _, files in os.walk(directory):
            for file in files:
                # Match on the real extension and ignore case, so "IMG.PNG" is
                # included while a name that merely ends in "png" is not.
                if os.path.splitext(file)[1].lower() not in image_extensions:
                    continue

                file_path = os.path.join(root, file)
                with Image.open(file_path) as img:
                    width, height = img.size

                size = width * height
                # Strict ">" keeps the first image seen when areas are equal.
                if size > max_size:
                    max_width = width
                    max_height = height
                    max_size = size
                    largest_image_path = file_path

    return largest_image_path, max_width, max_height

# Scan the train and val folders together for the single largest image
largest_image_path, max_width, max_height = find_largest_image([train_dir, val_dir])

# Report where that image lives and how big it is
print(
    f"Largest image: {largest_image_path}",
    f"Dimensions (width x height): {max_width} x {max_height}",
    sep="\n",
)

Largest image: /content/drive/MyDrive/data/species_53/Binary_Classification/cropped_segmentations/train/PM_NF_5379_04_3_3.jpeg
Dimensions (width x height): 206 x 155


In [None]:
# Largest image: /content/drive/MyDrive/data/species_53/Binary_Classification/cropped_segmentations/train/PM_NF_5379_04_3_3.jpeg
# Dimensions (width x height): 206 x 155

## Pad all other images to scale to largest dimensions



*   The largest image measures 206 x 155 pixels (width x height)
*   Every image is centred on a black 206 x 206 square canvas (206 being the longer side of the largest image), so that all cropped images end up with the same standard size



In [None]:
# Root folder that receives the padded copies, with one sub-folder per split
output_dir = "/content/drive/MyDrive/data/species_53/Binary_Classification/standardised_cropped_segmentations"
train_output_dir, val_output_dir = (
    os.path.join(output_dir, split_name) for split_name in ("train", "val")
)

# Make sure both destinations exist; exist_ok means an existing folder is not an error
for split_output_dir in (train_output_dir, val_output_dir):
    os.makedirs(split_output_dir, exist_ok=True)

In [None]:
# Function to find the largest image
def find_largest_image(directories):
    """Locate the image with the greatest pixel area under the given directories.

    NOTE: this notebook already defines a function with this name in an earlier
    cell; from this cell onwards the definition below is the one in effect.

    Every directory is walked recursively. A file counts as an image when its
    extension, compared case-insensitively, is ``.png``, ``.jpg`` or ``.jpeg``.

    Args:
        directories: Iterable of directory paths to search.

    Returns:
        A tuple ``(largest_image_path, max_width, max_height)``: the path of the
        image with the largest ``width * height`` together with that image's own
        width and height. When several images tie on area, the first one
        encountered is kept. If no image is found the result is ``("", 0, 0)``.

    Note:
        The returned width and height belong to one single image. Another image
        with a smaller area can still be wider or taller than these values.
    """
    image_extensions = {".png", ".jpg", ".jpeg"}

    max_width = 0
    max_height = 0
    max_size = 0
    largest_image_path = ""

    for directory in directories:
        for root, _, files in os.walk(directory):
            for file in files:
                # Match on the real extension and ignore case, so "IMG.PNG" is
                # included while a name that merely ends in "png" is not.
                if os.path.splitext(file)[1].lower() not in image_extensions:
                    continue

                file_path = os.path.join(root, file)
                with Image.open(file_path) as img:
                    width, height = img.size

                size = width * height
                # Strict ">" keeps the first image seen when areas are equal.
                if size > max_size:
                    max_width = width
                    max_height = height
                    max_size = size
                    largest_image_path = file_path

    return largest_image_path, max_width, max_height

In [None]:
# Function to pad images to make them square and of the same size
def pad_images_to_square(input_directories, output_directories, target_size):
    """Centre every image on a black square canvas and save it under a mirrored path.

    Input and output directories are paired by position. Each input directory
    is walked recursively; every file whose extension (case-insensitive) is
    ``.png``, ``.jpg`` or ``.jpeg`` is pasted into the middle of a new black
    ``target_size`` x ``target_size`` RGB image. The result is saved in the
    paired output directory at the same relative path, creating sub-folders
    as needed. The source files are left untouched.

    Args:
        input_directories: Directories containing the images to pad.
        output_directories: Destination directories, one per input directory.
        target_size: Side length, in pixels, of the square output images.

    Raises:
        ValueError: If the numbers of input and output directories differ, or
            if an image is wider or taller than ``target_size`` (pasting it
            would silently crop it instead of padding it).
    """
    input_directories = list(input_directories)
    output_directories = list(output_directories)
    # zip() would quietly drop the unpaired directories, so check up front.
    if len(input_directories) != len(output_directories):
        raise ValueError(
            f"Got {len(input_directories)} input directories but "
            f"{len(output_directories)} output directories; they must pair up one-to-one."
        )

    image_extensions = {".png", ".jpg", ".jpeg"}

    for input_directory, output_directory in zip(input_directories, output_directories):
        for root, _, files in os.walk(input_directory):
            for file in files:
                # Match on the real extension and ignore case, so "IMG.PNG" is
                # included while a name that merely ends in "png" is not.
                if os.path.splitext(file)[1].lower() not in image_extensions:
                    continue

                input_file_path = os.path.join(root, file)

                # Mirror the file's position below the input directory
                relative_path = os.path.relpath(input_file_path, input_directory)
                output_file_path = os.path.join(output_directory, relative_path)

                with Image.open(input_file_path) as img:
                    width, height = img.size

                    # A negative paste offset would cut pixels off the image.
                    if width > target_size or height > target_size:
                        raise ValueError(
                            f"{input_file_path} is {width}x{height}, which does not fit "
                            f"into a {target_size}x{target_size} canvas."
                        )

                    new_image = Image.new("RGB", (target_size, target_size), (0, 0, 0))  # Black background
                    new_image.paste(img, ((target_size - width) // 2, (target_size - height) // 2))  # Centre the original

                    # Create the output directory if it doesn't exist
                    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

                    # Save the new image
                    new_image.save(output_file_path)

# Re-scan both splits so this cell has the largest image's dimensions available
largest_image_path, max_width, max_height = find_largest_image([train_dir, val_dir])

# The square canvas uses the longer side of that largest image.
# NOTE(review): find_largest_image picks the image with the largest area, so
# another image could still be wider or taller than this - confirm on new data.
target_size = max_width if max_width >= max_height else max_height

# Write a padded copy of every train / val image into its output folder
pad_images_to_square(
    input_directories=[train_dir, val_dir],
    output_directories=[train_output_dir, val_output_dir],
    target_size=target_size,
)

print(f"All images have been padded to {target_size}x{target_size} and saved successfully.")

All images have been padded to 206x206 and saved successfully.
