# Cropping images automatically

In [6]:
import os
import pandas as pandas
import cv2

In [7]:
# directories
raw_dirs = [
    r"E:\DLSU\Corallian\Dataset\C-30\SHINE-1739(SanDiegoA)\RAW\Olympus TG6 CBRACT 2_01",
    r"E:\DLSU\Corallian\Dataset\C-30\SHINE-1739(SanDiegoA)\RAW\Olympus TG6 CBRACT 2_02",
]

scored_dir = r"E:\DLSU\Corallian\Dataset\C-30\SHINE-1739(SanDiegoA)\SCORED"
output_dir = r"E:\DLSU\Corallian\Dataset\cropped-corals"

# create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

In [8]:
crop_size = 500
half_crop = crop_size // 2

In [9]:
# function to find full path of an image

def find_image(image_name, raw_dirs):
    for raw_dir in raw_dirs:
        image_path = os.path.join(raw_dir, image_name)
        if os.path.exists(image_path):
            return image_path
    return None

In [10]:
# Process each CPCE annotation file
for annotation_file in os.listdir(scored_dir):
    if annotation_file.endswith(".cpce"):
        continue

    annotation_path = os.path.join(scored_dir, annotation_file)
    print(f"Processing: {annotation_path}")

    # Parse the annotation file
    with open(annotation_path, "r") as file:
        lines = file.readlines()

    try:
        # Extract image name
        image_name = os.path.basename(lines[0].split(",")[1].strip('"'))

        # Find the line containing "0,0" to locate the start of the annotations
        start_index = None
        for i, line in enumerate(lines):
            if line.strip() == "0,0":
                start_index = i + 1
                break

        if start_index is None:
            print(f"Skipping file {annotation_file}: '0,0' marker not found.")
            continue

        # Parse the number of annotations
        try:
            num_annotations = int(lines[start_index].strip())
        except ValueError:
            print(f"Skipping file {annotation_file} due to invalid number of annotations.")
            continue

        if num_annotations <= 0:
            print(f"Skipping file {annotation_file} due to zero or negative annotations.")
            continue

        # Extract points and labels
        points = []
        labels = []
        for i in range(num_annotations):
            point_line = lines[start_index + 1 + i].strip()
            label_line = lines[start_index + 1 + num_annotations + i].strip()

            try:
                # Parse point coordinates
                x, y = map(int, point_line.split(","))
                points.append((x, y))

                # Parse label
                label = label_line.split(",")[1].strip('"')
                labels.append(label)
            except (ValueError, IndexError):
                print(f"Skipping invalid line in file {annotation_file}: {point_line} or {label_line}")

    except Exception as e:
        print(f"Error parsing annotation file {annotation_file}: {e}")
        continue

    # Find the corresponding image
    image_path = find_image(image_name, raw_dirs)
    if not image_path:
        print(f"Image not found: {image_name}")
        continue

    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image: {image_path}")
        continue

    # Crop and save each annotation
    for i, (point, label) in enumerate(zip(points, labels)):
        x, y = point

        # Define crop boundaries
        x_min = max(0, x - half_crop)
        y_min = max(0, y - half_crop)
        x_max = min(image.shape[1], x + half_crop)
        y_max = min(image.shape[0], y + half_crop)

        # Check for valid crop (not empty)
        if x_min >= x_max or y_min >= y_max:
            print(f"Skipping invalid crop at ({x}, {y}) in image {image_name}. Crop is empty.")
            continue

        # Crop the image
        cropped = image[y_min:y_max, x_min:x_max]

        # Check if cropped image is empty
        if cropped.size == 0:
            print(f"Skipping empty crop for {label}_{os.path.splitext(image_name)[0]}_{i}.jpg")
            continue

        # Save the cropped image
        output_path = os.path.join(output_dir, f"{label}_{os.path.splitext(image_name)[0]}_{i}.jpg")
        cv2.imwrite(output_path, cropped)
        print(f"Saved: {output_path}")

print("Cropping complete!")

Processing: E:\DLSU\Corallian\Dataset\C-30\SHINE-1739(SanDiegoA)\SCORED\PB100151.cpc
Skipping invalid crop at (54640, 32169) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (45699, 5082) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (10830, 21703) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (8235, 41829) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (36247, 33230) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (46413, 9229) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (20628, 5277) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (19278, 11437) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (44887, 2609) in image PB100151.JPG. Crop is empty.
Skipping invalid crop at (8198, 6078) in image PB100151.JPG. Crop is empty.
Processing: E:\DLSU\Corallian\Dataset\C-30\SHINE-1739(SanDiegoA)\SCORED\PB100152.cpc
Skipping invalid crop at (14763, 25584) in image PB100152