In [1]:
import os
import cv2
import pandas as pd
import numpy as np

In [2]:
def is_blank_region(region, blank_threshold, white_level=64):
    """Report whether an image region consists almost entirely of light pixels.

    A pixel counts as "white" when its grayscale value is at least
    ``white_level``. The region is blank when the fraction of such pixels is
    at least ``blank_threshold``.

    Args:
        region: Image array. A 3-D array is converted to grayscale with
            ``cv2.COLOR_BGR2GRAY`` first; any other rank is used as-is.
        blank_threshold: Minimum fraction of white pixels (compared against a
            ratio in the range 0..1) for the region to count as blank.
        white_level: Lowest grayscale value treated as white. Defaults to 64,
            the cutoff that used to be hard-coded here.

    Returns:
        bool: True when the region is blank. A zero-area region is never
        blank.
    """
    # A zero-area window (e.g. an out-of-range slice) holds no evidence of
    # blankness; returning early also avoids a 0/0 division and keeps an empty
    # colour array away from cv2.cvtColor, which rejects empty input.
    if region.size == 0:
        return False

    # Convert to grayscale if it's a colour image.
    if len(region.shape) == 3:
        region = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)

    white_pixels = np.sum(region >= white_level)
    return bool((white_pixels / region.size) >= blank_threshold)


def expand_bounding_box(image, bbox, left_expand, right_expand, vertical_region_height, step, blank_threshold):
    """Widen a box by fixed margins and extend it vertically to the nearest blank band.

    Horizontally the box grows by ``left_expand`` / ``right_expand`` pixels,
    clamped to the image. Vertically a window of ``vertical_region_height``
    rows, spanning the original box's columns, is slid away from the box in
    increments of ``step`` rows. The first window that ``is_blank_region``
    accepts becomes the new edge, and that blank window is included in the
    box. If no blank window is found in a direction, that edge is left
    unchanged.

    Args:
        image: Image array whose first two dimensions are (height, width).
        bbox: ``[x_min, y_min, x_max, y_max]`` in pixel coordinates
            (integers are assumed, since the values are used as slice bounds).
        left_expand: Pixels added to the left of the box.
        right_expand: Pixels added to the right of the box.
        vertical_region_height: Height in rows of the probe window.
        step: Row increment between successive probe positions.
        blank_threshold: Forwarded to ``is_blank_region``.

    Returns:
        list: ``[x_min_expanded, y_min, x_max_expanded, y_max]``.
    """
    x_min, y_min, x_max, y_max = bbox
    height, width = image.shape[:2]

    # Horizontal expansion (fixed amount), clamped to the image.
    x_min_expanded = max(0, x_min - left_expand)
    x_max_expanded = min(width, x_max + right_expand)

    # Columns probed for blankness: the original box width, clamped so that a
    # negative coordinate cannot wrap around through negative indexing.
    probe_left = max(0, min(width, x_min))
    probe_right = max(0, min(width, x_max))

    # Search upward for a blank band.
    for offset in range(0, height, step):
        band_top = max(0, y_min - offset - vertical_region_height)
        band = image[band_top:band_top + vertical_region_height, probe_left:probe_right]
        # An empty band is treated as "not blank" without calling the helper.
        if band.size and is_blank_region(band, blank_threshold):
            y_min = band_top
            break
        if band_top == 0:
            # The window is pinned to the top border: every further offset
            # would re-test exactly the same pixels, so stop here.
            break

    # Search downward for a blank band.
    for offset in range(0, height, step):
        band_bottom = max(0, min(height, y_max + offset + vertical_region_height))
        # Clamp the start so a window taller than the remaining image does not
        # wrap to the other end of the array.
        band_start = max(0, band_bottom - vertical_region_height)
        band = image[band_start:band_bottom, probe_left:probe_right]
        if band.size and is_blank_region(band, blank_threshold):
            y_max = band_bottom
            break
        if band_bottom == height:
            # Pinned to the bottom border: later offsets repeat this probe.
            break

    return [x_min_expanded, y_min, x_max_expanded, y_max]


In [3]:
def _parse_bounding_box(raw_bbox):
    """Turn a stringified box such as ``"[10.0, 20, 30, 40]"`` into a list of ints."""
    # Splitting on "," alone accepts both "a, b" and "a,b"; float() ignores
    # the surrounding whitespace of each piece.
    return [int(float(coord)) for coord in str(raw_bbox).strip().strip("[]").split(",")]


def crop_bounding_boxes(csv_path, image_folder, output_folder, left_expand=100, right_expand=100, vertical_region_height=45,
                                 step=5, blank_threshold=0.97, output_csv_path="output_crops.csv"):
    """Crop an expanded region around every bounding box listed in a CSV.

    For each CSV row the image named in ``"File Name"`` is loaded from
    ``image_folder``, the box in ``"BoundingBox"`` is grown with
    ``expand_bounding_box``, and the resulting crop is written to
    ``output_folder`` as ``<image stem>_<Object Number>.png``. Rows whose
    image is missing or unreadable, or whose crop is empty or cannot be
    written, are reported on stdout and skipped.

    A new CSV is written containing the original columns of every processed
    row plus ``"Crop File Name"``, ``"Crop Position"`` (the expanded box in
    image coordinates), ``"Centroid in Crop"`` and ``"BoundingBox in Crop"``
    (the original box and its centre, relative to the crop).

    Args:
        csv_path: CSV with ``"File Name"``, ``"Object Number"`` and
            ``"BoundingBox"`` columns.
        image_folder: Directory containing the source images.
        output_folder: Directory for the crops; created if absent.
        left_expand: Forwarded to ``expand_bounding_box``.
        right_expand: Forwarded to ``expand_bounding_box``.
        vertical_region_height: Forwarded to ``expand_bounding_box``.
        step: Forwarded to ``expand_bounding_box``.
        blank_threshold: Forwarded to ``expand_bounding_box``.
        output_csv_path: Where the updated CSV is written. Defaults to
            ``"output_crops.csv"`` in the working directory, the location
            that used to be hard-coded.

    Returns:
        None
    """
    data = pd.read_csv(csv_path)
    os.makedirs(output_folder, exist_ok=True)

    new_rows = []    # Updated rows for the new CSV

    # Most recently decoded image; rows for the same file reuse it instead of
    # reading and decoding the file again for every object.
    last_file_name, last_image = None, None

    for _, row in data.iterrows():
        file_name = row["File Name"]
        object_number = row["Object Number"]

        if object_number == 1:
            print(f"Start processing {file_name}")
        bbox = _parse_bounding_box(row["BoundingBox"])

        image_path = os.path.join(image_folder, file_name)
        if not os.path.exists(image_path):
            print(f"Image file {file_name} not found in {image_folder}. Skipping...")
            continue

        if file_name != last_file_name:
            last_image = cv2.imread(image_path)
            last_file_name = file_name
        image = last_image
        if image is None:
            print(f"Failed to read image {image_path}. Skipping...")
            continue

        # Expand the bounding box
        expanded_bbox = expand_bounding_box(image, bbox, left_expand, right_expand, vertical_region_height, step, blank_threshold)
        x_min, y_min, x_max, y_max = expanded_bbox

        # Crop the expanded region
        crop = image[y_min:y_max, x_min:x_max]
        if crop.size == 0:
            # cv2.imwrite rejects empty images; skip instead of aborting the run.
            print(f"Empty crop for object {object_number} in {file_name}. Skipping...")
            continue

        # splitext removes only the final extension, so names that contain
        # extra dots keep their full stem and cannot collide.
        crop_name = f"{os.path.splitext(file_name)[0]}_{object_number}.png"
        crop_path = os.path.join(output_folder, crop_name)
        if not cv2.imwrite(crop_path, crop):
            # Do not list a crop in the CSV that was never written to disk.
            print(f"Failed to write crop {crop_path}. Skipping...")
            continue

        # Original box and its centre, expressed relative to the crop origin
        slur_bbox_in_crop = [bbox[0] - x_min, bbox[1] - y_min, bbox[2] - x_min, bbox[3] - y_min]
        centroid_x = (slur_bbox_in_crop[0] + slur_bbox_in_crop[2]) / 2
        centroid_y = (slur_bbox_in_crop[1] + slur_bbox_in_crop[3]) / 2
        centroid_in_crop = [centroid_x, centroid_y]

        # Update the row with new information
        updated_row = row.to_dict()
        updated_row["Crop File Name"] = crop_name
        updated_row["Crop Position"] = expanded_bbox
        updated_row["Centroid in Crop"] = centroid_in_crop
        updated_row["BoundingBox in Crop"] = slur_bbox_in_crop
        new_rows.append(updated_row)

    # Save the updated rows to a new CSV
    new_data = pd.DataFrame(new_rows)
    new_data.to_csv(output_csv_path, index=False)
    print(f"Updated CSV saved to {output_csv_path}")

In [4]:
# Locations used by this run: the object list produced earlier, the folder
# holding the source images, and the folder that receives the crops.
csv_path = "output_objects.csv"
image_folder = "raw"
output_folder = "crops"

# Crop every listed object; step and blank_threshold keep their defaults.
crop_bounding_boxes(
    csv_path=csv_path,
    image_folder=image_folder,
    output_folder=output_folder,
    left_expand=100,
    right_expand=100,
    vertical_region_height=45,
)

Start processing HN894_220.png
Start processing HN894_221.png
Start processing HN894_222.png
Start processing HN894_223.png
Start processing HN894_224.png
Start processing HN894_225.png
Start processing HN894_226.png
Start processing HN894_227.png
Start processing HN894_229.png
Start processing HN894_230.png
Start processing HN894_231.png
Start processing HN894_232.png
Start processing HN894_233.png
Start processing HN894_234.png
Start processing HN894_235.png
Start processing HN894_236.png
Start processing HN894_237.png
Start processing HN894_238.png
Start processing HN894_239.png
Start processing HN894_240.png
Start processing HN894_241.png
Start processing HN894_242.png
Start processing HN894_243.png
Start processing HN894_244.png
Start processing HN894_245.png
Start processing HN894_246.png
Start processing HN894_247.png
Start processing HN894_248.png
Start processing HN894_249.png
Start processing HN894_250.png
Start processing HN894_251.png
Start processing HN894_252.png
Start pr