In [3]:
import os
import torch
from ultralytics import YOLO
from PIL import Image

# Crop training images using YOLOv8 model


# Compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Where the raw training images live and where the crops are written
# (the destination directory is created if it does not exist yet)
input_folder = "../aml-2025-feathers-in-focus/train_images/train_images/"
output_folder = "../aml-2025-feathers-in-focus/train_images/cropped_train_images/"
os.makedirs(output_folder, exist_ok=True)

# Load the YOLOv8 checkpoint and place it on the selected device

model = YOLO("yolov8s.pt")
model.to(device)

# Helper: crop from bounding box

def crop_with_bbox(image_path, bbox, save_path):
    """
    Crop the image at ``image_path`` to ``bbox`` and save it to ``save_path``.

    bbox = [x1, y1, x2, y2] in pixel coordinates (float values are accepted).

    The box is rounded outwards to whole pixels (floor for the top-left
    corner, ceil for the bottom-right corner) and then clamped to the image
    bounds, so a sub-pixel-sized detection still yields at least one pixel.
    If the clamped box is still empty (for example the box lies entirely
    outside the image), the full RGB image is saved instead of raising on
    an empty crop.

    Returns None; the only effect is the file written to ``save_path``.
    """
    import math  # local import: keeps this helper self-contained in the cell

    # Close the source file as soon as the pixel data has been converted.
    with Image.open(image_path) as src:
        img = src.convert("RGB")
    w, h = img.size

    x1, y1, x2, y2 = (float(v) for v in bbox)
    # round outwards, then ensure within bounds
    x1 = min(w, max(0, math.floor(x1)))
    y1 = min(h, max(0, math.floor(y1)))
    x2 = max(0, min(w, math.ceil(x2)))
    y2 = max(0, min(h, math.ceil(y2)))

    if x2 <= x1 or y2 <= y1:
        # Degenerate box: nothing sensible to crop, keep the whole image.
        img.save(save_path)
        return

    cropped = img.crop((x1, y1, x2, y2))
    cropped.save(save_path)



# Process images

import shutil  # local import: used for the byte-for-byte fallback copy below

image_files = [f for f in os.listdir(input_folder)
               if f.lower().endswith((".jpg", ".jpeg", ".png"))]

print(f"Found {len(image_files)} images.")

# Only keep detections of the "bird" class. Without this filter the largest
# box of *any* class would be used for the crop. The id is looked up by name
# from the loaded model; if the model has no "bird" class, no filter is applied.
_names = model.names
_name_items = _names.items() if isinstance(_names, dict) else enumerate(_names)
bird_class_ids = [idx for idx, name in _name_items if name == "bird"]
class_filter = bird_class_ids or None

for i, filename in enumerate(image_files):
    img_path = os.path.join(input_folder, filename)
    out_path = os.path.join(output_folder, filename)

    # Run YOLO inference
    results = model.predict(img_path, device=str(device),
                            classes=class_filter, verbose=False)

    # Get detections
    detections = results[0].boxes

    if len(detections) == 0:
        # If no bird found, copy the full image instead of cropping.
        # A plain file copy avoids re-encoding the image and leaves no
        # open file handle behind.
        shutil.copyfile(img_path, out_path)
    else:
        # YOLO box format: (x1, y1, x2, y2)
        # Choose the largest box (max area); argmax returns the first maximum.
        boxes_xyxy = detections.xyxy.cpu().numpy()
        areas = ((boxes_xyxy[:, 2] - boxes_xyxy[:, 0])
                 * (boxes_xyxy[:, 3] - boxes_xyxy[:, 1]))
        largest_box = boxes_xyxy[areas.argmax()]

        crop_with_bbox(img_path, largest_box, out_path)

    # Report progress for every 100th image, whichever branch handled it
    # (previously the no-detection branch skipped this line via `continue`).
    if i % 100 == 0:
        print(f"{i}/{len(image_files)} images processed...")

print("Done! Cropped images saved to:", output_folder)


Found 3926 images.
0/3926 images processed...
100/3926 images processed...
200/3926 images processed...
300/3926 images processed...
400/3926 images processed...
500/3926 images processed...
600/3926 images processed...
700/3926 images processed...
800/3926 images processed...
1000/3926 images processed...
1100/3926 images processed...
1200/3926 images processed...
1300/3926 images processed...
1400/3926 images processed...
1500/3926 images processed...
1600/3926 images processed...
1700/3926 images processed...
1800/3926 images processed...
1900/3926 images processed...
2000/3926 images processed...
2100/3926 images processed...
2200/3926 images processed...
2300/3926 images processed...
2400/3926 images processed...
2500/3926 images processed...
2600/3926 images processed...
2700/3926 images processed...
2800/3926 images processed...
2900/3926 images processed...
3000/3926 images processed...
3100/3926 images processed...
3200/3926 images processed...
3300/3926 images processed...
34

In [None]:
import os
import torch
from ultralytics import YOLO
from PIL import Image

# Crop test images using YOLOv8 model


# Compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Where the raw test images live and where the crops are written
# (the destination directory is created if it does not exist yet)
input_folder = "../aml-2025-feathers-in-focus/test_images/test_images/"
output_folder = "../aml-2025-feathers-in-focus/test_images/cropped_test_images/"
os.makedirs(output_folder, exist_ok=True)

# Load the YOLOv8 checkpoint and place it on the selected device

model = YOLO("yolov8s.pt")
model.to(device)

# Helper: crop from bounding box

def crop_with_bbox(image_path, bbox, save_path):
    """
    Crop the image at ``image_path`` to ``bbox`` and save it to ``save_path``.

    bbox = [x1, y1, x2, y2] in pixel coordinates (float values are accepted).

    The box is rounded outwards to whole pixels (floor for the top-left
    corner, ceil for the bottom-right corner) and then clamped to the image
    bounds, so a sub-pixel-sized detection still yields at least one pixel.
    If the clamped box is still empty (for example the box lies entirely
    outside the image), the full RGB image is saved instead of raising on
    an empty crop.

    Returns None; the only effect is the file written to ``save_path``.
    """
    import math  # local import: keeps this helper self-contained in the cell

    # Close the source file as soon as the pixel data has been converted.
    with Image.open(image_path) as src:
        img = src.convert("RGB")
    w, h = img.size

    x1, y1, x2, y2 = (float(v) for v in bbox)
    # round outwards, then ensure within bounds
    x1 = min(w, max(0, math.floor(x1)))
    y1 = min(h, max(0, math.floor(y1)))
    x2 = max(0, min(w, math.ceil(x2)))
    y2 = max(0, min(h, math.ceil(y2)))

    if x2 <= x1 or y2 <= y1:
        # Degenerate box: nothing sensible to crop, keep the whole image.
        img.save(save_path)
        return

    cropped = img.crop((x1, y1, x2, y2))
    cropped.save(save_path)



# Process images

import shutil  # local import: used for the byte-for-byte fallback copy below

image_files = [f for f in os.listdir(input_folder)
               if f.lower().endswith((".jpg", ".jpeg", ".png"))]

print(f"Found {len(image_files)} images.")

# Only keep detections of the "bird" class. Without this filter the largest
# box of *any* class would be used for the crop. The id is looked up by name
# from the loaded model; if the model has no "bird" class, no filter is applied.
_names = model.names
_name_items = _names.items() if isinstance(_names, dict) else enumerate(_names)
bird_class_ids = [idx for idx, name in _name_items if name == "bird"]
class_filter = bird_class_ids or None

for i, filename in enumerate(image_files):
    img_path = os.path.join(input_folder, filename)
    out_path = os.path.join(output_folder, filename)

    # Run YOLO inference
    results = model.predict(img_path, device=str(device),
                            classes=class_filter, verbose=False)

    # Get detections
    detections = results[0].boxes

    if len(detections) == 0:
        # If no bird found, copy the full image instead of cropping.
        # A plain file copy avoids re-encoding the image and leaves no
        # open file handle behind.
        shutil.copyfile(img_path, out_path)
    else:
        # YOLO box format: (x1, y1, x2, y2)
        # Choose the largest box (max area); argmax returns the first maximum.
        boxes_xyxy = detections.xyxy.cpu().numpy()
        areas = ((boxes_xyxy[:, 2] - boxes_xyxy[:, 0])
                 * (boxes_xyxy[:, 3] - boxes_xyxy[:, 1]))
        largest_box = boxes_xyxy[areas.argmax()]

        crop_with_bbox(img_path, largest_box, out_path)

    # Report progress for every 100th image, whichever branch handled it
    # (previously the no-detection branch skipped this line via `continue`).
    if i % 100 == 0:
        print(f"{i}/{len(image_files)} images processed...")

print("Done! Cropped images saved to:", output_folder)


Found 4000 images.
0/4000 images processed...


In [2]:
# Tight crop

import os
import cv2
import torch
import torchvision
from torchvision.transforms import functional as F
import numpy as np
from tqdm import tqdm

# --------------------------------------------------
# PATHS
# --------------------------------------------------

SOURCE_DIR = "../aml-2025-feathers-in-focus/train_images/train_images/"
DEST_DIR   = "../aml-2025-feathers-in-focus/train_images/tightcut_train_images/"

os.makedirs(DEST_DIR, exist_ok=True)

# --------------------------------------------------
# LOAD MODEL
# --------------------------------------------------

print("Loading Mask R-CNN...")
# NOTE: `pretrained=True` is the legacy spelling; torchvision >= 0.13 prefers
# the `weights=` argument but still accepts this form (with a warning).
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.eval()  # inference mode: the model returns predictions, not losses
# Use the GPU when one is visible, otherwise keep the model on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# COCO label index for "bird".
# torchvision's detection models emit the original COCO category ids
# (background at 0, with unused "N/A" slots), where 15 is "bench" and
# 16 is "bird". The 80-class, 1-based index (15) does not apply here.
BIRD_CLASS_ID = 16


# --------------------------------------------------
# PROCESS HELPER
# --------------------------------------------------

def crop_with_mask(image, mask):
    """
    Return the tightest rectangular crop of ``image`` that contains every
    pixel where ``mask`` (H × W, boolean or 0/1) equals 1 after casting to
    uint8. Returns None when no such pixel exists.

    The result is a slice of ``image`` (rows first, then columns), with the
    last selected row and column included.
    """
    selected = mask.astype(np.uint8) == 1

    # row / column coordinates of every selected pixel
    rows, cols = np.nonzero(selected)
    if rows.size == 0:
        return None

    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()

    # +1 because slice ends are exclusive
    return image[top:bottom + 1, left:right + 1]


# --------------------------------------------------
# MAIN LOOP
# --------------------------------------------------

print("Processing images...")

# Feed inputs to whatever device the model already lives on instead of
# assuming CUDA.
model_device = next(model.parameters()).device

# (filename, reason) for every image that did not produce an output file,
# so dropped images are reported instead of disappearing silently.
skipped = []

for filename in tqdm(os.listdir(SOURCE_DIR)):
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue

    src_path = os.path.join(SOURCE_DIR, filename)
    dst_path = os.path.join(DEST_DIR, filename)

    # load image (cv2.imread returns None when the file cannot be decoded)
    image = cv2.imread(src_path)
    if image is None:
        skipped.append((filename, "unreadable"))
        continue

    # convert to RGB for PyTorch
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = F.to_tensor(image_rgb).unsqueeze(0).to(model_device)

    # inference
    with torch.no_grad():
        out = model(img_tensor)[0]

    # filter for birds only
    is_bird = out["labels"] == BIRD_CLASS_ID
    if not bool(is_bird.any()):
        # no bird found → skip file
        skipped.append((filename, "no bird detected"))
        continue

    # out["masks"] has shape [N, 1, H, W]; threshold at 0.5 and move only the
    # bird masks to the host rather than every predicted mask.
    bird_masks = (out["masks"][is_bird][:, 0] > 0.5).cpu().numpy()

    # pick largest bird mask (argmax returns the first maximum)
    areas = bird_masks.reshape(len(bird_masks), -1).sum(axis=1)
    best = int(areas.argmax())
    if areas[best] == 0:
        skipped.append((filename, "empty bird mask"))
        continue

    # crop tightly (the original BGR image is cropped, ready for cv2.imwrite)
    cropped = crop_with_mask(image, bird_masks[best])
    if cropped is None:
        skipped.append((filename, "empty bird mask"))
        continue

    # save result; cv2.imwrite signals failure by returning False
    if not cv2.imwrite(dst_path, cropped):
        skipped.append((filename, "write failed"))

print("Done! Cropped images saved to:", DEST_DIR)
if skipped:
    print(f"{len(skipped)} image(s) produced no output file, e.g. {skipped[:5]}")


Loading Mask R-CNN...
Processing images...


100%|██████████| 3926/3926 [09:43<00:00,  6.73it/s]

Done! Cropped images saved to: ../aml-2025-feathers-in-focus/train_images/tightcut_train_images/





In [None]:
# Tight crop for test images

import os
import cv2
import torch
import torchvision
from torchvision.transforms import functional as F
import numpy as np
from tqdm import tqdm

# --------------------------------------------------
# PATHS
# --------------------------------------------------

# The test images are read from test_images/test_images/ (the same folder the
# YOLO test-crop cell reads), not from a sub-folder of train_images/.
SOURCE_DIR = "../aml-2025-feathers-in-focus/test_images/test_images/"
# NOTE(review): the destination still sits under train_images/ — confirm this
# is where downstream code expects the tight-cut test images.
DEST_DIR   = "../aml-2025-feathers-in-focus/train_images/tightcut_test_images/"

os.makedirs(DEST_DIR, exist_ok=True)

# --------------------------------------------------
# LOAD MODEL
# --------------------------------------------------

print("Loading Mask R-CNN...")
# NOTE: `pretrained=True` is the legacy spelling; torchvision >= 0.13 prefers
# the `weights=` argument but still accepts this form (with a warning).
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.eval()  # inference mode: the model returns predictions, not losses
# Use the GPU when one is visible, otherwise keep the model on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# COCO label index for "bird".
# torchvision's detection models emit the original COCO category ids
# (background at 0, with unused "N/A" slots), where 15 is "bench" and
# 16 is "bird". The 80-class, 1-based index (15) does not apply here.
BIRD_CLASS_ID = 16


# --------------------------------------------------
# PROCESS HELPER
# --------------------------------------------------

def crop_with_mask(image, mask):
    """
    Return the tightest rectangular crop of ``image`` that contains every
    pixel where ``mask`` (H × W, boolean or 0/1) equals 1 after casting to
    uint8. Returns None when no such pixel exists.

    The result is a slice of ``image`` (rows first, then columns), with the
    last selected row and column included.
    """
    selected = mask.astype(np.uint8) == 1

    # row / column coordinates of every selected pixel
    rows, cols = np.nonzero(selected)
    if rows.size == 0:
        return None

    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()

    # +1 because slice ends are exclusive
    return image[top:bottom + 1, left:right + 1]


# --------------------------------------------------
# MAIN LOOP
# --------------------------------------------------

print("Processing images...")

# Feed inputs to whatever device the model already lives on instead of
# assuming CUDA.
model_device = next(model.parameters()).device

# (filename, reason) for every image that did not produce an output file,
# so dropped images are reported instead of disappearing silently.
skipped = []

for filename in tqdm(os.listdir(SOURCE_DIR)):
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue

    src_path = os.path.join(SOURCE_DIR, filename)
    dst_path = os.path.join(DEST_DIR, filename)

    # load image (cv2.imread returns None when the file cannot be decoded)
    image = cv2.imread(src_path)
    if image is None:
        skipped.append((filename, "unreadable"))
        continue

    # convert to RGB for PyTorch
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = F.to_tensor(image_rgb).unsqueeze(0).to(model_device)

    # inference
    with torch.no_grad():
        out = model(img_tensor)[0]

    # filter for birds only
    is_bird = out["labels"] == BIRD_CLASS_ID
    if not bool(is_bird.any()):
        # no bird found → skip file
        skipped.append((filename, "no bird detected"))
        continue

    # out["masks"] has shape [N, 1, H, W]; threshold at 0.5 and move only the
    # bird masks to the host rather than every predicted mask.
    bird_masks = (out["masks"][is_bird][:, 0] > 0.5).cpu().numpy()

    # pick largest bird mask (argmax returns the first maximum)
    areas = bird_masks.reshape(len(bird_masks), -1).sum(axis=1)
    best = int(areas.argmax())
    if areas[best] == 0:
        skipped.append((filename, "empty bird mask"))
        continue

    # crop tightly (the original BGR image is cropped, ready for cv2.imwrite)
    cropped = crop_with_mask(image, bird_masks[best])
    if cropped is None:
        skipped.append((filename, "empty bird mask"))
        continue

    # save result; cv2.imwrite signals failure by returning False
    if not cv2.imwrite(dst_path, cropped):
        skipped.append((filename, "write failed"))

print("Done! Cropped images saved to:", DEST_DIR)
if skipped:
    print(f"{len(skipped)} image(s) produced no output file, e.g. {skipped[:5]}")
