In [324]:
import os
import torch
from torchvision import transforms
from ultralytics import YOLO
from PIL import Image, ImageOps, ImageDraw, ImageFont
import glob
from torchvision.transforms.functional import resize, pad
import numpy as np


In [325]:
def load_yolo_model(model_name):
    """Build a YOLO model from the weights file named after ``model_name``.

    Args:
        model_name: Base name of the weights file, without the ``.pt``
            extension (for example ``"yolov8n"``).

    Returns:
        The object produced by ``YOLO("<model_name>.pt")``.
    """
    # The weights path is relative: the model name plus the ".pt" suffix.
    weights_file = "{}.pt".format(model_name)
    return YOLO(weights_file)

In [326]:
def resize_and_pad(image, stride=32, max_size=640):
    """Scale ``image`` to fit a ``max_size`` square, then pad to a stride multiple.

    The image is resized with LANCZOS resampling using a single scale factor,
    so the aspect ratio is kept. The factor may be greater than 1, in which
    case a small image is enlarged. The resized image is then padded with
    fill value 0 on the right and bottom edges only, until both dimensions
    are divisible by ``stride``.

    Args:
        image: Image exposing ``size`` as ``(width, height)`` and a
            ``resize`` method (a PIL image in this notebook).
        stride: Both padded dimensions become multiples of this value.
        max_size: Upper bound, in pixels, for the longer resized side.

    Returns:
        tuple: ``(padded_image, resized_size)`` where ``resized_size`` is the
        ``(width, height)`` of the resized image before padding.
    """
    src_width, src_height = image.size
    ratio = min(max_size / src_width, max_size / src_height)

    # int() truncates, so the short side of a very elongated image can come
    # out as 0 pixels; clamp to 1 so the resize target is never empty.
    new_size = (
        max(1, int(src_width * ratio)),
        max(1, int(src_height * ratio)),
    )
    resized_image = image.resize(new_size, Image.Resampling.LANCZOS)

    # -n % stride is the distance from n up to the next multiple of stride
    # (0 when n is already a multiple).
    width, height = resized_image.size
    pad_right = -width % stride
    pad_bottom = -height % stride

    # Border order is (left, top, right, bottom): pad right and bottom only.
    padded_image = ImageOps.expand(
        resized_image, border=(0, 0, pad_right, pad_bottom), fill=0
    )
    return padded_image, resized_image.size

In [327]:
# def detect_and_save_labels(image_path, yolo_model, output_dir):
#     image = Image.open(image_path)
#     resized_padded_image = resize_and_pad(image)

#     # Convert image to tensor
#     transform = transforms.Compose([transforms.ToTensor()])
#     img_tensor = transform(resized_padded_image).unsqueeze(0)

#     # Predict using the YOLO model
#     results = yolo_model(img_tensor)

#     # Ensure the first result object is correctly accessed
#     if results:
#         detection_results = results[0]

#         # Create the label file
#         label_file = os.path.splitext(os.path.basename(image_path))[0] + ".txt"
#         label_path = os.path.join(output_dir, label_file)

#         with open(label_path, 'w') as f:
#             for i, bbox in enumerate(detection_results.boxes.xyxy):
#                 class_id = detection_results.boxes.cls[i]
#                 class_name = results[0].names[class_id.item() if isinstance(class_id, torch.Tensor) else class_id]
#                 conf = results[0].boxes.conf[i]
                
#                 x1, y1, x2, y2 = bbox.tolist()

#                 # Only write 'person' detections with confidence > 0.5
#                 if class_name == "person" and conf > 0.5:
#                     scale_x = image.size[0] / resized_padded_image.size[0]
#                     scale_y = image.size[1] / resized_padded_image.size[1]
#                     x_center = ((x1 + x2) / 2) * scale_x
#                     y_center = ((y1 + y2) / 2) * scale_y
#                     width = (x2 - x1) * scale_x
#                     height = (y2 - y1) * scale_y
#                     f.write(f"person {x_center:.2f} {y_center:.2f} {width:.2f} {height:.2f}\n")


In [328]:
def detect_and_save_labels(image_path, yolo_model, output_dir,
                           target_class="person", min_conf=0.0):
    """Run the detector on one image and write its label file.

    The label file is ``<output_dir>/<image basename>.txt``. Each kept
    detection becomes one line ``"<target_class> x1 y1 x2 y2"`` with two
    decimals, using the corner coordinates exactly as reported by
    ``boxes.xyxy``.
    NOTE(review): the sample run in this notebook shows coordinates beyond
    the 640x544 inference shape, which suggests they are already in
    original-image pixels - confirm against the Ultralytics documentation.

    Only detections whose class name equals ``target_class`` are written.
    Previously every detection (for example a "tie") was written with the
    hard-coded label "person". Every detection is still printed.

    Args:
        image_path: Path of the image to process.
        yolo_model: Callable that takes the opened image and returns a
            sequence of result objects exposing ``boxes`` (with ``xyxy``,
            ``cls`` and ``conf``) and ``names`` (class id -> class name).
        output_dir: Existing directory that receives the label file.
        target_class: Class name to keep; also used as the line prefix.
        min_conf: Detections with confidence below this value are skipped.
            The default of 0.0 keeps every confidence, as before.

    Returns:
        None. If the model returns no results, no file is written.
    """
    # The context manager releases the image's file handle after inference,
    # which matters when looping over a whole directory.
    with Image.open(image_path) as image:
        results = yolo_model(image)

    if not results:
        return

    detection_results = results[0]
    boxes = detection_results.boxes

    label_file = os.path.splitext(os.path.basename(image_path))[0] + ".txt"
    label_path = os.path.join(output_dir, label_file)

    # Convert each tensor to plain Python values once, not per detection.
    class_ids = boxes.cls.cpu().tolist()
    confidences = boxes.conf.cpu().tolist()
    corners = boxes.xyxy.cpu().tolist()

    with open(label_path, 'w') as f:
        for cls_id, conf, (x1, y1, x2, y2) in zip(class_ids, confidences, corners):
            print(f"Class: {cls_id}, Confidence: {conf:.2f}, Bounding Box: {x1:.2f}, {y1:.2f}, {x2:.2f}, {y2:.2f} for {image_path}")

            # Class ids arrive as floats (e.g. 0.0); names is keyed by int.
            class_name = detection_results.names[int(cls_id)]
            if class_name != target_class or conf < min_conf:
                continue

            f.write(f"{target_class} {x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f}\n")

In [334]:
# Configuration: which YOLO weights to use and where images and labels live.
model_name = "yolov8n"
input_dir = "dataset/inria/Train/pos/"
output_dir = f"dataset/inria/Train/pos/yolo-labels_{model_name}/"

# Load the detector once and make sure the label directory exists.
yolo_model = load_yolo_model(model_name)
os.makedirs(output_dir, exist_ok=True)

# Write one label file for every PNG found directly inside input_dir.
png_pattern = os.path.join(input_dir, '*.png')
for image_file in glob.glob(png_pattern):
    detect_and_save_labels(image_file, yolo_model, output_dir)


0: 640x544 8 persons, 1 tie, 13.0ms
Speed: 2.0ms preprocess, 13.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 544)
Class: 0.0, Confidence: 0.89, Bounding Box: 265.26, 120.14, 506.55, 710.75 for dataset/inria/Train/pos\crop001001.png
Class: 0.0, Confidence: 0.88, Bounding Box: 30.17, 325.73, 211.92, 716.71 for dataset/inria/Train/pos\crop001001.png
Class: 0.0, Confidence: 0.87, Bounding Box: 165.59, 177.87, 294.45, 650.88 for dataset/inria/Train/pos\crop001001.png
Class: 0.0, Confidence: 0.86, Bounding Box: 499.16, 286.38, 818.00, 976.00 for dataset/inria/Train/pos\crop001001.png
Class: 0.0, Confidence: 0.74, Bounding Box: 2.69, 134.45, 183.69, 474.33 for dataset/inria/Train/pos\crop001001.png
Class: 27.0, Confidence: 0.60, Bounding Box: 195.42, 274.61, 222.28, 386.64 for dataset/inria/Train/pos\crop001001.png
Class: 0.0, Confidence: 0.37, Bounding Box: 0.00, 161.72, 61.35, 393.77 for dataset/inria/Train/pos\crop001001.png
Class: 0.0, Confidence: 0.35, Bounding Box: 0