In [None]:
import os
import cv2
import sys
import torch
import torchvision
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms

# Parent of the current working directory; presumably the notebook is started
# from a sub-folder of the project, making this the project root.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
model_path = os.path.join(project_root, 'images')

# Expose <project_root>/images on the import search path, adding it only when
# it is absent so that re-running the cell never appends duplicates.
if sys.path.count(model_path) == 0:
    sys.path.append(model_path)

In [None]:
# Run inference on CUDA when PyTorch can see a GPU, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Custom YOLOv5 checkpoint downloaded from the Hugging Face URL below and loaded
# through the ultralytics/yolov5 hub entry point. force_reload=True makes
# torch.hub re-fetch the hub repository on every run instead of using its cache.
hf_path = 'https://huggingface.co/jspark2000/yolov5-vehicle/resolve/main/best.pt'
yolov5_model = torch.hub.load('ultralytics/yolov5', 'custom', path=hf_path, force_reload=True).to(device)

# Mask R-CNN (ResNet-50 FPN v2) with the library's default pretrained weights.
# The explicit `weights=` argument replaces the deprecated `pretrained=True`
# flag; DEFAULT selects the same pretrained checkpoint the legacy flag mapped to.
# `.eval()` is chained so the model is in inference mode as soon as it exists
# and the cell does not echo the full module repr as its output.
mask_rcnn_weights = torchvision.models.detection.MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT
mask_rcnn_model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(
    weights=mask_rcnn_weights
).to(device).eval()

In [None]:
def pad_to_square(image):
    """Pad an image with zero-valued borders so that it becomes square.

    The shorter dimension is extended on both sides; when the amount of
    padding needed is odd, the extra pixel goes to the right/bottom edge.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and accepted by
            ``torchvision.transforms.functional.pad`` (a PIL image when this
            is used right after ``ToPILImage``).

    Returns:
        The padded image; both sides equal the longer side of the input.
    """
    width, height = image.size
    side = max(width, height)
    extra_w = side - width
    extra_h = side - height
    # Border order passed to torchvision: (left, top, right, bottom).
    borders = (extra_w // 2, extra_h // 2, (extra_w + 1) // 2, (extra_h + 1) // 2)
    return torchvision.transforms.functional.pad(image, borders, fill=0, padding_mode='constant')

def load_image(image_path):
    """Read an image from disk and return it with RGB channel order.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        The image after a BGR -> RGB conversion with ``cv2.cvtColor``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for the path.
    """
    bgr_pixels = cv2.imread(image_path)
    if bgr_pixels is None:
        raise FileNotFoundError(f"Image file not found at {image_path}")
    # The result of imread is treated as BGR; the rest of the notebook works in RGB.
    return cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)

def detect_objects(image, confidence_threshold=0.5):
    """Run the YOLOv5 detector and keep the confident detections.

    Args:
        image: Input forwarded unchanged to the module-level ``yolov5_model``.
        confidence_threshold: Minimum value of column 4 (the detection
            confidence) a row needs in order to be kept.

    Returns:
        A list of 1-D NumPy rows taken from ``results.xyxy[0]`` whose
        confidence is at least ``confidence_threshold``. Columns 0-3 are used
        as box corners and column 5 as the class id elsewhere in this file.
    """
    detections = yolov5_model(image).xyxy[0].cpu().numpy()
    confident = detections[:, 4] >= confidence_threshold
    # list() splits the selected 2-D array back into one row per detection.
    return list(detections[confident])

def _unpad_mask(square_mask, box_width, box_height):
    """Cut the ``pad_to_square`` borders back out of a mask predicted on the padded crop."""
    mask_h, mask_w = square_mask.shape[:2]
    side = max(box_width, box_height)
    # Same left/top split that pad_to_square applies, rescaled to mask pixels.
    left = (side - box_width) // 2
    top = (side - box_height) // 2
    col0 = min(int(round(left * mask_w / side)), mask_w - 1)
    row0 = min(int(round(top * mask_h / side)), mask_h - 1)
    # Always keep at least one row/column so the later resize has valid input.
    col1 = max(col0 + 1, min(mask_w, int(round((left + box_width) * mask_w / side))))
    row1 = max(row0 + 1, min(mask_h, int(round((top + box_height) * mask_h / side))))
    return square_mask[row0:row1, col0:col1]


def segment_objects(image, boxes, target_size=(512, 512), target_label=3):
    """Segment the largest detected box with Mask R-CNN and measure its mask.

    The largest box (by area) is cropped from ``image``, padded to a square,
    resized to ``target_size`` and passed to the module-level
    ``mask_rcnn_model``. Among the predicted instances whose label equals
    ``target_label``, the one with the greatest mask height is kept.

    Args:
        image: H x W x 3 array indexed as ``image[y, x]``. Pixel values are
            treated as 0-255 (non-uint8 input is clipped and cast to uint8).
        boxes: Sequence of rows whose first four entries are ``x1, y1, x2, y2``.
        target_size: Size handed to ``torchvision.transforms.Resize``.
        target_label: Mask R-CNN label id an instance must have to be
            considered. Defaults to 3, the value previously hard-coded
            (presumably the COCO "car" category; confirm against the weights).

    Returns:
        ``[]`` when there are no boxes, the chosen box has no area inside the
        image, or no matching instance with a positive mask height is found.
        Otherwise a one-element list
        ``[(x1, y1, x2, y2, best_mask, max_height, max_width)]`` where the box
        is clamped to the image, ``best_mask`` is a uint8 array with the shape
        of ``image.shape[:2]`` holding the mask pasted at the box location,
        and ``max_height`` / ``max_width`` are measured on the padded,
        ``target_size``-resized mask (not in original image pixels).
    """
    # len() works for lists and ndarrays alike; `not boxes` is ambiguous for arrays.
    if len(boxes) == 0:
        return []

    largest_box = max(boxes, key=lambda box: (box[2] - box[0]) * (box[3] - box[1]))

    # Clamp to the image so slicing cannot wrap around (negative indices) and
    # the pasted mask always matches the slice it is written into.
    img_h, img_w = image.shape[:2]
    x1, y1, x2, y2 = map(int, largest_box[:4])
    x1, x2 = max(0, min(x1, img_w)), max(0, min(x2, img_w))
    y1, y2 = max(0, min(y1, img_h)), max(0, min(y2, img_h))
    box_w, box_h = x2 - x1, y2 - y1
    if box_w <= 0 or box_h <= 0:
        return []

    # Crop directly from the array as uint8: this avoids a device round-trip
    # and gives ToPILImage an unambiguous uint8 H x W x C input.
    crop = np.ascontiguousarray(image[y1:y2, x1:x2])
    if crop.dtype != np.uint8:
        crop = np.clip(crop, 0, 255).astype(np.uint8)

    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Lambda(pad_to_square),
        torchvision.transforms.Resize(target_size, interpolation=torchvision.transforms.InterpolationMode.BILINEAR),
        torchvision.transforms.ToTensor()
    ])
    model_input = transform(crop).unsqueeze(0).to(device)

    with torch.no_grad():
        prediction = mask_rcnn_model(model_input)[0]

    max_height = 0
    max_width = 0
    best_mask = None

    for mask, label in zip(prediction['masks'], prediction['labels']):
        # Skip other categories before doing any per-mask work.
        if int(label) != target_label:
            continue

        mask_np = mask[0].mul(255).byte().cpu().numpy()
        height = calculate_height_from_mask(mask_np)
        if height <= max_height:
            continue

        max_height = height
        max_width = calculate_width_from_mask(mask_np)

        # The mask covers the padded square; strip the padding before scaling
        # it to the box, otherwise the pasted mask is squashed along one axis.
        content = np.ascontiguousarray(_unpad_mask(mask_np, box_w, box_h))
        mask_resized = cv2.resize(content, (box_w, box_h), interpolation=cv2.INTER_NEAREST)
        mask_full_image = np.zeros(image.shape[:2], dtype=np.uint8)
        mask_full_image[y1:y2, x1:x2] = mask_resized
        best_mask = mask_full_image

    return [(x1, y1, x2, y2, best_mask, max_height, max_width)] if best_mask is not None else []


def calculate_width_from_mask(mask):
    """Return the largest horizontal extent of the foreground over all rows.

    A pixel counts as foreground when its value is strictly greater than 127.
    For each row that has foreground, the extent is the index of its last
    foreground column minus the index of its first one (so a single
    foreground pixel gives 0).

    Args:
        mask: 2-D array indexed as ``mask[row, col]``.

    Returns:
        The maximum extent as an ``int``, or 0 when no pixel is foreground.
    """
    foreground = mask > 127
    occupied_rows = foreground.any(axis=1)
    if not occupied_rows.any():
        return 0

    # argmax on a boolean row yields the first True; on the reversed row it
    # yields the distance of the last True from the right edge.
    first_col = foreground.argmax(axis=1)
    last_col = mask.shape[1] - 1 - foreground[:, ::-1].argmax(axis=1)
    # Rows without foreground would report a bogus full-width span, so they
    # are filtered out before taking the maximum.
    return int((last_col - first_col)[occupied_rows].max())


def calculate_height_from_mask(mask):
    """Return the largest vertical extent of the foreground over all columns.

    A pixel counts as foreground when its value is strictly greater than 127.
    For each column that has foreground, the extent is the index of its last
    foreground row minus the index of its first one (so a single foreground
    pixel gives 0).

    Args:
        mask: 2-D array indexed as ``mask[row, col]``.

    Returns:
        The maximum extent as an ``int``, or 0 when no pixel is foreground.
    """
    foreground = mask > 127
    occupied_cols = foreground.any(axis=0)
    if not occupied_cols.any():
        return 0

    # argmax on a boolean column yields the first True; on the flipped column
    # it yields the distance of the last True from the bottom edge.
    first_row = foreground.argmax(axis=0)
    last_row = mask.shape[0] - 1 - foreground[::-1, :].argmax(axis=0)
    # Columns without foreground would report a bogus full-height span, so
    # they are filtered out before taking the maximum.
    return int((last_row - first_row)[occupied_cols].max())


def process_dataset(dataset_path, output_csv="mask_data.csv"):
    """Measure the segmented object in every image of a dataset and save a CSV.

    Every immediate sub-folder of ``dataset_path`` is scanned for files whose
    name ends with ``.JPG`` (case-sensitive). The angle is parsed from the
    text between ``_H`` and ``_V`` in the file name. Each image is loaded,
    run through ``detect_objects``, filtered to class ids 1 and 10, and
    passed to ``segment_objects``; one row is recorded per returned result.

    Folders and files are visited in sorted order so the CSV rows come out in
    the same order on every run. Files whose name does not contain a
    parsable ``_H<int>_V`` tag are reported and skipped instead of aborting
    the whole run (the CSV is only written at the very end).

    Args:
        dataset_path: Directory containing one sub-folder per group of images.
        output_csv: Destination of the CSV file. Defaults to
            ``"mask_data.csv"`` in the current working directory.

    Returns:
        None. The result is the CSV with columns ``image_path``,
        ``max_height``, ``ratio`` (``max_height / max_width``, or 0 when the
        width is 0) and ``angle``.
    """
    results = []

    for folder_name in sorted(os.listdir(dataset_path)):
        folder_path = os.path.join(dataset_path, folder_name)
        if not os.path.isdir(folder_path):
            continue

        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.endswith(".JPG"):
                continue
            file_path = os.path.join(folder_path, file_name)

            # Parse the angle before any expensive model work.
            try:
                angle = int(file_name.split('_H')[1].split('_V')[0])
            except (IndexError, ValueError):
                print(f"Skipping {file_path}: no '_H<angle>_V' tag in file name")
                continue

            image_rgb = load_image(file_path)

            boxes = detect_objects(image_rgb)
            # Class ids 1 and 10 of the custom detector are treated as vehicles
            # (NOTE(review): confirm against the checkpoint's class list).
            vehicle_boxes = [box for box in boxes if int(box[5]) in [1, 10]]

            for _x1, _y1, _x2, _y2, _best_mask, max_height, max_width in segment_objects(image_rgb, vehicle_boxes):
                if max_height is None or max_width is None:
                    continue
                ratio = max_height / max_width if max_width != 0 else 0
                results.append([file_path, max_height, ratio, angle])

    df = pd.DataFrame(results, columns=["image_path", "max_height", "ratio", "angle"])
    df.to_csv(output_csv, index=False)

In [None]:
# Dataset root, relative to the notebook's working directory; process_dataset
# scans each of its immediate sub-folders for .JPG files.
dataset_path = "../images/angles"
# Runs detection + segmentation on every image and writes the measurements
# to mask_data.csv in the current working directory.
process_dataset(dataset_path)