## Prepare image

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

# If the input image is in range 0..1, multiply it by 255 first.
# The image is assumed to be an ndarray of shape [height, width, channels],
# where channels can be 1, 3 or 4.
def imshow(img):
    """Display an image inline in the notebook by encoding it as JPEG.

    Args:
        img: image array accepted by ``cv2.imencode`` (see the note above
            about value range and shape).

    Raises:
        ValueError: if OpenCV reports that the image could not be encoded.
    """
    from IPython.display import Image, display

    # imencode signals failure through its first return value, not an exception.
    ok, encoded = cv2.imencode('.jpg', img)
    if not ok:
        raise ValueError("cv2.imencode could not encode the image as JPEG")
    # Pass raw bytes so IPython can recognise the JPEG header instead of
    # falling back to its default format for non-bytes data.
    display(Image(data=encoded.tobytes()))

# Class id -> label; the id is simply the position of the label in this tuple.
all_classes = dict(enumerate((
    "plane",
    "ship",
    "storage tank",
    "baseball diamond",
    "tennis court",
    "basketball court",
    "ground track field",
    "harbor",
    "bridge",
    "large vehicle",
    "small vehicle",
    "helicopter",
    "roundabout",
    "soccer ball field",
    "swimming pool",
)))

# Only the vehicle classes are kept; every other id goes on the exclusion list.
classes_to_keep = [9, 10]
classes_to_exclude = [
    class_id
    for class_id in all_classes
    if class_id not in classes_to_keep
]

In [None]:
full_image_path = "../datasets/experiments/luchtfotos/beeldmateriaal.nl/2025_115000_487000_RGB_JPEG_hrl.tif"

# Map coordinates that the later part_area computation uses as the
# left (x) and bottom (y) edge of the full image.
rd_x = 115000
rd_y = 487000

# Ground resolution: centimetres covered by one pixel.
cm_per_px = 8

# cv2.imread returns None instead of raising when the file is missing or
# unreadable, so fail early with a clear message.
full_image = cv2.imread(full_image_path)
if full_image is None:
    raise FileNotFoundError(f"Could not read image: {full_image_path}")
print(f"Image shape: {full_image.shape}")

# Image height converted from pixels to metres (cm -> m).
full_image_m = (full_image.shape[0] * cm_per_px) / 100
print(f"({full_image_m}m)")

In [None]:
# Top-left pixel of the crop and its edge length (the crop is square).
start_x = 0
start_y = 0

image_size_px = 1024 * 4

part_image = full_image[
    start_y:start_y+image_size_px, 
    start_x:start_x+image_size_px
]

# NumPy slicing silently truncates at the image border. The georeferencing
# below assumes the crop really is image_size_px on both sides, so refuse a
# truncated crop instead of producing a wrong part_area.
if part_image.shape[:2] != (image_size_px, image_size_px):
    raise ValueError(
        f"Crop of {image_size_px}px at ({start_x}, {start_y}) does not fit "
        f"inside the image of shape {full_image.shape}"
    )

# Same quantities expressed in metres.
start_x_m = (start_x * cm_per_px) / 100
start_y_m = (start_y * cm_per_px) / 100
image_size_m = (image_size_px * cm_per_px) / 100

# Bounding box of the crop as [x_min, y_min, x_max, y_max] in map units.
# Pixel rows run top-to-bottom while map y runs bottom-to-top, hence the
# subtraction from the top edge (rd_y + full_image_m).
part_area = [
    rd_x + start_x_m, 
    rd_y + full_image_m - start_y_m - image_size_m, 
    rd_x + start_x_m + image_size_m, 
    rd_y + full_image_m - start_y_m
]

In [None]:
# Display the cropped tile inline.
imshow(part_image)

## YOLO

In [None]:
from ultralytics import YOLO

# Load the YOLO model from local weights (the file name indicates the yolo11m OBB variant).
model = YOLO("../datasets/experiments/model_weights/yolo11m-obb.pt")

In [None]:
# Run the model on the cropped tile, restricted to the classes of interest,
# and keep the first entry of the returned results.
yolo_result = model(
    part_image,
    conf=0.2,
    classes=classes_to_keep,
    agnostic_nms=True,
)[0]

In [None]:
# Corner points and class ids of the oriented boxes from the plain YOLO run.
# NOTE: the "Convert SAHI result to YOLO" cell assigns the same two names,
# so whichever of the two cells ran last determines what is drawn/exported.
obb_boxes = yolo_result.obb.xyxyxyxy
obb_cls = yolo_result.obb.cls

## SAHI

In [None]:
from sahi import AutoDetectionModel
from sahi.predict import get_prediction, get_sliced_prediction

# SAHI wrapper around the same OBB weights.
# Any yolov8/yolov9/yolo11/yolo12/rt-detr det model is supported here.
detection_model = AutoDetectionModel.from_pretrained(
    model_type="ultralytics",
    model_path="../datasets/experiments/model_weights/yolo11m-obb.pt",
    confidence_threshold=0.2,
    image_size=1024,
    device="cpu",  # switch to 'cuda:0' if a GPU is available
)

In [None]:
# Single-pass prediction on the whole crop. np.flip on axis 2 reverses the
# channel order (OpenCV loads BGR; presumably SAHI expects RGB - confirm).
sahi_result = get_prediction(
    np.flip(part_image, 2),
    detection_model,
    exclude_classes_by_id=classes_to_exclude,
)

In [None]:
# Write the annotated image for the single-pass prediction.
sahi_result.export_visuals(
    export_dir="../datasets/experiments/",
    file_name="sample1_sahi_1",
    hide_labels=True,
    hide_conf=True,
)

In [None]:
# Sliced prediction: the crop is processed in overlapping 1024 x 1024 tiles
# (10% overlap in both directions). Alternative noted earlier: 512 x 512 slices.
sahi_result = get_sliced_prediction(
    np.flip(part_image, 2),
    detection_model,
    slice_height=1024,
    slice_width=1024,
    overlap_height_ratio=0.1,
    overlap_width_ratio=0.1,
    exclude_classes_by_id=classes_to_exclude,
    postprocess_class_agnostic=True,
)

In [None]:
# Write the annotated image for the sliced prediction.
sahi_result.export_visuals(
    export_dir="../datasets/experiments/",
    file_name="sahi_test_1",
    hide_labels=True,
    hide_conf=True,
)

## Convert SAHI result to YOLO

In [None]:
# Each prediction's segmentation is reshaped into four (x, y) corner points;
# the result is a plain nested list of shape [n_detections][4][2].
obb_boxes = np.asarray(
    [np.reshape(pred.mask.segmentation, [4, 2]) for pred in sahi_result.object_prediction_list]
).tolist()
# Matching class ids, one per detection, as plain Python ints.
obb_cls = np.array(
    [int(pred.category.id) for pred in sahi_result.object_prediction_list]
).tolist()

## Visualize bounding boxes

In [None]:
from ultralytics.utils.plotting import Annotator, colors

obb_names = model.names

# Draw on a private copy of the crop. np.ascontiguousarray hands back the
# input array itself when it is already contiguous, in which case an
# annotator that draws in place would paint the boxes onto part_image,
# which later cells reuse for plotting.
obb_image = part_image.copy()

ann = Annotator(
    obb_image,
    line_width=None,  # default auto-size
    font_size=None,  # default auto-size
    font="Arial.ttf",  # must be ImageFont compatible
    pil=False,  # use PIL, otherwise uses OpenCV
)
# One outline per detection; all boxes share a single colour and carry no
# text label (per-class labels/colours could be derived from cls_idx).
for obb, cls_idx in zip(obb_boxes, obb_cls):
    ann.box_label(
        box=obb,
        color=(0, 180, 255),
    )

image_with_obb = ann.result()

imshow(image_with_obb)

In [None]:
filename = "../datasets/experiments/bigger_sahi_512.jpg"

# cv2.imwrite reports failure through its return value rather than raising,
# so check it explicitly to avoid silently missing output files.
if not cv2.imwrite(filename=filename, img=image_with_obb):
    raise OSError(f"Could not write image to {filename}")

## Convert bounding boxes to a GeoPandas GeoDataFrame

In [None]:
import geopandas as gpd
import shapely.geometry as sg

# CRS identifier assigned to the detections GeoDataFrame built below.
RD_crs = "EPSG:28992"

In [None]:
# Metres per pixel.
multiplier = cm_per_px / 100

# Affine pixel -> map transform in [a, b, d, e, xoff, yoff] order:
#   x' = a*x + b*y + xoff,  y' = d*x + e*y + yoff
# The y scale is negative because pixel rows grow downwards, and the offsets
# anchor pixel (0, 0) at the top-left corner of the crop.
transformation_matrix = [
    multiplier, 0,
    0, -multiplier,
    part_area[0], part_area[1]+image_size_m,
]

# Turn every 4-corner box into a polygon and move it into map coordinates.
obb_geoms = gpd.GeoSeries(
    data=list(map(sg.Polygon, obb_boxes))
).affine_transform(transformation_matrix)

detections_gdf = gpd.GeoDataFrame(
    data={"class_id": obb_cls, "geometry": obb_geoms},
    crs=RD_crs,
)

In [None]:
# Quick plot of the georeferenced detection polygons.
detections_gdf.plot()

## BGT

In [None]:
import geopandas as gpd
import shapely.geometry as sg

# Read the BGT "wegdeel" layer from the folder named after the same
# 115000_487000 tile as the aerial image.
bgt_wegdeel = gpd.read_file("../datasets/experiments/bgt/115000_487000/bgt_wegdeel.gml")

In [None]:
# List the distinct values of the `function` column (used for filtering below).
bgt_wegdeel.function.unique()

In [None]:
# Keep only the rows whose "eindRegistratie" field is empty.
bgt_wegdeel = bgt_wegdeel.loc[bgt_wegdeel["eindRegistratie"].isna()]

In [None]:
# Rectangle covering the cropped tile in map coordinates.
area_poly = sg.box(*part_area)

# Select footpath / cycle-path style surfaces by their `function` value.
bgt_voetpad = bgt_wegdeel[
    bgt_wegdeel["function"].isin(['voetpad', 'fietspad', 'voetpad op trap', 'voetgangersgebied'])
]

# Clip to the tile and drop everything that falls completely outside it.
bgt_voetpad_area = (
    bgt_voetpad
    .intersection(area_poly)
    .loc[lambda geoms: ~geoms.is_empty]
)
bgt_voetpad_area.plot()

In [None]:
# Select the surfaces whose `function` is 'parkeervlak'.
bgt_parkeervlak = bgt_wegdeel[bgt_wegdeel["function"].isin(['parkeervlak'])]

# Clip to the tile (area_poly) and drop geometries that fall outside it.
bgt_parkeervlak_area = (
    bgt_parkeervlak
    .intersection(area_poly)
    .loc[lambda geoms: ~geoms.is_empty]
)
bgt_parkeervlak_area.plot()

In [None]:
%matplotlib inline

# Margin (in map units) added around the tile on every side.
padding = 10

fig, ax = plt.subplots(1, figsize=(10, 10), constrained_layout=True)

# Whole-number bounds of the tile, used for tick positions and axis limits.
x_min, y_min, x_max, y_max = (int(bound) for bound in part_area)

# BGT overlays: footpath-type surfaces in red, parking surfaces in green.
bgt_voetpad_area.plot(ax=ax, color="red", alpha=0.25)
bgt_parkeervlak_area.plot(ax=ax, color="green", alpha=0.25)

ax.set_xlabel('X')
ax.set_ylabel('Y')

# One tick every 100 units, labelled with the plain coordinate value.
x_ticks = range(x_min, x_max+1, 100)
y_ticks = range(y_min, y_max+1, 100)
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticks)
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks)

# Limits are applied after the ticks so they are the final word on the view.
ax.set_xlim(x_min - padding, x_max + padding)
ax.set_ylim(y_min - padding, y_max + padding)
ax.set_aspect('equal', adjustable='box')

# Save only the axes area of the figure, measured in inches.
extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
plt.savefig("../datasets/experiments/bigger_bgt.jpg", bbox_inches=extent, dpi=450)

plt.show()

## Compute wrongly parked cars

In [None]:
# Merge all footpath-type pieces into one geometry, then measure how much of
# each detection overlaps it.
sidewalk_union = bgt_voetpad_area.union_all(method="unary")
overlap_area = detections_gdf.intersection(sidewalk_union).area

# Despite the column name this is a fraction in 0..1, not a percentage.
detections_gdf["percentage_on_sidewalk"] = overlap_area / detections_gdf.area

# A detection counts as wrongly parked when at least a quarter of it overlaps.
detections_gdf["wrongly_parked"] = detections_gdf["percentage_on_sidewalk"].ge(0.25)

## Visualize all together

In [None]:
%matplotlib inline

# Margin (in map units) added around the tile on every side.
padding = 10

fig, ax = plt.subplots(1, figsize=(10, 10), constrained_layout=True)

# Integer bounds are used only for round tick positions and the axis limits.
[x_min, y_min, x_max, y_max] = map(int, part_area)

# Georeference the raster with the exact (float) tile bounds. Truncating them
# to ints would stretch/shift the image by up to a metre relative to the
# vector overlays. part_area is [x_min, y_min, x_max, y_max], whereas imshow
# expects [left, right, bottom, top]. np.flip on axis 2 reverses the channel
# order of the OpenCV-loaded image for matplotlib.
ax.imshow(
    np.flip(part_image, 2),
    extent=[part_area[0], part_area[2], part_area[1], part_area[3]],
)

# BGT overlays: footpath-type surfaces in red, parking surfaces in green.
bgt_voetpad_area.plot(ax=ax, color="red", alpha=0.25)
bgt_parkeervlak_area.plot(ax=ax, color="green", alpha=0.25)
# Detection outlines: one colour for correctly parked, another for wrongly parked.
detections_gdf[~detections_gdf["wrongly_parked"]].boundary.plot(ax=ax, color=np.array([255, 180, 0]) / 255)
detections_gdf[detections_gdf["wrongly_parked"]].boundary.plot(ax=ax, color=np.array([255, 0, 189]) / 255)

ax.set_xlabel('X')
ax.set_ylabel('Y')

# One tick every 100 units, labelled with the plain coordinate value.
ax.set_xticks(range(x_min, x_max+1, 100))
ax.set_xticklabels(range(x_min, x_max+1, 100))
ax.set_yticks(range(y_min, y_max+1, 100))
ax.set_yticklabels(range(y_min, y_max+1, 100))

ax.set_xlim((x_min - padding, x_max + padding))
ax.set_ylim((y_min - padding, y_max + padding))
ax.set_aspect('equal', adjustable='box')

# Save only the axes area of the figure, measured in inches.
extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
plt.savefig("../datasets/experiments/bigger_combined.jpg", bbox_inches=extent, dpi=450)

plt.show()