In [None]:
import os
import sys
import pandas as pd
import tqdm
import cv2
import plotly.graph_objects as go
from scipy.stats import gaussian_kde
import numpy as np
import pandas as pd
import plotly.express as px

This notebook computes bounding-box size statistics over the entire dataset for two preprocessing pipelines: (1) resizing the full image, and (2) SAHI slicing followed by resizing each slice.

In [8]:
dataset_path = "../../data/merged"

# Column layout of the annotation table: one row per bounding box.
annotation_columns = [
    "image_task",
    "image_name",
    "image_path",
    "image_width",
    "image_height",
    "class_id",
    "x_center",
    "y_center",
    "width",
    "height",
]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end. Appending with `df.loc[len(df)] = ...` copies the frame on every
# insert, which makes loading quadratic in the number of boxes.
records = []

for image_task in ["train", "val"]:
    dataset_annotations_path = os.path.join(dataset_path, "labels", image_task)
    dataset_images_path = os.path.join(dataset_path, "images", image_task)

    for image_name in tqdm.tqdm(os.listdir(dataset_images_path)):
        image_path = os.path.join(dataset_images_path, image_name)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Warning: {image_path} could not be read.")
            continue

        # Only the pixel dimensions are needed; taking the first two entries
        # of the shape works regardless of the number of channels.
        height, width = image.shape[:2]

        # The label file shares the image's base name, with a .txt extension.
        annotation_file = os.path.join(
            dataset_annotations_path, f"{os.path.splitext(image_name)[0]}.txt"
        )
        if not os.path.exists(annotation_file):
            print(f"Warning: {annotation_file} does not exist.")
            continue

        with open(annotation_file, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Skip blank lines.
                    continue
                # Each label line holds five numbers: class id followed by
                # the box centre and size (later cells multiply width/height
                # by the image size, i.e. treat them as normalised).
                class_id, x_center, y_center, w, h = map(float, fields)
                records.append(
                    (
                        image_task,
                        image_name,
                        image_path,
                        width,
                        height,
                        int(class_id),
                        x_center,
                        y_center,
                        w,
                        h,
                    )
                )

# Build the annotation table in a single pass.
df = pd.DataFrame(records, columns=annotation_columns)

df.head()

100%|██████████| 59022/59022 [31:18<00:00, 31.42it/s] 
100%|██████████| 1000/1000 [01:35<00:00, 10.44it/s]


In [27]:
# Persist the parsed annotation table next to the notebook so it can be
# inspected or reused without re-reading the image files. The row index is
# written as the first CSV column (pandas default).
annotations_csv = "dataset_annotations.csv"
df.to_csv(annotations_csv)

In [182]:
# --- Configuration ----------------------------------------------------------
imgsz = 1024       # target YOLO image size (longest side after resizing)
slice_size = 2048  # SAHI slice size; a slice is scaled by imgsz / slice_size
min_stride = 4     # stride used to normalise the bbox sizes below

# Work on a trimmed copy so the original df is never mutated.
cols = ["width", "height", "image_width", "image_height"]
# df_copy = df[df["image_task"] == "val"][cols].copy()
df_copy = df.loc[:, cols].copy()

# Step 1 - denormalise the boxes: fraction of image size -> pixels.
norm_wh = df_copy[["width", "height"]].to_numpy()
image_wh = df_copy[["image_width", "image_height"]].to_numpy()
df_copy[["width", "height"]] = norm_wh * image_wh

# Step 2 - scale factor when the whole image is resized so that its longest
# side becomes imgsz. One value per box, shape (N,).
im_max = df_copy.loc[:, ["image_width", "image_height"]].max(axis="columns").to_numpy()
k = imgsz / im_max

# Step 3 - scale factor when the image is sliced first:
#   * longest side below slice_size -> same factor as the full-image resize
#   * otherwise                     -> imgsz / slice_size (0.5 with the values above)
k_sahi = np.where(im_max < slice_size, k, imgsz / slice_size)

# Step 4 - box sizes in pixels after each pipeline.
df_copy["k"] = k
df_copy["k_sahi"] = k_sahi
for suffix, scale in (("", k), ("_sahi", k_sahi)):
    for side in ("width", "height"):
        df_copy[f"{side}_after_resize{suffix}"] = df_copy[side].to_numpy() * scale

# df_copy.head()

# Step 5 - box sizes expressed in units of min_stride.
stride_cols = ["w_stride", "h_stride", "w_stride_sahi", "h_stride_sahi"]
resized_cols = [
    "width_after_resize",
    "height_after_resize",
    "width_after_resize_sahi",
    "height_after_resize_sahi",
]
for stride_col, resized_col in zip(stride_cols, resized_cols):
    df_copy[stride_col] = df_copy[resized_col] / min_stride

# --- Descriptive statistics --------------------------------------------------
stride_stats = df_copy[stride_cols]
print(stride_stats.describe().T)

print()

# 1st percentile: the size below which the smallest 1 % of boxes fall.
print(stride_stats.quantile(0.01).T)


                 count       mean       std       min       25%       50%  \
w_stride       50071.0   6.836973  4.090048  0.047122  3.992947  5.780811   
h_stride       50071.0   6.703837  4.188013  0.047115  3.863242  5.642212   
w_stride_sahi  50071.0  10.641636  4.842679  0.125900  7.426849  9.803368   
h_stride_sahi  50071.0  10.383525  4.856803  0.111549  7.221364  9.379747   

                     75%        max  
w_stride        8.746079  96.766884  
h_stride        8.386031  95.553681  
w_stride_sahi  13.562607  96.766884  
h_stride_sahi  12.895615  95.553681  

w_stride         1.554715
h_stride         1.649262
w_stride_sahi    2.355629
h_stride_sahi    2.544078
Name: 0.01, dtype: float64


In [None]:
# Compare the stride-normalised bbox size distributions of the two pipelines
# (full-image resize vs. SAHI slicing + resize).

# Stride-normalised sizes, recomputed so that they reflect the current value
# of min_stride.
stride_cols = ["w_stride", "h_stride", "w_stride_sahi", "h_stride_sahi"]
resized_cols = [
    "width_after_resize",
    "height_after_resize",
    "width_after_resize_sahi",
    "height_after_resize_sahi",
]
for stride_col, resized_col in zip(stride_cols, resized_cols):
    df_copy[stride_col] = df_copy[resized_col] / min_stride

# --- Descriptive statistics --------------------------------------------------
print(df_copy[stride_cols].describe().T)

print()

print(df_copy[stride_cols].quantile(0.01).T)

# --- 2-D histograms with identical bin edges ---------------------------------
# The edges span BOTH distributions so that no box of either pipeline falls
# outside the grid (np.histogram2d silently drops out-of-range samples).
w_both = df_copy[["w_stride", "w_stride_sahi"]]
h_both = df_copy[["h_stride", "h_stride_sahi"]]
bins = [
    np.linspace(w_both.min().min(), w_both.max().max(), 81),
    np.linspace(h_both.min().min(), h_both.max().max(), 81),
]

H1, *_ = np.histogram2d(df_copy["w_stride"], df_copy["h_stride"], bins=bins)
H2, *_ = np.histogram2d(df_copy["w_stride_sahi"], df_copy["h_stride_sahi"], bins=bins)

# --- Jensen-Shannon distance -------------------------------------------------
eps = 1e-12  # avoids log(0) in empty bins
P = H1 / H1.sum()
Q = H2 / H2.sum()
M = 0.5 * (P + Q)


def _kl_divergence(a, b):
    """Kullback-Leibler divergence sum(a * log(a / b)), smoothed by eps."""
    return np.sum(a * np.log((a + eps) / (b + eps)))


js_div = 0.5 * _kl_divergence(P, M) + 0.5 * _kl_divergence(Q, M)
# Rounding and the eps smoothing can push the divergence marginally below
# zero; clamp it so the square root stays defined.
js_dist = np.sqrt(max(js_div, 0.0))  # metric version

print()
print(f"Jensen–Shannon distance: {js_dist:.4f}")



                 count      mean       std       min       25%       50%  \
w_stride       50071.0  3.418487  2.045024  0.023561  1.996473  2.890406   
h_stride       50071.0  3.351918  2.094006  0.023557  1.931621  2.821106   
w_stride_sahi  50071.0  5.320818  2.421339  0.062950  3.713425  4.901684   
h_stride_sahi  50071.0  5.191763  2.428402  0.055775  3.610682  4.689874   

                    75%        max  
w_stride       4.373039  48.383442  
h_stride       4.193015  47.776840  
w_stride_sahi  6.781303  48.383442  
h_stride_sahi  6.447807  47.776840  

w_stride         0.777358
h_stride         0.824631
w_stride_sahi    1.177815
h_stride_sahi    1.272039
Name: 0.01, dtype: float64

Jensen–Shannon distance: 0.3739
