Skip to content
Merged

Dev #119

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,19 @@ export const trainingRectangleLabelSchema = z.object({
...trainingSharedLabelSchema,
});

/**
 * Wire format of the optional "polygon split" dataset-prep step, which breaks
 * a "bridged" polygon annotation into one label per visually-disconnected
 * piece.
 *
 * Users configure the feature through customHyperparams using label names
 * (`split_classes`); the API resolves those names to internal indices before
 * the payload is sent to the ML service. The resolution and validation logic
 * lives in training-external.service.ts.
 *
 * - `enabled`: whether the split step runs.
 * - `class_indices`: non-negative integer indices of the classes to split.
 * - `kernel_size`: positive integer size of the kernel used for the split.
 */
export const trainingPolygonSplitSchema = z.object({
  enabled: z.boolean(),
  class_indices: z.array(z.number().int().nonnegative()),
  kernel_size: z.number().int().positive(),
});

export const trainingConfigSchema = z.object({
output_types: z.enum(ModelOutputTypeEnum).array(),
epochs: z.number(),
Expand All @@ -123,6 +136,7 @@ export const trainingConfigSchema = z.object({
.array(),
}),
custom_hyperparams: z.record(z.string(), z.unknown()).default({}),
polygon_split: trainingPolygonSplitSchema.optional(),
});

export const trainingOutputUploadSchema = z.object({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,16 @@ export class TrainingExternalService {
}
})

// Extract the polygon-split feature flag (if any) from customHyperparams
// before forwarding the remaining hyperparams. The split keys are an
// ml-yolo / Luxonis dataset-prep concern, not trainer knobs, so they
// must NOT leak into the YAML config that gets deep-merged into the
// training runtime config.
const { polygon_split, remainingHyperparams } = this.extractPolygonSplit(
model.customHyperparams,
model.labels,
);

const basePayload: TrainingBasePayload = {
id: model.id,
output_config: outputUploads,
Expand Down Expand Up @@ -491,7 +501,8 @@ export class TrainingExternalService {
keep_original: pp.keepOriginal,
})),
},
custom_hyperparams: model.customHyperparams,
custom_hyperparams: remainingHyperparams,
...(polygon_split && { polygon_split }),
},
};

Expand Down Expand Up @@ -560,4 +571,96 @@ export class TrainingExternalService {
}
}
}

/**
 * Pull the bridged-polygon-split settings out of customHyperparams and
 * resolve them into the payload shape sent to the ML service.
 *
 * Three keys are consumed: `split_bridged_polygons` (the on/off flag),
 * `split_classes` (label names) and `split_kernel_size` (optional, defaults
 * to 9). They are ALWAYS removed from the returned hyperparams, whether or
 * not the feature is enabled, so they never reach the downstream trainer
 * config. The input object itself is not mutated.
 *
 * Each name in `split_classes` is resolved to the position of the label with
 * that name in `labels`; a name listed more than once contributes a single
 * index.
 *
 * The flag is fully optional: when `split_bridged_polygons` is absent or
 * falsy, `polygon_split` is `undefined` and no further validation runs.
 *
 * @param customHyperparams Raw user-supplied hyperparameters.
 * @param labels Labels of the model; their order defines the class indices.
 * @returns The resolved split config (or `undefined` when disabled) plus the
 *   hyperparams with the split keys stripped.
 * @throws BadRequestException on misconfiguration — a truthy but non-boolean
 *   flag, a missing/empty class list, non-string or unknown class names, or
 *   a kernel size that is not a positive odd integer — so the request fails
 *   at the train-trigger step instead of misbehaving mid-training.
 */
private extractPolygonSplit(
  customHyperparams: Record<string, unknown>,
  labels: ModelEntity["labels"],
): {
  polygon_split:
    | { enabled: true; class_indices: number[]; kernel_size: number }
    | undefined;
  remainingHyperparams: Record<string, unknown>;
} {
  const defaultKernelSize = 9;

  // Work on a shallow copy so the caller's object is left untouched.
  const rest = { ...customHyperparams };
  const enabledRaw = rest.split_bridged_polygons;
  const classesRaw = rest.split_classes;
  const kernelRaw = rest.split_kernel_size;

  // Always strip the keys from what we pass on, even if the flag is off —
  // they're never valid trainer config.
  delete rest.split_bridged_polygons;
  delete rest.split_classes;
  delete rest.split_kernel_size;

  if (!enabledRaw) {
    return { polygon_split: undefined, remainingHyperparams: rest };
  }

  // A truthy non-boolean (e.g. the string "false", or 1) must not silently
  // switch the feature on; reject it like every other wrong-typed setting.
  if (enabledRaw !== true) {
    throw new BadRequestException(
      `split_bridged_polygons must be a boolean, got ${typeof enabledRaw}: ${JSON.stringify(enabledRaw)}`,
    );
  }

  if (!Array.isArray(classesRaw) || classesRaw.length === 0) {
    throw new BadRequestException(
      "split_bridged_polygons is enabled but split_classes is missing or empty. " +
        "Provide a non-empty array of label names, e.g. [\"kapie\", \"sunkovy salam\"].",
    );
  }

  const labelNameToIndex = new Map<string, number>(
    labels.map((l, i) => [l.name, i]),
  );

  const unknown: string[] = [];
  const indices: number[] = [];
  for (const name of classesRaw) {
    if (typeof name !== "string") {
      throw new BadRequestException(
        `split_classes entries must be strings, got ${typeof name}: ${JSON.stringify(name)}`,
      );
    }
    const idx = labelNameToIndex.get(name);
    if (idx === undefined) {
      unknown.push(name);
    } else if (!indices.includes(idx)) {
      // Skip repeats so a name listed twice yields a single class index.
      indices.push(idx);
    }
  }

  // Report every unknown name at once rather than failing on the first.
  if (unknown.length > 0) {
    const available = labels.map((l) => l.name).join(", ");
    throw new BadRequestException(
      `split_classes contains label name(s) not in this model: [${unknown.join(", ")}]. ` +
        `Available labels: [${available}]`,
    );
  }

  const kernel = kernelRaw === undefined ? defaultKernelSize : kernelRaw;
  if (typeof kernel !== "number" || !Number.isInteger(kernel) || kernel <= 0) {
    throw new BadRequestException(
      `split_kernel_size must be a positive integer, got ${JSON.stringify(kernelRaw)}`,
    );
  }
  if (kernel % 2 === 0) {
    throw new BadRequestException(
      `split_kernel_size must be odd (so the morphological kernel has a centred pixel), got ${kernel}`,
    );
  }

  return {
    polygon_split: { enabled: true, class_indices: indices, kernel_size: kernel },
    remainingHyperparams: rest,
  };
}
}
11 changes: 9 additions & 2 deletions apps/ml-yolo/app/ml/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from ..models.dataset_config import DatasetConfig
from ..models.image import Image
from ..models.model_type import ModelType
from ..models.polygon_split_config import PolygonSplitConfig

DATASET_DIR = "dataset"
DATASET_CONFIG = "dataset_config.yml"
Expand Down Expand Up @@ -86,7 +87,11 @@ def prepare_classification_directory(


def prepare_dataset(
dir: str, images: list[Image], config: DatasetConfig, task_type: ModelType
dir: str,
images: list[Image],
config: DatasetConfig,
task_type: ModelType,
polygon_split: PolygonSplitConfig | None = None,
):
global VAL_DIR
dir = f"{dir}/{DATASET_DIR}"
Expand All @@ -112,4 +117,6 @@ def prepare_dataset(
)
copy_image(image, f"{image_dir}/{curr_dir}")
with open(f"{label_dir}/{curr_dir}/{label_filename}", "w") as f:
f.write("\n".join(image.labels_str(task_type)))
f.write(
"\n".join(image.labels_str(task_type, polygon_split=polygon_split))
)
1 change: 1 addition & 0 deletions apps/ml-yolo/app/ml/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def run_training(config: ModelConfig) -> None:
config.data,
config.training_config.dataset_config,
config.type,
polygon_split=config.training_config.polygon_split,
)

# 2) Optional deterministic preprocessings.
Expand Down
22 changes: 19 additions & 3 deletions apps/ml-yolo/app/models/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .labels.polygon_label import PolygonLabel
from .labels.rectangle_label import RectangleLabel
from .model_type import ModelType
from .polygon_split_config import PolygonSplitConfig


class Image(BaseSchema):
Expand All @@ -15,16 +16,31 @@ class Image(BaseSchema):
height: int
labels: list[Union[ClassificationLabel, PolygonLabel, RectangleLabel]] = []

def labels_str(self, model_type: ModelType | None = None) -> list[str]:
def labels_str(
self,
model_type: ModelType | None = None,
polygon_split: PolygonSplitConfig | None = None,
) -> list[str]:
result = []
for label in self.labels:
if isinstance(label, ClassificationLabel):
result.append(label.to_str(self.width, self.height))
elif isinstance(label, PolygonLabel):
kernel = (
polygon_split.kernel_size
if polygon_split
and polygon_split.enabled
and label.label.label_number in polygon_split.class_indices
else None
)
if model_type == ModelType.DETECTION:
result.append(label.to_bbox_str(self.width, self.height))
result.extend(
label.to_bbox_str_lines(self.width, self.height, kernel)
)
else:
result.append(label.to_str(self.width, self.height))
result.extend(
label.to_str_lines(self.width, self.height, kernel)
)
elif isinstance(label, RectangleLabel):
result.append(label.to_str(self.width, self.height))
return result
Expand Down
113 changes: 105 additions & 8 deletions apps/ml-yolo/app/models/labels/polygon_label.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,94 @@
import cv2
import numpy as np

from ..base_schema import BaseSchema

from .label import Label

# Components smaller than this fraction of the opened polygon's area are
# discarded — they're almost always rasterisation noise rather than real
# pieces of the object.
_MIN_COMPONENT_AREA_RATIO = 0.05


class PolygonLabel(BaseSchema):
label: Label
points: list[tuple[float, float]]

def __get_normalized_points(
self, width: int, height: int
) -> list[tuple[float, float]]:
# Points are stored in percentage space (0-100); YOLO label files want
# 0-1 normalised coords. Width/height are unused at the normalisation
# step but kept in the signature for symmetry with other label types.
def __normalized_points(self) -> list[tuple[float, float]]:
return [(x / 100, y / 100) for x, y in self.points]

def to_str(self, width: int, height: int) -> str:
normalized_points = self.__get_normalized_points(width, height)
def __split_components(
    self, width: int, height: int, kernel_size: int
) -> list[list[tuple[float, float]]]:
    """Split this polygon into one polygon per visually-disconnected piece.

    The polygon is rasterised onto a ``height`` x ``width`` mask, a
    morphological opening with an elliptical ``kernel_size`` x
    ``kernel_size`` kernel breaks thin bridges, and each surviving
    connected component is traced back into a polygon in normalised 0-1
    coordinates.

    Components smaller than ``_MIN_COMPONENT_AREA_RATIO`` of the opened
    area, and components whose contour has fewer than three points, are
    dropped.

    Returns the original (unsplit) polygon as a single-element list
    whenever no real split happened: non-positive dimensions or kernel
    size, fewer than three points, at most one component after the
    opening, or fewer than two usable pieces after filtering. The last
    case keeps a lone surviving piece from replacing the annotated shape
    with its opened approximation (or silently dropping part of it).
    """
    fallback = [self.__normalized_points()]

    if width <= 0 or height <= 0 or kernel_size <= 0:
        return fallback

    if len(self.points) < 3:
        return fallback

    # Points are stored as percentages; convert to integer pixel coords.
    pixel_pts = np.array(
        [
            (int(round((x / 100) * width)), int(round((y / 100) * height)))
            for x, y in self.points
        ],
        dtype=np.int32,
    )

    mask = np.zeros((height, width), dtype=np.uint8)
    cv2.fillPoly(mask, [pixel_pts], 255)

    kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)
    )
    opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    num_labels, comp_labels = cv2.connectedComponents(opened)
    # num_labels counts background as label 0, so <= 2 means at most one
    # real component survived the opening — no split happened.
    if num_labels <= 2:
        return fallback

    total_area = int(np.count_nonzero(opened))
    if total_area == 0:
        return fallback

    min_area = _MIN_COMPONENT_AREA_RATIO * total_area

    out: list[list[tuple[float, float]]] = []
    for cid in range(1, num_labels):
        comp = np.where(comp_labels == cid, 255, 0).astype(np.uint8)
        comp_area = int(np.count_nonzero(comp))
        if comp_area < min_area:
            continue
        contours, _ = cv2.findContours(
            comp, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS
        )
        if not contours:
            continue
        # squeeze() drops the singleton axis OpenCV puts in contour arrays;
        # anything that is not an (N >= 3, 2) array cannot form a polygon.
        biggest = max(contours, key=cv2.contourArea).squeeze()
        if biggest.ndim != 2 or biggest.shape[0] < 3:
            continue
        out.append(
            [(float(p[0]) / width, float(p[1]) / height) for p in biggest]
        )

    # Fewer than two usable pieces means the polygon was not actually
    # split, so keep the annotation exactly as drawn.
    if len(out) < 2:
        return fallback
    return out

def __seg_line(self, normalized_points: list[tuple[float, float]]) -> str:
    """Format one polygon as a label line: class number, then x y pairs.

    Each coordinate is written with six decimal places and all fields are
    separated by single spaces.
    """
    coords: list[str] = []
    for px, py in normalized_points:
        coords.append(f"{px:.6f} {py:.6f}")
    return f"{self.label.label_number} " + " ".join(coords)

def to_bbox_str(self, width: int, height: int) -> str:
"""Convert polygon to YOLO bbox format (cx, cy, w, h) normalized."""
normalized_points = self.__get_normalized_points(width, height)
def __bbox_line(self, normalized_points: list[tuple[float, float]]) -> str:
xs = [x for x, y in normalized_points]
ys = [y for x, y in normalized_points]
x_min = max(0.0, min(xs))
Expand All @@ -31,3 +100,31 @@ def to_bbox_str(self, width: int, height: int) -> str:
w = x_max - x_min
h = y_max - y_min
return f"{self.label.label_number} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}"

def __resolve_polygons(
    self, width: int, height: int, split_kernel_size: int | None
) -> list[list[tuple[float, float]]]:
    """Return the polygon(s) to emit for this label, in normalised coords.

    A falsy ``split_kernel_size`` (``None`` or ``0``) disables splitting and
    yields the original polygon as a single-element list; otherwise the
    polygon is handed to the component splitter with that kernel size.
    """
    if not split_kernel_size:
        return [self.__normalized_points()]
    return self.__split_components(width, height, split_kernel_size)

def to_str_lines(
    self, width: int, height: int, split_kernel_size: int | None = None
) -> list[str]:
    """Return one segmentation label line per resolved polygon.

    With no ``split_kernel_size`` the result has exactly one line, for the
    polygon as annotated.
    """
    lines: list[str] = []
    for polygon in self.__resolve_polygons(width, height, split_kernel_size):
        lines.append(self.__seg_line(polygon))
    return lines

def to_bbox_str_lines(
    self, width: int, height: int, split_kernel_size: int | None = None
) -> list[str]:
    """Return one bounding-box label line per resolved polygon.

    With no ``split_kernel_size`` the result has exactly one line, for the
    polygon as annotated.
    """
    lines: list[str] = []
    for polygon in self.__resolve_polygons(width, height, split_kernel_size):
        lines.append(self.__bbox_line(polygon))
    return lines

# Backwards-compatible single-string accessors. The split-aware caller
# (Image.labels_str) uses the *_lines variants directly; these remain
# for any other consumers that expect a single label string.
def to_str(self, width: int, height: int) -> str:
    """Return the segmentation label line for the unsplit polygon."""
    lines = self.to_str_lines(width, height)
    return lines[0]

def to_bbox_str(self, width: int, height: int) -> str:
    """Return the bounding-box label line for the unsplit polygon."""
    lines = self.to_bbox_str_lines(width, height)
    return lines[0]
Loading
Loading