Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion client/platform/desktop/backend/native/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -779,9 +779,10 @@ async function _ingestFilePath(
merge(meta, pick(jsonObject, DatasetMetaMutableKeys));
metadataConfig = true;
} else if (coco.isCocoJson(jsonObject)) {
const [parsedAnnotations, parsedMeta] = await coco.parseFile(path);
const [parsedAnnotations, parsedMeta, cocoWarnings] = await coco.parseFile(path);
annotations = parsedAnnotations;
merge(meta, parsedMeta);
warnings = warnings.concat(cocoWarnings);
} else {
// Regular dive json
annotations = await loadAnnotationFile(path);
Expand Down
104 changes: 103 additions & 1 deletion client/platform/desktop/backend/serializers/coco.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ describe('COCO serializer', () => {
});

it('parses COCO with DIVE extension attributes', async () => {
const [parsed] = await parseFile('/input/coco.json');
const [parsed, , warnings] = await parseFile('/input/coco.json');
expect(warnings).toEqual([]);
const track = parsed.tracks[7];
expect(track.id).toBe(7);
expect(track.begin).toBe(1);
Expand All @@ -97,6 +98,107 @@ describe('COCO serializer', () => {
expect(geometryTypes).toEqual(expect.arrayContaining(['Polygon', 'Point', 'LineString']));
});

// The single annotation has no `bbox`, and its segmentation is an object rather than
// a polygon list, so there is nothing to derive bounds from: parseFile must reject.
it('throws a descriptive error when bbox and polygon are both missing', async () => {
  const cocoDocument = {
    images: [{ id: 1, file_name: 'frame_000001.jpg', frame_index: 0 }],
    annotations: [{
      id: 2,
      image_id: 1,
      category_id: 5,
      iscrowd: 1,
      segmentation: { size: [100, 100], counts: 'abc' },
    }],
    categories: [{ id: 5, name: 'fish' }],
  };
  mockfs({ '/input': { 'coco_no_bbox.json': JSON.stringify(cocoDocument) } });

  const inputPath = '/input/coco_no_bbox.json';
  // The message should both name the problem and explain the RLE restriction.
  await expect(parseFile(inputPath)).rejects.toThrow(/no bbox and no usable polygon/);
  await expect(parseFile(inputPath)).rejects.toThrow(/RLE segmentation masks still require a bbox/);
});

// With no `bbox` but a polygon segmentation, the feature bounds are expected to be the
// polygon's axis-aligned extent and no warning should be produced.
it('derives bbox from polygon when bbox is omitted', async () => {
  const cocoDocument = {
    images: [{ id: 1, file_name: 'frame_000001.jpg', frame_index: 0 }],
    annotations: [{
      id: 3,
      image_id: 1,
      category_id: 5,
      track_id: 401,
      segmentation: [[120, 80, 200, 80, 200, 120, 120, 120]],
    }],
    categories: [{ id: 5, name: 'fish' }],
  };
  mockfs({ '/input': { 'coco_polygon_only.json': JSON.stringify(cocoDocument) } });

  const [result, , reportedWarnings] = await parseFile('/input/coco_polygon_only.json');
  const [firstFeature] = result.tracks[401].features;
  // Bounds are [x1, y1, x2, y2] spanning the polygon corners above.
  expect(firstFeature.bounds).toEqual([120, 80, 200, 120]);
  expect(firstFeature.geometry?.features.length).toBe(1);
  expect(reportedWarnings).toEqual([]);
});

// One file mixes a polygon annotation (track 301) with an RLE-style annotation
// (track 302): the polygon keeps its geometry, the RLE one keeps only its bbox, and a
// single warning is reported for the file.
it('imports polygon segmentations and warns on RLE in the same file', async () => {
  const polygonAnnotation = {
    id: 1,
    image_id: 1,
    category_id: 5,
    bbox: [120, 80, 80, 40],
    track_id: 301,
    segmentation: [[120, 80, 200, 80, 200, 120, 120, 120]],
  };
  const rleAnnotation = {
    id: 2,
    image_id: 1,
    category_id: 5,
    bbox: [400, 200, 200, 60],
    track_id: 302,
    iscrowd: 1,
    segmentation: { size: [1080, 1920], counts: 'abc' },
  };
  const cocoDocument = {
    images: [{ id: 1, file_name: 'frame_000001.jpg', frame_index: 0 }],
    annotations: [polygonAnnotation, rleAnnotation],
    categories: [{ id: 5, name: 'fish' }],
  };
  mockfs({ '/input': { 'coco_mixed.json': JSON.stringify(cocoDocument) } });

  const [result, , reportedWarnings] = await parseFile('/input/coco_mixed.json');
  expect(result.tracks[301].features[0].geometry?.features.length).toBe(1);
  expect(result.tracks[302].features[0].geometry).toBeUndefined();
  expect(reportedWarnings).toHaveLength(1);
});

// An RLE-style annotation that does carry a bbox still imports: bounds come from the
// bbox, geometry is left undefined, and the warning mentions segmentation masks.
it('imports bbox when RLE masks are present and returns a warning', async () => {
  const cocoDocument = {
    images: [{ id: 1, file_name: 'frame_000001.jpg', frame_index: 1 }],
    annotations: [{
      id: 2,
      image_id: 1,
      category_id: 5,
      bbox: [10, 20, 30, 40],
      track_id: 8,
      iscrowd: 1,
      segmentation: { size: [100, 100], counts: 'abc' },
    }],
    categories: [{ id: 5, name: 'fish' }],
  };
  mockfs({ '/input': { 'coco_rle.json': JSON.stringify(cocoDocument) } });

  const [result, , reportedWarnings] = await parseFile('/input/coco_rle.json');
  const [firstFeature] = result.tracks[8].features;
  // bbox is [x, y, w, h]; bounds are [x, y, x + w, y + h].
  expect(firstFeature.bounds).toEqual([10, 20, 40, 60]);
  expect(firstFeature.geometry).toBeUndefined();
  expect(reportedWarnings).toHaveLength(1);
  expect(reportedWarnings[0]).toContain('segmentation masks');
});

it('serializes COCO with DIVE extension attributes', async () => {
await serializeFile('/output/out.coco.json', annotationSchema, imageMeta);
const out = await fs.readJSON('/output/out.coco.json');
Expand Down
173 changes: 137 additions & 36 deletions client/platform/desktop/backend/serializers/coco.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,108 @@ type CocoCategory = {
keypoints?: string[];
};

/**
 * Warning text reported by the COCO parser when at least one annotation carried a
 * run-length encoded (RLE) segmentation that was skipped during import.
 */
const RLE_SEGMENTATION_WARNING = [
  'The COCO file included run-length encoded segmentation masks that are not supported.',
  'Bounding boxes and other annotation data were imported, but masks were skipped.',
].join(' ');

/**
 * Reports whether the annotation carries a usable COCO `bbox`.
 *
 * A usable bbox is an array of exactly four finite numbers. Annotations are read
 * from parsed JSON, so entries such as `null` or strings can appear at runtime
 * despite the declared type; rejecting them here keeps them out of the
 * `x + w` / `y + h` bounds arithmetic.
 *
 * @param annotation COCO annotation to inspect.
 * @returns `true` when `annotation.bbox` is a four-element array of finite numbers.
 */
function hasValidBbox(annotation: CocoAnnotation): boolean {
  const { bbox } = annotation;
  return (
    Array.isArray(bbox)
    && bbox.length === 4
    && bbox.every((value) => Number.isFinite(value))
  );
}

/**
 * Converts a COCO polygon segmentation into lists of `[x, y]` pairs.
 *
 * Accepts either a list of flat polygons (`[[x1, y1, x2, y2, ...], ...]`) or a single
 * flat polygon (`[x1, y1, x2, y2, ...]`). Anything that is not an array (missing
 * value, RLE-style object) yields an empty result.
 *
 * @param segmentation The annotation's `segmentation` value.
 * @returns One list of coordinate pairs per polygon that produced at least one pair.
 */
function extractPolygonCoordsLists(
  segmentation: CocoAnnotation['segmentation'],
): [number, number][][] {
  if (!Array.isArray(segmentation)) {
    return [];
  }
  // A leading number means the whole value is one flat polygon, not a list of them.
  const isSingleFlatPolygon = segmentation.length > 0 && typeof segmentation[0] === 'number';
  const candidates: unknown[] = isSingleFlatPolygon ? [segmentation] : segmentation;

  const toPairs = (flat: number[]): [number, number][] => Array.from(
    // A dangling odd trailing value has no partner and is dropped.
    { length: Math.floor(flat.length / 2) },
    (_, pairIndex): [number, number] => [flat[2 * pairIndex], flat[2 * pairIndex + 1]],
  );

  return candidates
    .filter((candidate): candidate is number[] => Array.isArray(candidate))
    .map(toPairs)
    .filter((pairs) => pairs.length > 0);
}

/**
 * Computes the axis-aligned bounding box of a set of points in COCO
 * `[x, y, width, height]` form.
 *
 * The extent is accumulated in a single pass rather than with
 * `Math.min(...values)` / `Math.max(...values)`: spreading passes every value as a
 * separate call argument, which throws a RangeError once the point list is large.
 *
 * @param points `[x, y]` pairs; callers are expected to pass at least one point.
 * @returns `[xMin, yMin, xMax - xMin, yMax - yMin]`. For an empty list this is
 *   `[Infinity, Infinity, -Infinity, -Infinity]`.
 */
function bboxFromPoints(points: [number, number][]): [number, number, number, number] {
  let xMin = Infinity;
  let yMin = Infinity;
  let xMax = -Infinity;
  let yMax = -Infinity;
  points.forEach(([x, y]) => {
    // Math.min/Math.max (rather than < / >) so that a NaN coordinate still propagates.
    xMin = Math.min(xMin, x);
    yMin = Math.min(yMin, y);
    xMax = Math.max(xMax, x);
    yMax = Math.max(yMax, y);
  });
  return [xMin, yMin, xMax - xMin, yMax - yMin];
}

/**
 * Decides whether bounds can be obtained for an annotation.
 *
 * A valid `bbox` is always sufficient. Without one, the annotation must not be
 * flagged as RLE and must supply at least one polygon that yields coordinates.
 *
 * @param annotation COCO annotation to check.
 * @returns `true` when a bbox exists or can be derived from polygon segmentation.
 */
function annotationHasImportableBounds(annotation: CocoAnnotation): boolean {
  return hasValidBbox(annotation) || (
    !hasRleSegmentation(annotation)
    && extractPolygonCoordsLists(annotation.segmentation).length > 0
  );
}

/**
 * Builds the error message for annotations that have neither a bbox nor a usable
 * polygon segmentation.
 *
 * @param annotationIds Ids of the offending annotations; at most the first ten are
 *   listed, followed by a count of the remainder.
 * @returns Message naming the problem, the ids, and the accepted input formats.
 */
function missingBoundsError(annotationIds: Array<number | string>): string {
  const previewLimit = 10;
  const total = annotationIds.length;
  const hiddenCount = total - previewLimit;

  let idList = annotationIds.slice(0, previewLimit).join(', ');
  if (hiddenCount > 0) {
    idList += ` (and ${hiddenCount} more)`;
  }

  const headline = `${total} COCO annotation(s) cannot be imported because they have no bbox and `
    + `no usable polygon segmentation (ids: ${idList}).`;
  return [
    headline,
    'Provide bbox [x, y, width, height] or polygon segmentation as [[x1, y1, ...]].',
    'Annotations with only RLE segmentation masks still require a bbox.',
  ].join(' ');
}

/**
 * Returns the annotation's bbox as `[x, y, width, height]`, deriving it from the
 * polygon segmentation when no valid `bbox` is present.
 *
 * @param annotation COCO annotation to resolve bounds for.
 * @returns The explicit bbox, or the extent of all polygon points combined.
 * @throws Error when there is neither a valid bbox nor any polygon coordinates.
 */
function resolveCocoBbox(annotation: CocoAnnotation): [number, number, number, number] {
  if (!hasValidBbox(annotation)) {
    // Every polygon of the annotation contributes to one combined extent.
    const polygonPoints = extractPolygonCoordsLists(annotation.segmentation).flat();
    if (polygonPoints.length === 0) {
      throw new Error(missingBoundsError([annotation.id]));
    }
    return bboxFromPoints(polygonPoints);
  }
  return annotation.bbox as [number, number, number, number];
}

/**
 * Checks up front that every annotation can yield bounds, so that all offending
 * ids are reported in a single error.
 *
 * @param annotations COCO annotations to validate.
 * @throws Error listing the ids of annotations with no bbox and no usable polygon.
 */
function validateAnnotationBounds(annotations: CocoAnnotation[]): void {
  const offendingIds: Array<number | string> = [];
  annotations.forEach((annotation) => {
    if (!annotationHasImportableBounds(annotation)) {
      offendingIds.push(annotation.id);
    }
  });
  if (offendingIds.length > 0) {
    throw new Error(missingBoundsError(offendingIds));
  }
}

type CocoAnnotation = {
id: number;
image_id: number;
category_id: number;
bbox: [number, number, number, number];
bbox?: [number, number, number, number];
score?: number;
track_id?: number;
/**
* COCO `iscrowd` flag (0 or 1). In the COCO spec, 0 means a single instance with
* polygon `segmentation` ([[x1, y1, ...]]); 1 means a crowd region whose
* `segmentation` is run-length encoded (RLE) as an object (e.g. { counts, size }).
* DIVE does not import RLE masks: when `iscrowd` is truthy, or `segmentation` is
* a dict, polygon/mask geometry is skipped (bbox and other fields still import).
*/
iscrowd?: number;
keypoints?: number[];
segmentation?: number[][];
segmentation?: number[][] | Record<string, unknown>;
dive_detection_attributes?: Record<string, unknown>;
dive_track_attributes?: Record<string, unknown>;
dive_notes?: string[];
Expand All @@ -40,39 +133,35 @@ type CocoDocument = {
categories: CocoCategory[];
};

/**
 * Reports whether the annotation's segmentation should be treated as COCO RLE,
 * which DIVE does not decode.
 *
 * An annotation counts as RLE when `iscrowd` is truthy, or when `segmentation` is
 * present but is not an array.
 */
function hasRleSegmentation(annotation: CocoAnnotation): boolean {
  const { iscrowd, segmentation } = annotation;
  if (iscrowd) {
    return true;
  }
  if (!segmentation) {
    return false;
  }
  return !Array.isArray(segmentation);
}

function buildFeatureGeometry(
annotation: CocoAnnotation,
category?: CocoCategory,
): GeoJSON.FeatureCollection<TrackSupportedFeature, GeoJSON.GeoJsonProperties> | undefined {
): { geometry?: GeoJSON.FeatureCollection<TrackSupportedFeature, GeoJSON.GeoJsonProperties>; rleSkipped: boolean } {
if (hasRleSegmentation(annotation)) {
return { rleSkipped: true };
}
const geometryFeatures:
GeoJSON.Feature<TrackSupportedFeature, GeoJSON.GeoJsonProperties>[] = [];
const { segmentation } = annotation;
if (segmentation) {
if (!Array.isArray(segmentation)) {
throw new Error('Run-length encoded COCO segmentation is not supported');
}
const polygons = (
segmentation.length > 0 && typeof segmentation[0] === 'number'
? [segmentation]
: segmentation
) as number[][];
polygons.forEach((polygon) => {
const coords: number[][] = [];
for (let i = 0; i + 1 < polygon.length; i += 2) {
coords.push([polygon[i], polygon[i + 1]]);
}
if (coords.length) {
geometryFeatures.push({
type: 'Feature',
properties: { key: '' },
geometry: {
type: 'Polygon',
coordinates: [coords],
},
});
}
const coordLists = extractPolygonCoordsLists(annotation.segmentation);
coordLists.forEach((coords) => {
geometryFeatures.push({
type: 'Feature',
properties: { key: '' },
geometry: {
type: 'Polygon',
coordinates: [coords],
},
});
}
});

const keypoints = annotation.keypoints || [];
if (Array.isArray(keypoints) && keypoints.length >= 3) {
Expand Down Expand Up @@ -110,10 +199,15 @@ function buildFeatureGeometry(
}
}

if (!geometryFeatures.length) return undefined;
if (!geometryFeatures.length) {
return { rleSkipped: false };
}
return {
type: 'FeatureCollection' as const,
features: geometryFeatures,
geometry: {
type: 'FeatureCollection' as const,
features: geometryFeatures,
},
rleSkipped: false,
};
}

Expand All @@ -134,19 +228,22 @@ function imageFrameMap(document: CocoDocument): Record<number, number> {
return map;
}

async function parseFile(path: string): Promise<[AnnotationSchema, Record<string, unknown>]> {
async function parseFile(path: string): Promise<[AnnotationSchema, Record<string, unknown>, string[]]> {
const parsed = await fs.readJSON(path);
if (!isCocoJson(parsed)) {
throw new Error('JSON does not match COCO format');
}
const categoriesById = Object.fromEntries(parsed.categories.map((c) => [c.id, c]));
const frameByImageId = imageFrameMap(parsed);
const tracks: AnnotationSchema['tracks'] = {};
let skippedRleMasks = false;

validateAnnotationBounds(parsed.annotations);

parsed.annotations.forEach((annotation) => {
const frame = frameByImageId[annotation.image_id];
if (frame === undefined) return;
const [x, y, w, h] = annotation.bbox;
const [x, y, w, h] = resolveCocoBbox(annotation);
const bounds: [number, number, number, number] = [x, y, x + w, y + h];
const trackId = annotation.track_id ?? annotation.id;
const category = categoriesById[annotation.category_id];
Expand Down Expand Up @@ -187,7 +284,10 @@ async function parseFile(path: string): Promise<[AnnotationSchema, Record<string
} else if (typeof noteField === 'string' && noteField.trim()) {
feature.notes = [noteField.trim()];
}
const geometry = buildFeatureGeometry(annotation, category);
const { geometry, rleSkipped } = buildFeatureGeometry(annotation, category);
if (rleSkipped) {
skippedRleMasks = true;
}
if (geometry) {
feature.geometry = geometry;
}
Expand All @@ -197,7 +297,8 @@ async function parseFile(path: string): Promise<[AnnotationSchema, Record<string

const annotations: AnnotationSchema = { version: 2, tracks, groups: {} };
const processed = processTrackAttributes(Object.values(annotations.tracks));
return [annotations, { attributes: processed.attributes }];
const warnings = skippedRleMasks ? [RLE_SEGMENTATION_WARNING] : [];
return [annotations, { attributes: processed.attributes }, warnings];
}

function frameNameForExport(frame: number, meta: JsonMeta): string {
Expand Down
8 changes: 5 additions & 3 deletions docs/DataFormats.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,14 @@ For COCO files not produced by DIVE:

* Supported:
* Bounding boxes (`bbox`)
* Polygon segmentations in list format (`segmentation: [[x1, y1, ...]]`)
* Polygon segmentations in list format (`segmentation: [[x1, y1, ...]]`); if `bbox` is
omitted, DIVE derives it from the polygon's axis-aligned bounds
* Head/tail keypoints from category keypoint labels
* Partially supported:
* COCO has no direct equivalent for DIVE groups, so groups are not represented in COCO export.
* Unsupported:
* Run-length encoded segmentations (RLE)
* Partially supported:
* Run-length encoded segmentations (RLE): bounding boxes and other fields import,
but masks are skipped and a warning is shown.

### Example COCO Annotation with DIVE Extensions

Expand Down
Loading
Loading