diff --git a/labelbox/data/annotation_types/annotation.py b/labelbox/data/annotation_types/annotation.py index fab0da6c4..d583a2e7c 100644 --- a/labelbox/data/annotation_types/annotation.py +++ b/labelbox/data/annotation_types/annotation.py @@ -1,5 +1,5 @@ import abc -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union from .classification import Checklist, Dropdown, Radio, Text from .feature import FeatureSchema @@ -72,11 +72,13 @@ class VideoObjectAnnotation(ObjectAnnotation): value (Geometry) frame (Int): The frame index that this annotation corresponds to keyframe (bool): Whether or not this annotation was a human generated or interpolated annotation + segment_index (Optional[Int]): Index of video segment this annotation belongs to classifications (List[ClassificationAnnotation]) = [] extra (Dict[str, Any]) """ frame: int keyframe: bool + segment_index: Optional[int] = None class VideoClassificationAnnotation(ClassificationAnnotation): @@ -87,6 +89,8 @@ class VideoClassificationAnnotation(ClassificationAnnotation): feature_schema_id (Optional[Cuid]) value (Union[Text, Checklist, Radio, Dropdown]) frame (int): The frame index that this annotation corresponds to + segment_index (Optional[Int]): Index of video segment this annotation belongs to extra (Dict[str, Any]) """ frame: int + segment_index: Optional[int] = None diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py index 1a12bcd30..8dd339767 100644 --- a/labelbox/data/serialization/ndjson/label.py +++ b/labelbox/data/serialization/ndjson/label.py @@ -88,6 +88,39 @@ def _get_consecutive_frames( consecutive.append((group[0], group[-1])) return consecutive + @classmethod + def _get_segment_frame_ranges( + cls, annotation_group: List[Union[VideoClassificationAnnotation, + VideoObjectAnnotation]] + ) -> List[Tuple[int, int]]: + sorted_frame_segment_indices = sorted([ + (annotation.frame, annotation.segment_index) + for 
annotation in annotation_group + if annotation.segment_index is not None + ]) + if len(sorted_frame_segment_indices) == 0: + # Group segment by consecutive frames, since `segment_index` is not present + return cls._get_consecutive_frames( + sorted([annotation.frame for annotation in annotation_group])) + elif len(sorted_frame_segment_indices) == len(annotation_group): + # Group segment by segment_index + last_segment_id = 0 + segment_groups = defaultdict(list) + for frame, segment_index in sorted_frame_segment_indices: + if segment_index < last_segment_id: + raise ValueError( + f"`segment_index` must be in ascending order. Please investigate video annotation at frame, '{frame}'" + ) + segment_groups[segment_index].append(frame) + last_segment_id = segment_index + frame_ranges = [] + for group in segment_groups.values(): + frame_ranges.append((group[0], group[-1])) + return frame_ranges + else: + raise ValueError( + f"Video annotations cannot partially have `segment_index` set") + @classmethod def _create_video_annotations( cls, label: Label @@ -102,12 +135,12 @@ def _create_video_annotations( annot.name].append(annot) for annotation_group in video_annotations.values(): - consecutive_frames = cls._get_consecutive_frames( - sorted([annotation.frame for annotation in annotation_group])) + segment_frame_ranges = cls._get_segment_frame_ranges( + annotation_group) if isinstance(annotation_group[0], VideoClassificationAnnotation): annotation = annotation_group[0] frames_data = [] - for frames in consecutive_frames: + for frames in segment_frame_ranges: frames_data.append({'start': frames[0], 'end': frames[-1]}) annotation.extra.update({'frames': frames_data}) yield NDClassification.from_common(annotation, label.data) @@ -118,7 +151,7 @@ def _create_video_annotations( for video object annotations and will not import alongside the object annotations.""") segments = [] - for start_frame, end_frame in consecutive_frames: + for start_frame, end_frame in segment_frame_ranges: 
segment = [] for annotation in annotation_group: if annotation.keyframe and start_frame <= annotation.frame <= end_frame: diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py index f2200e8f5..068455c99 100644 --- a/labelbox/data/serialization/ndjson/objects.py +++ b/labelbox/data/serialization/ndjson/objects.py @@ -65,9 +65,10 @@ def from_common(cls, point: Point, class NDFramePoint(VideoSupported): point: _Point - def to_common(self, name: str, - feature_schema_id: Cuid) -> VideoObjectAnnotation: + def to_common(self, name: str, feature_schema_id: Cuid, + segment_index: int) -> VideoObjectAnnotation: return VideoObjectAnnotation(frame=self.frame, + segment_index=segment_index, keyframe=True, name=name, feature_schema_id=feature_schema_id, @@ -104,10 +105,11 @@ def from_common(cls, line: Line, class NDFrameLine(VideoSupported): line: List[_Point] - def to_common(self, name: str, - feature_schema_id: Cuid) -> VideoObjectAnnotation: + def to_common(self, name: str, feature_schema_id: Cuid, + segment_index: int) -> VideoObjectAnnotation: return VideoObjectAnnotation( frame=self.frame, + segment_index=segment_index, keyframe=True, name=name, feature_schema_id=feature_schema_id, @@ -171,10 +173,11 @@ def from_common(cls, rectangle: Rectangle, class NDFrameRectangle(VideoSupported): bbox: Bbox - def to_common(self, name: str, - feature_schema_id: Cuid) -> VideoObjectAnnotation: + def to_common(self, name: str, feature_schema_id: Cuid, + segment_index: int) -> VideoObjectAnnotation: return VideoObjectAnnotation( frame=self.frame, + segment_index=segment_index, keyframe=True, name=name, feature_schema_id=feature_schema_id, @@ -211,11 +214,13 @@ def segment_with_uuid(keyframe: Union[NDFrameRectangle, NDFramePoint, keyframe.extra = {'uuid': uuid} return keyframe - def to_common(self, name: str, feature_schema_id: Cuid, uuid: str): + def to_common(self, name: str, feature_schema_id: Cuid, uuid: str, + segment_index: int): 
return [ self.segment_with_uuid( keyframe.to_common(name=name, - feature_schema_id=feature_schema_id), uuid) + feature_schema_id=feature_schema_id, + segment_index=segment_index), uuid) for keyframe in self.keyframes ] @@ -235,11 +240,12 @@ class NDSegments(NDBaseObject): def to_common(self, name: str, feature_schema_id: Cuid): result = [] - for segment in self.segments: + for idx, segment in enumerate(self.segments): result.extend( NDSegment.to_common(segment, name=name, feature_schema_id=feature_schema_id, + segment_index=idx, uuid=self.uuid)) return result diff --git a/tests/data/assets/ndjson/video_import.json b/tests/data/assets/ndjson/video_import.json index 0e1bd11e2..5db642489 100644 --- a/tests/data/assets/ndjson/video_import.json +++ b/tests/data/assets/ndjson/video_import.json @@ -30,13 +30,17 @@ { "frame": 1, "line": [{"x": 10.0, "y": 10.0}, {"x": 100.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] + }, + { + "frame": 5, + "line": [{"x": 15.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] } ] }, { "keyframes": [ { - "frame": 5, + "frame": 8, "line": [{"x": 100.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] } ] @@ -62,6 +66,10 @@ { "frame": 5, "point": {"x": 50.0, "y": 50.0} + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0} } ] } @@ -78,13 +86,17 @@ { "frame": 1, "bbox": {"top": 10.0, "left": 5.0, "height": 100.0, "width": 150.0} + }, + { + "frame": 5, + "bbox": {"top": 30.0, "left": 5.0, "height": 50.0, "width": 150.0} } ] }, { "keyframes": [ { - "frame": 5, + "frame": 10, "bbox": {"top": 300.0, "left": 200.0, "height": 400.0, "width": 150.0} } ] diff --git a/tests/data/assets/ndjson/video_import_name_only.json b/tests/data/assets/ndjson/video_import_name_only.json index acdc42e6e..4f69d1d43 100644 --- a/tests/data/assets/ndjson/video_import_name_only.json +++ b/tests/data/assets/ndjson/video_import_name_only.json @@ -30,13 +30,17 @@ { "frame": 1, "line": [{"x": 10.0, "y": 10.0}, {"x": 100.0, "y": 100.0}, {"x": 50.0, "y": 
30.0}] + }, + { + "frame": 5, + "line": [{"x": 15.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] } ] }, { "keyframes": [ { - "frame": 5, + "frame": 8, "line": [{"x": 100.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] } ] @@ -62,6 +66,10 @@ { "frame": 5, "point": {"x": 50.0, "y": 50.0} + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0} } ] } @@ -78,13 +86,17 @@ { "frame": 1, "bbox": {"top": 10.0, "left": 5.0, "height": 100.0, "width": 150.0} + }, + { + "frame": 5, + "bbox": {"top": 30.0, "left": 5.0, "height": 50.0, "width": 150.0} } ] }, { "keyframes": [ { - "frame": 5, + "frame": 10, "bbox": {"top": 300.0, "left": 200.0, "height": 400.0, "width": 150.0} } ]