From 4b1bbaae95a89bcdab4f86fc2b0d1ab0b5f21bf3 Mon Sep 17 00:00:00 2001 From: Dean Date: Wed, 25 Mar 2026 13:03:49 +0200 Subject: [PATCH 1/5] Add COCO annotation import/export support Co-Authored-By: Claude Opus 4.6 --- dagshub/__init__.py | 2 +- dagshub/auth/token_auth.py | 2 +- dagshub/data_engine/annotation/importer.py | 14 +- dagshub/data_engine/annotation/metadata.py | 84 ++++++- dagshub/data_engine/model/datapoint.py | 54 ++++- dagshub/data_engine/model/query_result.py | 157 +++++++++---- dagshub/data_engine/util/__init__.py | 0 dagshub/data_engine/util/not_implemented.py | 48 ++++ .../res/audio_annotation.json | 82 +++++++ .../test_annotation_parsing.py | 103 ++++++++- .../annotation_import/test_coco.py | 218 ++++++++++++++++++ tests/data_engine/conftest.py | 3 +- tests/mocks/repo_api.py | 4 + 13 files changed, 693 insertions(+), 78 deletions(-) create mode 100644 dagshub/data_engine/util/__init__.py create mode 100644 dagshub/data_engine/util/not_implemented.py create mode 100644 tests/data_engine/annotation_import/res/audio_annotation.json create mode 100644 tests/data_engine/annotation_import/test_coco.py diff --git a/dagshub/__init__.py b/dagshub/__init__.py index 7f4d765d..b14e2564 100644 --- a/dagshub/__init__.py +++ b/dagshub/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.6.5" +__version__ = "0.6.7" from .logger import DAGsHubLogger, dagshub_logger from .common.init import init from .upload.wrapper import upload_files diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py index 31ec32ac..7ba3a70a 100644 --- a/dagshub/auth/token_auth.py +++ b/dagshub/auth/token_auth.py @@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: def can_renegotiate(self): # Env var tokens cannot renegotiate, every other token type can - return not type(self._token) is EnvVarDagshubToken + return type(self._token) is not EnvVarDagshubToken def renegotiate_token(self): if not self._token_storage.is_valid_token(self._token, self._host): diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index c19212de..80e62468 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -3,6 +3,7 @@ from tempfile import TemporaryDirectory from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List +from dagshub_annotation_converter.converters.coco import load_coco_from_file from dagshub_annotation_converter.converters.cvat import load_cvat_from_zip from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask @@ -16,7 +17,7 @@ if TYPE_CHECKING: from dagshub.data_engine.model.datasource import Datasource -AnnotationType = Literal["yolo", "cvat"] +AnnotationType = Literal["yolo", "cvat", "coco"] AnnotationLocation = Literal["repo", "disk"] @@ -85,6 +86,8 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]: ) elif self.annotations_type == "cvat": annotation_dict = load_cvat_from_zip(annotations_file) + elif self.annotations_type == "coco": + annotation_dict, _ = load_coco_from_file(annotations_file) return annotation_dict @@ -92,7 +95,6 @@ def download_annotations(self, dest_dir: Path): log_message("Downloading annotations from repository") repoApi = self.ds.source.repoApi if self.annotations_type == "cvat": - # Download just the annotation file repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) elif self.annotations_type == "yolo": # Download the dataset .yaml file and the images + annotations @@ -104,6 +106,8 @@ def download_annotations(self, dest_dir: Path): # Download the annotation data assert context.path is not None repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True) + elif self.annotations_type == "coco": + repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) @staticmethod def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]) -> AnnotationLocation: @@ -153,8 +157,10 @@ def remap_annotations( ) continue for ann in anns: - assert ann.filename is not None - ann.filename = remap_func(ann.filename) + if ann.filename is not None: + ann.filename = remap_func(ann.filename) + else: + ann.filename = new_filename remapped[new_filename] = anns return remapped diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py index 8b5d632c..0b080e0f 100644 --- a/dagshub/data_engine/annotation/metadata.py +++ b/dagshub/data_engine/annotation/metadata.py @@ -1,25 +1,32 @@ -from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union, Literal, Dict +from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Tuple, Union -from dagshub_annotation_converter.formats.label_studio.task import parse_ls_task, LabelStudioTask -from dagshub_annotation_converter.formats.yolo import import_lookup, import_yolo_result, YoloContext +from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask, parse_ls_task +from dagshub_annotation_converter.formats.yolo import YoloContext, import_lookup, import_yolo_result from dagshub_annotation_converter.formats.yolo.categories import Categories from dagshub_annotation_converter.ir.image import ( - IRBBoxImageAnnotation, CoordinateStyle, - IRSegmentationImageAnnotation, - IRSegmentationPoint, + IRBBoxImageAnnotation, IRPoseImageAnnotation, IRPosePoint, + IRSegmentationImageAnnotation, + IRSegmentationPoint, ) from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase, IRImageAnnotationBase from dagshub.common.api import UserAPI from dagshub.common.helpers import log_message +from dagshub.data_engine.util.not_implemented import NotImplementedMeta if TYPE_CHECKING: - from dagshub.data_engine.model.datapoint import Datapoint import ultralytics.engine.results + from dagshub.data_engine.model.datapoint import Datapoint + +from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation +from dagshub_annotation_converter.formats.label_studio.task import task_lookup as _task_lookup + +_task_lookup["videorectangle"] = VideoRectangleAnnotation + class AnnotationMetaDict(dict): def __init__(self, annotation: "MetadataAnnotations", *args, **kwargs): @@ -269,6 +276,28 @@ def add_image_pose( self.annotations.append(ann) self._update_datapoint() + def add_coco_annotation( + self, + coco_json: str, + ): + """ + Add annotations from a COCO-format JSON string. + + Args: + coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys. + """ + from dagshub_annotation_converter.converters.coco import load_coco_from_json_string + + grouped, _ = load_coco_from_json_string(coco_json) + new_anns: list[IRAnnotationBase] = [] + for anns in grouped.values(): + for ann in anns: + ann.filename = self.datapoint.path + new_anns.append(ann) + self.annotations.extend(new_anns) + log_message(f"Added {len(new_anns)} COCO annotation(s) to datapoint {self.datapoint.path}") + self._update_datapoint() + def add_yolo_annotation( self, annotation_type: Literal["bbox", "segmentation", "pose"], @@ -315,3 +344,44 @@ def _generate_yolo_context(annotation_type, categories: Dict[int, str]) -> YoloC for cat_id, cat_name in categories.items(): cats.add(cat_name, cat_id) return YoloContext(annotation_type=annotation_type, categories=cats) + + +class UnsupportedMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta): + def __init__( + self, + datapoint: "Datapoint", + field: str, + original_value: bytes, + ): + super().__init__(datapoint, field, None, None, original_value) + + @property + def value(self) -> Optional[bytes]: + return self._original_value + + def to_ls_task(self) -> Optional[bytes]: + return self._original_value + + def __repr__(self): + return "Label Studio annotations of unrecognized type" + + +class ErrorMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta): + def __init__( + self, + datapoint: "Datapoint", + field: str, + error_message: str, + ): + super().__init__(datapoint, field, None, None, None) + self._error_message = error_message + + @property + def value(self) -> Optional[bytes]: + raise ValueError(self._error_message) + + def to_ls_task(self) -> Optional[bytes]: + raise ValueError(self._error_message) + + def __repr__(self): + return f"Label Studio annotation download error: {self._error_message}" diff --git a/dagshub/data_engine/model/datapoint.py b/dagshub/data_engine/model/datapoint.py index b7aa89b5..f0c31925 100644 --- a/dagshub/data_engine/model/datapoint.py +++ b/dagshub/data_engine/model/datapoint.py @@ -3,14 +3,14 @@ from dataclasses import dataclass from os import PathLike from pathlib import Path -from typing import Optional, Union, List, Dict, Any, Callable, TYPE_CHECKING, Literal, Sequence +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Sequence, Union -from tenacity import Retrying, stop_after_attempt, wait_exponential, before_sleep_log, retry_if_exception_type +from tenacity import Retrying, before_sleep_log, retry_if_exception_type, stop_after_attempt, wait_exponential from dagshub.common.download import download_files from dagshub.common.helpers import http_request from dagshub.data_engine.annotation import MetadataAnnotations -from dagshub.data_engine.client.models import MetadataSelectFieldSchema, DatapointHistoryResult +from dagshub.data_engine.client.models import DatapointHistoryResult, MetadataSelectFieldSchema from dagshub.data_engine.dtypes import MetadataFieldType if TYPE_CHECKING: @@ -25,6 +25,23 @@ logger = logging.getLogger(__name__) +@dataclass(frozen=True) +class BlobHashMetadata: + hash: str + + def __str__(self) -> str: + return self.hash + + def __repr__(self) -> str: + return f"BlobHashMetadata(hash={self.hash!r})" + + +class BlobDownloadError(Exception): + def __init__(self, message): + super().__init__(message) + self.message = message + + @dataclass class Datapoint: datapoint_id: int @@ -128,6 +145,7 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel float_fields = {f.name for f in fields if f.valueType == MetadataFieldType.FLOAT} date_fields = {f.name for f in fields if f.valueType == MetadataFieldType.DATETIME} + blob_fields = {f.name for f in fields if f.valueType == MetadataFieldType.BLOB} for meta_dict in edge["node"]["metadata"]: key = meta_dict["key"] @@ -138,6 +156,8 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel if key in date_fields: timezone = meta_dict.get("timeZone") value = _datetime_from_timestamp(value / 1000, timezone or "+00:00") + elif key in blob_fields and isinstance(value, str): + value = BlobHashMetadata(value) res.metadata[key] = value return res @@ -164,7 +184,7 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes: if type(current_value) is bytes: # Bytes - it's already there! return current_value - if isinstance(current_value, Path): + elif isinstance(current_value, Path): # Path - assume the path exists and is already downloaded, # because it's unlikely that the user has set it themselves with current_value.open("rb") as f: @@ -173,18 +193,16 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes: self.metadata[column] = content return content - elif type(current_value) is str: - # String - This is probably the hash of the blob, get that from dagshub - blob_url = self.blob_url(current_value) - blob_location = self.blob_cache_location / current_value + elif isinstance(current_value, BlobHashMetadata): + # Blob hash metadata - download blob from DagsHub + blob_url = self.blob_url(current_value.hash) + blob_location = self.blob_cache_location / current_value.hash # Make sure that the cache location exists if cache_on_disk: self.blob_cache_location.mkdir(parents=True, exist_ok=True) content = _get_blob(blob_url, blob_location, self.datasource.source.repoApi.auth, cache_on_disk, True) - if type(content) is str: - raise RuntimeError(f"Error while downloading blob: {content}") if store_value: self.metadata[column] = content @@ -192,6 +210,11 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes: self.metadata[column] = blob_location return content + elif isinstance(current_value, MetadataAnnotations): + ls_task = current_value.to_ls_task() + if ls_task is None: + return b"" + return ls_task else: raise ValueError(f"Can't extract blob metadata from value {current_value} of type {type(current_value)}") @@ -274,10 +297,17 @@ def _get_blob( """ Args: url: url to download the blob from - cache_path: where the cache for the blob is (laods from it if exists, stores there if it doesn't) + cache_path: where the cache for the blob is (loads from it if exists, stores there if it doesn't) auth: auth to use for getting the blob cache_on_disk: whether to store the downloaded blob on disk. If False we also turn off the cache checking return_blob: if True returns the blob of the downloaded data, if False returns the path to the file with it + path_format: if return_blob is False, controls path representation. "path" returns Path, "str" returns str + + Returns: + bytes, Path, or str path on success. + + Raises: + BlobDownloadError on download failure. """ if url is None: return None @@ -313,7 +343,7 @@ def get(): with attempt: content = get() except Exception as e: - return f"Error while downloading binary blob: {e}" + raise BlobDownloadError(str(e)) from e if cache_on_disk: with cache_path.open("wb") as f: diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 6c326eab..6031e0bf 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -15,6 +15,8 @@ import dacite import dagshub_annotation_converter.converters.yolo import rich.progress +from dagshub_annotation_converter.converters.coco import export_to_coco_file +from dagshub_annotation_converter.formats.coco import CocoContext from dagshub_annotation_converter.formats.yolo import YoloContext from dagshub_annotation_converter.formats.yolo.categories import Categories from dagshub_annotation_converter.formats.yolo.common import ir_mapping @@ -30,6 +32,7 @@ from dagshub.common.rich_util import get_rich_progress from dagshub.common.util import lazy_load, multi_urljoin from dagshub.data_engine.annotation import MetadataAnnotations +from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations from dagshub.data_engine.annotation.voxel_conversion import ( add_ls_annotations, add_voxel_annotations, @@ -37,7 +40,13 @@ from dagshub.data_engine.client.loaders.base import DagsHubDataset from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema from dagshub.data_engine.dtypes import MetadataFieldType -from dagshub.data_engine.model.datapoint import Datapoint, _generated_fields, _get_blob +from dagshub.data_engine.model.datapoint import ( + BlobDownloadError, + BlobHashMetadata, + Datapoint, + _generated_fields, + _get_blob, +) from dagshub.data_engine.model.schema_util import dacite_config from dagshub.data_engine.voxel_plugin_server.utils import set_voxel_envvars @@ -389,10 +398,9 @@ def get_blob_fields( for dp in self.entries: for fld in fields: field_value = dp.metadata.get(fld) - # If field_value is a blob or a path, then ignore, means it's already been downloaded - if not isinstance(field_value, str): + if not isinstance(field_value, BlobHashMetadata): continue - download_task = (dp, fld, dp.blob_url(field_value), dp.blob_cache_location / field_value) + download_task = (dp, fld, dp.blob_url(field_value.hash), dp.blob_cache_location / field_value.hash) to_download.append(download_task) progress = get_rich_progress(rich.progress.MofNCompleteColumn()) @@ -402,8 +410,6 @@ def get_blob_fields( def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format) - if isinstance(blob_or_path, str) and path_format != "str": - logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}:{blob_or_path}") dp.metadata[field] = blob_or_path with progress: @@ -415,7 +421,7 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): logger.warning(f"Got exception {type(exc)} while downloading blob: {exc}") progress.update(task, advance=1) - self._convert_annotation_fields(*fields, load_into_memory=load_into_memory) + self._convert_annotation_fields(*fields) # Convert any downloaded document fields document_fields = [f for f in fields if f in self.document_fields] @@ -424,49 +430,63 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path): if document_fields: for dp in self: for fld in document_fields: - if fld in dp.metadata: - # Override the load_into_memory flag, because we need the contents - if not load_into_memory: - dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes() - dp.metadata[fld] = dp.metadata[fld].decode("utf-8") + if fld not in dp.metadata: + continue + try: + content = dp.get_blob(fld) + dp.metadata[fld] = content.decode("utf-8") + except BlobDownloadError as e: + logger.warning(f"Failed to download document field '{fld}' for datapoint '{dp.path}': {e}") return self - def _convert_annotation_fields(self, *fields, load_into_memory): + def _convert_annotation_fields(self, *fields): # Convert any downloaded annotation column annotation_fields = [f for f in fields if f in self.annotation_fields] + if not annotation_fields: + return + # List of datapoints with annotations that couldn't be parsed bad_annotations = defaultdict(list) - if annotation_fields: - # Convert them - for dp in self: - for fld in annotation_fields: - if fld in dp.metadata: - # Already loaded - skip - if isinstance(dp.metadata[fld], MetadataAnnotations): - continue - # Override the load_into_memory flag, because we need the contents - if not load_into_memory: - dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes() - try: - dp.metadata[fld] = MetadataAnnotations.from_ls_task( - datapoint=dp, field=fld, ls_task=dp.metadata[fld] - ) - except ValidationError: - bad_annotations[fld].append(dp.path) - else: - dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld) + for dp in self: + for fld in annotation_fields: + metadata_value = dp.metadata.get(fld) + # No value - create empty annotation container + if metadata_value is None: + dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld) + continue + # Already loaded - skip + elif isinstance(metadata_value, MetadataAnnotations): + continue + # Parse annotation from the content of the field + else: + try: + annotation_content = dp.get_blob(fld) + dp.metadata[fld] = MetadataAnnotations.from_ls_task( + datapoint=dp, field=fld, ls_task=annotation_content + ) + except BlobDownloadError as e: + dp.metadata[fld] = ErrorMetadataAnnotations(datapoint=dp, field=fld, error_message=e.message) + bad_annotations[fld].append(dp.path) + except ValidationError: + dp.metadata[fld] = UnsupportedMetadataAnnotations( + datapoint=dp, field=fld, original_value=annotation_content + ) + bad_annotations[fld].append(dp.path) if bad_annotations: log_message( - "Warning: The following datapoints had invalid annotations, " - "any annotation-related operations will not work on these:" + "Warning: The following datapoints had unsupported or invalid annotations, " + "convenience functions like `add_bounding_box` won't work on these:" ) err_msg = "" for fld, dps in bad_annotations.items(): - err_msg += f'Field "{fld}" in datapoints:\n\t' - err_msg += "\n\t".join(dps) + err_msg += f'\nField "{fld}" in datapoints:\n\t' + if len(dps) > 10: + err_msg += "\n\t".join(dps[:10]) + f"\n\t... and {len(dps) - 10} more" + else: + err_msg += "\n\t".join(dps) log_message(err_msg) def download_binary_columns( @@ -760,6 +780,16 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB annotations.extend(dp.metadata[annotation_field].annotations) return annotations + def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str: + if annotation_field is not None: + return annotation_field + annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()]) + if len(annotation_fields) == 0: + raise ValueError("No annotation fields found in the datasource") + annotation_field = annotation_fields[0] + log_message(f"Using annotations from field {annotation_field}") + return annotation_field + def export_as_yolo( self, download_dir: Optional[Union[str, Path]] = None, @@ -785,12 +815,7 @@ def export_as_yolo( Returns: The path to the YAML file with the metadata. Pass this path to ``YOLO.train()`` to train a model. """ - if annotation_field is None: - annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()]) - if len(annotation_fields) == 0: - raise ValueError("No annotation fields found in the datasource") - annotation_field = annotation_fields[0] - log_message(f"Using annotations from field {annotation_field}") + annotation_field = self._resolve_annotation_field(annotation_field) if download_dir is None: download_dir = Path("dagshub_export") @@ -843,6 +868,54 @@ def export_as_yolo( log_message(f"Done! Saved YOLO Dataset, YAML file is at {yaml_path.absolute()}") return yaml_path + def export_as_coco( + self, + download_dir: Optional[Union[str, Path]] = None, + annotation_field: Optional[str] = None, + output_filename: str = "annotations.json", + classes: Optional[Dict[int, str]] = None, + ) -> Path: + """ + Downloads the files and exports annotations in COCO format. + + Args: + download_dir: Where to download the files. Defaults to ``./dagshub_export`` + annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field. + output_filename: Name of the output COCO JSON file. Default is ``annotations.json``. + classes: Category mapping for the COCO dataset as ``{id: name}``. + If ``None``, categories will be inferred from the annotations. + + Returns: + Path to the exported COCO JSON file. + """ + annotation_field = self._resolve_annotation_field(annotation_field) + + if download_dir is None: + download_dir = Path("dagshub_export") + download_dir = Path(download_dir) + + annotations = self._get_all_annotations(annotation_field) + if not annotations: + raise RuntimeError("No annotations found to export") + + context = CocoContext() + if classes is not None: + context.categories = dict(classes) + + # Add the source prefix to all annotations + for ann in annotations: + ann.filename = os.path.join(self.datasource.source.source_prefix, ann.filename) + + image_download_path = download_dir / "data" + log_message("Downloading image files...") + self.download_files(image_download_path) + + output_path = download_dir / output_filename + log_message("Exporting COCO annotations...") + result_path = export_to_coco_file(annotations, output_path, context=context) + log_message(f"Done! Saved COCO annotations to {result_path.absolute()}") + return result_path + def to_voxel51_dataset(self, **kwargs) -> "fo.Dataset": """ Creates a voxel51 dataset that can be used with\ diff --git a/dagshub/data_engine/util/__init__.py b/dagshub/data_engine/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dagshub/data_engine/util/not_implemented.py b/dagshub/data_engine/util/not_implemented.py new file mode 100644 index 00000000..d9a81285 --- /dev/null +++ b/dagshub/data_engine/util/not_implemented.py @@ -0,0 +1,48 @@ +class NotImplementedMeta(type): + """ + A metaclass that replaces all parent class methods and properties that aren't overridden in the subclass + with NotImplementedError. + """ + + def __new__(mcs, name, bases, namespace): + # Get all attributes from base classes + for base in bases: + for attr_name in dir(base): + if attr_name.startswith("_"): + continue + + # Skip if already defined in subclass + if attr_name in namespace: + continue + + base_attr = getattr(base, attr_name) + + # Handle properties + if isinstance(base_attr, property): + # Create a property that raises NotImplementedError + def make_not_implemented_property(prop_name): + def getter(self): + raise NotImplementedError(f"Property '{prop_name}' not implemented") + + def setter(self, value): + raise NotImplementedError(f"Property '{prop_name}' not implemented") + + def deleter(self): + raise NotImplementedError(f"Property '{prop_name}' not implemented") + + return property(getter, setter, deleter) + + namespace[attr_name] = make_not_implemented_property(attr_name) + + # Handle regular methods + elif callable(base_attr): + + def make_not_implemented(method_name): + def not_impl(self, *args, **kwargs): + raise NotImplementedError(f"Method '{method_name}' not implemented") + + return not_impl + + namespace[attr_name] = make_not_implemented(attr_name) + + return super().__new__(mcs, name, bases, namespace) diff --git a/tests/data_engine/annotation_import/res/audio_annotation.json b/tests/data_engine/annotation_import/res/audio_annotation.json new file mode 100644 index 00000000..adc356e2 --- /dev/null +++ b/tests/data_engine/annotation_import/res/audio_annotation.json @@ -0,0 +1,82 @@ +{ + "id": 41, + "data": { + "audio": "https://example.com/some-non-existent-file.mp3", + "media type": "audio/mpeg", + "size": 111699 + }, + "meta": { + "datapoint_id": 12345678, + "datasource_id": 6565 + }, + "created_at": "2025-12-20T13:44:02.316027Z", + "updated_at": "2026-01-26T15:00:13.046967Z", + "is_labeled": true, + "project": 1, + "annotations": [ + { + "completed_by": 1, + "result": [ + { + "type": "choices", + "value": { + "choices": [ + "true" + ] + }, + "origin": "manual", + "to_name": "audio", + "from_name": "further_utterance", + "id": "deadbeef1" + }, + { + "type": "rating", + "value": { + "rating": 1 + }, + "origin": "manual", + "to_name": "audio", + "from_name": "difficulty", + "id": "deadbeef1" + }, + { + "type": "textarea", + "value": { + "text": [ + "kirill@dagshub.com" + ] + }, + "origin": "manual", + "to_name": "audio", + "from_name": "email_address", + "id": "deadbeef1" + }, + { + "to_name": "audio", + "from_name": "first_name", + "id": "Qzu1dR2RQ8", + "type": "textarea", + "value": { + "text": [ + "Kirill" + ] + }, + "origin": "manual" + }, + { + "type": "textarea", + "value": { + "text": [ + "Bolashev" + ] + }, + "origin": "manual", + "to_name": "audio", + "from_name": "last_name", + "id": "deadbeef1" + } + ], + "ground_truth": false + } + ] +} diff --git a/tests/data_engine/annotation_import/test_annotation_parsing.py b/tests/data_engine/annotation_import/test_annotation_parsing.py index 66840ecb..c04b0d51 100644 --- a/tests/data_engine/annotation_import/test_annotation_parsing.py +++ b/tests/data_engine/annotation_import/test_annotation_parsing.py @@ -1,19 +1,24 @@ import json +from os import PathLike from pathlib import Path +from typing import Union from unittest.mock import MagicMock import pytest from dagshub_annotation_converter.ir.image import IRSegmentationImageAnnotation +from pytest import MonkeyPatch from dagshub.data_engine.annotation import MetadataAnnotations +from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags -from dagshub.data_engine.model import query_result +from dagshub.data_engine.model import datapoint, query_result +from dagshub.data_engine.model.datapoint import BlobDownloadError, BlobHashMetadata from dagshub.data_engine.model.datasource import Datasource +from dagshub.data_engine.model.query_result import QueryResult from tests.data_engine.util import add_metadata_field _annotation_field_name = "annotation" _dp_path = "data/sample_datapoint.jpg" -_annotation_hash = "annotation1" # Corresponds to a resource JSON _res_folder = Path(__file__).parent / "res" @@ -51,17 +56,24 @@ def mock_annotation_query_result( return query_result.QueryResult.from_gql_query(data_dict, ds) -def mock_get_blob(*args, **kwargs) -> bytes: +def mock_get_blob(*args, **kwargs) -> Union[bytes, PathLike]: download_url: str = args[0] blob_hash = download_url.split("/")[-1] + load_into_memory = args[4] blob_path = _res_folder / f"{blob_hash}.json" - if not blob_path.exists(): - raise FileNotFoundError(f"Mock blob file not found: {blob_path}") - return blob_path.read_bytes() + try: + if not blob_path.exists(): + raise FileNotFoundError(f"Blob with hash {blob_hash} not found in res folder") + if load_into_memory: + return blob_path.read_bytes() + else: + return blob_path + except Exception as e: + raise BlobDownloadError(str(e)) from e -@pytest.fixture -def ds_with_document_annotation(ds, monkeypatch): + +def _ds_with_annotation(ds: "Datasource", monkeypatch: MonkeyPatch, annotation_hash: str): add_metadata_field( ds, _annotation_field_name, @@ -70,18 +82,89 @@ def ds_with_document_annotation(ds, monkeypatch): ) ds.source.client.get_datapoints = MagicMock( - return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, _annotation_hash) + return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, annotation_hash) ) monkeypatch.setattr(query_result, "_get_blob", mock_get_blob) + monkeypatch.setattr(datapoint, "_get_blob", mock_get_blob) - yield ds + return ds + + +@pytest.fixture +def ds_with_document_annotation(ds, monkeypatch): + yield _ds_with_annotation(ds, monkeypatch, "annotation1") def test_annotation_with_document_are_parsed_as_annotation(ds_with_document_annotation): qr = ds_with_document_annotation.all() + _test_annotation(qr) + + +def test_double_loading_annotation_works(ds_with_document_annotation): + qr = ds_with_document_annotation.all() + qr.get_blob_fields(_annotation_field_name) + _test_annotation(qr) + + +def _test_annotation(qr: QueryResult): annotation: MetadataAnnotations = qr[0].metadata[_annotation_field_name] assert isinstance(annotation, MetadataAnnotations) # Check that the annotation got parsed correctly, the JSON should have one segmentation annotation in it assert len(annotation.annotations) == 1 assert isinstance(annotation.annotations[0], IRSegmentationImageAnnotation) + + +@pytest.fixture +def ds_with_unsupported_annotation(ds, monkeypatch): + yield _ds_with_annotation(ds, monkeypatch, "audio_annotation") + + +def test_handling_unsupported_annotation(ds_with_unsupported_annotation): + qr = ds_with_unsupported_annotation.all() + + annotation: MetadataAnnotations = qr[0].metadata[_annotation_field_name] + + assert isinstance(annotation, UnsupportedMetadataAnnotations) + # Unsupported annotation is still a subclass of regular annotation + # This is crucial for logic that checks if annotation metadata was parsed already, + # so if this starts failing, that logic will need to be changed too + assert isinstance(annotation, MetadataAnnotations) + + with pytest.raises(NotImplementedError): + annotation.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080) + + expected_content = (_res_folder / "audio_annotation.json").read_bytes() + assert annotation.value == expected_content + assert annotation.to_ls_task() == expected_content + + +@pytest.fixture +def ds_with_nonexistent_annotation(ds, monkeypatch): + yield _ds_with_annotation(ds, monkeypatch, "nonexistent_annotation") + + +def test_nonexistent_annotation(ds_with_nonexistent_annotation): + qr = ds_with_nonexistent_annotation.all(load_documents=False, load_annotations=False) + qr.get_annotations() + + annotation: MetadataAnnotations = qr[0].metadata[_annotation_field_name] + + assert isinstance(annotation, ErrorMetadataAnnotations) + # Error annotation is still a subclass of regular annotation + # This is crucial for logic that checks if annotation metadata was parsed already, + # so if this starts failing, that logic will need to be changed too + assert isinstance(annotation, MetadataAnnotations) + + with pytest.raises(NotImplementedError): + annotation.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080) + + with pytest.raises(ValueError, match="Blob with hash nonexistent_annotation not found in res folder"): + _ = annotation.value + with pytest.raises(ValueError, match="Blob with hash nonexistent_annotation not found in res folder"): + annotation.to_ls_task() + + +def test_blob_metadata_is_wrapped_from_backend(ds_with_document_annotation): + qr = ds_with_document_annotation.all(load_documents=False, load_annotations=False) + assert isinstance(qr[0].metadata[_annotation_field_name], BlobHashMetadata) diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py new file mode 100644 index 00000000..9b238fd1 --- /dev/null +++ b/tests/data_engine/annotation_import/test_coco.py @@ -0,0 +1,218 @@ +import datetime +import json +from pathlib import PurePosixPath +from unittest.mock import patch, PropertyMock + +import pytest +from dagshub_annotation_converter.ir.image import ( + IRBBoxImageAnnotation, + CoordinateStyle, +) + +from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError +from dagshub.data_engine.annotation.metadata import MetadataAnnotations +from dagshub.data_engine.client.models import MetadataSelectFieldSchema +from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags +from dagshub.data_engine.model.datapoint import Datapoint +from dagshub.data_engine.model.query_result import QueryResult + + +@pytest.fixture(autouse=True) +def mock_source_prefix(ds): + with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): + yield + + +# --- import --- + + +def test_import_coco_from_file(ds, tmp_path): + _write_coco(tmp_path, _make_coco_json()) + importer = AnnotationImporter(ds, "coco", tmp_path / "annotations.json", load_from="disk") + result = importer.import_annotations() + + assert "image1.jpg" in result + assert len(result["image1.jpg"]) == 1 + assert isinstance(result["image1.jpg"][0], IRBBoxImageAnnotation) + + +def test_import_coco_nonexistent_raises(ds, tmp_path): + importer = AnnotationImporter(ds, "coco", tmp_path / "nope.json", load_from="disk") + with pytest.raises(AnnotationsNotFoundError): + importer.import_annotations() + + +def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): + importer = AnnotationImporter(ds, "coco", tmp_path / "ann.json", load_from="disk") + bbox = IRBBoxImageAnnotation( + filename="test.jpg", categories={"cat": 1.0}, + top=0.1, left=0.1, width=0.2, height=0.2, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.NORMALIZED, + ) + tasks = importer.convert_to_ls_tasks({"test.jpg": [bbox]}) + + assert "test.jpg" in tasks + task_json = json.loads(tasks["test.jpg"]) + assert "annotations" in task_json + assert len(task_json["annotations"]) > 0 + + +# --- add_coco_annotation --- + + +def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth): + dp = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={}) + meta_ann = MetadataAnnotations(datapoint=dp, field="ann") + meta_ann.add_coco_annotation(json.dumps(_make_coco_json())) + + assert len(meta_ann.annotations) == 1 + assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation) + assert meta_ann.annotations[0].filename == "my_images/photo.jpg" + + +# --- _resolve_annotation_field --- + + +def test_resolve_explicit_field(ds): + qr = _make_qr(ds, [], ann_field="my_ann") + assert qr._resolve_annotation_field("explicit") == "explicit" + + +def test_resolve_auto_field(ds): + qr = _make_qr(ds, [], ann_field="my_ann") + assert qr._resolve_annotation_field(None) == "my_ann" + + +def test_resolve_no_fields_raises(ds): + qr = _make_qr(ds, [], ann_field=None) + with pytest.raises(ValueError, match="No annotation fields"): + qr._resolve_annotation_field(None) + + +def test_resolve_picks_alphabetically_first(ds): + fields = [] + for name in ["zebra_ann", "alpha_ann"]: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=name, + multiple=False, valueType=MetadataFieldType.BLOB, + name=name, tags={ReservedTags.ANNOTATION.value}, + )) + qr = QueryResult(datasource=ds, _entries=[], fields=fields) + assert qr._resolve_annotation_field(None) == "alpha_ann" + + +# --- export_as_coco --- + + +def test_export_coco_bbox_coordinates(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + ann = IRBBoxImageAnnotation( + filename="images/test.jpg", categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + coco = json.loads(result.read_text()) + assert coco["annotations"][0]["bbox"] == [10.0, 20.0, 30.0, 40.0] + + +def test_export_coco_no_annotations_raises(ds, tmp_path): + dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[]) + + qr = _make_qr(ds, [dp], ann_field="ann") + with pytest.raises(RuntimeError, match="No annotations found"): + qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + +def test_export_coco_explicit_classes(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox("images/test.jpg")] + ) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco( + download_dir=tmp_path, annotation_field="ann", classes={1: "cat", 2: "dog"} + ) + + coco = json.loads(result.read_text()) + assert "cat" in {c["name"] for c in coco["categories"]} + + +def test_export_coco_custom_filename(ds, tmp_path): + dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox("images/test.jpg")] + ) + + qr = _make_qr(ds, [dp], ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco( + download_dir=tmp_path, annotation_field="ann", output_filename="custom.json" + ) + + assert result.name == "custom.json" + + +def test_export_coco_multiple_datapoints(ds, tmp_path): + dps = [] + for i, name in enumerate(["a.jpg", "b.jpg"]): + dp = Datapoint(datasource=ds, path=name, datapoint_id=i, metadata={}) + dp.metadata["ann"] = MetadataAnnotations( + datapoint=dp, field="ann", annotations=[_make_image_bbox(name)] + ) + dps.append(dp) + + qr = _make_qr(ds, dps, ann_field="ann") + with patch.object(qr, "download_files"): + result = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann") + + coco = json.loads(result.read_text()) + assert len(coco["annotations"]) == 2 + assert len(coco["images"]) == 2 + + +# --- helpers --- + + +def _make_coco_json(): + return { + "categories": [{"id": 1, "name": "cat"}], + "images": [{"id": 1, "width": 640, "height": 480, "file_name": "image1.jpg"}], + "annotations": [{"id": 1, "image_id": 1, "category_id": 1, "bbox": [10, 20, 30, 40]}], + } + + +def _write_coco(tmp_path, coco): + (tmp_path / "annotations.json").write_text(json.dumps(coco)) + + +def _make_image_bbox(filename="test.jpg") -> IRBBoxImageAnnotation: + return IRBBoxImageAnnotation( + filename=filename, categories={"cat": 1.0}, + top=20.0, left=10.0, width=30.0, height=40.0, + image_width=640, image_height=480, + coordinate_style=CoordinateStyle.DENORMALIZED, + ) + + +def _make_qr(ds, datapoints, ann_field=None): + fields = [] + if ann_field: + fields.append(MetadataSelectFieldSchema( + asOf=int(datetime.datetime.now().timestamp()), + autoGenerated=False, originalName=ann_field, + multiple=False, valueType=MetadataFieldType.BLOB, + name=ann_field, tags={ReservedTags.ANNOTATION.value}, + )) + return QueryResult(datasource=ds, _entries=datapoints, fields=fields) diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py index e8f0c70a..e57d1e83 100644 --- a/tests/data_engine/conftest.py +++ b/tests/data_engine/conftest.py @@ -5,7 +5,7 @@ from dagshub.common.api import UserAPI from dagshub.common.api.responses import UserAPIResponse from dagshub.data_engine import datasources -from dagshub.data_engine.client.models import MetadataSelectFieldSchema, PreprocessingStatus +from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema, PreprocessingStatus from dagshub.data_engine.model.datapoint import Datapoint from dagshub.data_engine.model.datasource import DatasetState, Datasource from dagshub.data_engine.model.query_result import QueryResult @@ -26,6 +26,7 @@ def other_ds(mocker, mock_dagshub_auth) -> Datasource: def _create_mock_datasource(mocker, id, name) -> Datasource: ds_state = datasources.DatasourceState(id=id, name=name, repo="kirill/repo") + ds_state.source_type = DatasourceType.REPOSITORY ds_state.path = "repo://kirill/repo/data/" ds_state.preprocessing_status = PreprocessingStatus.READY mocker.patch.object(ds_state, "client") diff --git a/tests/mocks/repo_api.py b/tests/mocks/repo_api.py index d457d161..22b6c94c 100644 --- a/tests/mocks/repo_api.py +++ b/tests/mocks/repo_api.py @@ -113,6 +113,10 @@ def generate_content_api_entry(path, is_dir=False, versioning="dvc") -> ContentA def default_branch(self) -> str: return self._default_branch + @property + def id(self) -> int: + return 1 + def get_connected_storages(self) -> List[StorageAPIEntry]: return self.storages From e68048a4ea48d8fbb1f6176f7d56dc0de9b71d33 Mon Sep 17 00:00:00 2001 From: Kirill Bolashev Date: Sun, 29 Mar 2026 13:44:10 +0300 Subject: [PATCH 2/5] Test: use the coco_converter branch of the annotation converter while PR is WIP --- setup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6cdef855..a4a08913 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ -import setuptools import os.path +import setuptools + # Thank you pip contributors def read(rel_path: str) -> str: @@ -41,7 +42,11 @@ def get_version(rel_path: str) -> str: "python-dateutil", "boto3", "semver", - "dagshub-annotation-converter>=0.1.12", + # FIXME: roll back to main after merging + # "dagshub-annotation-converter>=0.1.12", + "dagshub-annotation-converter @ " + + "git+https://github.com/DagsHub/" + + "dagshub-annotation-converter@coco_converter#egg=dagshub-annotation-converter", ] extras_require = { From 4f830e466ab76dc9bca4d44d16d2d4c46a582172 Mon Sep 17 00:00:00 2001 From: Dean Date: Sun, 29 Mar 2026 15:06:41 +0300 Subject: [PATCH 3/5] Fix review comments --- dagshub/__init__.py | 2 +- dagshub/auth/token_auth.py | 2 +- dagshub/data_engine/annotation/importer.py | 8 +++---- dagshub/data_engine/annotation/metadata.py | 22 ------------------- dagshub/data_engine/model/query_result.py | 5 ++++- .../annotation_import/test_coco.py | 22 +------------------ tests/data_engine/conftest.py | 3 +++ 7 files changed, 14 insertions(+), 50 deletions(-) diff --git a/dagshub/__init__.py b/dagshub/__init__.py index 10f3c0cb..7f68de54 100644 --- a/dagshub/__init__.py +++ b/dagshub/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.6.10" +__version__ = "0.6.9" from .logger import DAGsHubLogger, dagshub_logger from .common.init import init from .upload.wrapper import upload_files diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py index 7ba3a70a..31ec32ac 100644 --- a/dagshub/auth/token_auth.py +++ b/dagshub/auth/token_auth.py @@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: def can_renegotiate(self): # Env var tokens cannot renegotiate, every other token type can - return type(self._token) is not EnvVarDagshubToken + return not type(self._token) is EnvVarDagshubToken def renegotiate_token(self): if not self._token_storage.is_valid_token(self._token, self._host): diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py index 80e62468..90661df1 100644 --- a/dagshub/data_engine/annotation/importer.py +++ b/dagshub/data_engine/annotation/importer.py @@ -95,6 +95,7 @@ def download_annotations(self, dest_dir: Path): log_message("Downloading annotations from repository") repoApi = self.ds.source.repoApi if self.annotations_type == "cvat": + # Download just the annotation file repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) elif self.annotations_type == "yolo": # Download the dataset .yaml file and the images + annotations @@ -107,6 +108,7 @@ def download_annotations(self, dest_dir: Path): assert context.path is not None repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True) elif self.annotations_type == "coco": + # Download just the annotation file repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True) @staticmethod @@ -157,10 +159,8 @@ def remap_annotations( ) continue for ann in anns: - if ann.filename is not None: - ann.filename = remap_func(ann.filename) - else: - ann.filename = new_filename + assert ann.filename is not None + ann.filename = remap_func(ann.filename) remapped[new_filename] = anns return remapped diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py index 140f7733..06f7bc28 100644 --- a/dagshub/data_engine/annotation/metadata.py +++ b/dagshub/data_engine/annotation/metadata.py @@ -271,28 +271,6 @@ def add_image_pose( self.annotations.append(ann) self._update_datapoint() - def add_coco_annotation( - self, - coco_json: str, - ): - """ - Add annotations from a COCO-format JSON string. - - Args: - coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys. - """ - from dagshub_annotation_converter.converters.coco import load_coco_from_json_string - - grouped, _ = load_coco_from_json_string(coco_json) - new_anns: list[IRAnnotationBase] = [] - for anns in grouped.values(): - for ann in anns: - ann.filename = self.datapoint.path - new_anns.append(ann) - self.annotations.extend(new_anns) - log_message(f"Added {len(new_anns)} COCO annotation(s) to datapoint {self.datapoint.path}") - self._update_datapoint() - def add_yolo_annotation( self, annotation_type: Literal["bbox", "segmentation", "pose"], diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py index 6031e0bf..ddec542c 100644 --- a/dagshub/data_engine/model/query_result.py +++ b/dagshub/data_engine/model/query_result.py @@ -900,7 +900,10 @@ def export_as_coco( context = CocoContext() if classes is not None: - context.categories = dict(classes) + categories = Categories() + for category_id, category_name in classes.items(): + categories.add(category_name, category_id) + context.categories = categories # Add the source prefix to all annotations for ann in annotations: diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py index 9b238fd1..0db9cf8f 100644 --- a/tests/data_engine/annotation_import/test_coco.py +++ b/tests/data_engine/annotation_import/test_coco.py @@ -1,7 +1,6 @@ import datetime import json -from pathlib import PurePosixPath -from unittest.mock import patch, PropertyMock +from unittest.mock import patch import pytest from dagshub_annotation_converter.ir.image import ( @@ -17,12 +16,6 @@ from dagshub.data_engine.model.query_result import QueryResult -@pytest.fixture(autouse=True) -def mock_source_prefix(ds): - with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()): - yield - - # --- import --- @@ -58,19 +51,6 @@ def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth): assert len(task_json["annotations"]) > 0 -# --- add_coco_annotation --- - - -def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth): - dp = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={}) - meta_ann = MetadataAnnotations(datapoint=dp, field="ann") - meta_ann.add_coco_annotation(json.dumps(_make_coco_json())) - - assert len(meta_ann.annotations) == 1 - assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation) - assert meta_ann.annotations[0].filename == "my_images/photo.jpg" - - # --- _resolve_annotation_field --- diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py index e57d1e83..02ee8331 100644 --- a/tests/data_engine/conftest.py +++ b/tests/data_engine/conftest.py @@ -1,4 +1,6 @@ import datetime +from pathlib import PurePosixPath +from unittest.mock import PropertyMock import pytest @@ -34,6 +36,7 @@ def _create_mock_datasource(mocker, id, name) -> Datasource: mocker.patch.object(ds_state, "get_from_dagshub") # Stub out root path so all the content_path/etc work without also mocking out RepoAPI mocker.patch.object(ds_state, "_root_path", return_value="http://example.com") + mocker.patch.object(type(ds_state), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()) ds_state.repoApi = MockRepoAPI("kirill/repo") return Datasource(ds_state) From eb2c6437eb9f635ed0913a030fc41c03ff30f464 Mon Sep 17 00:00:00 2001 From: Dean P Date: Mon, 13 Apr 2026 12:33:22 +0300 Subject: [PATCH 4/5] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a4a08913..a26ff6b7 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ def get_version(rel_path: str) -> str: # "dagshub-annotation-converter>=0.1.12", "dagshub-annotation-converter @ " + "git+https://github.com/DagsHub/" - + "dagshub-annotation-converter@coco_converter#egg=dagshub-annotation-converter", + + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter", ] extras_require = { From 15289ed4f54c97e155a83516959779db96a6c1b0 Mon Sep 17 00:00:00 2001 From: Dean Date: Tue, 14 Apr 2026 12:20:10 +0300 Subject: [PATCH 5/5] bump converter version --- setup.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/setup.py b/setup.py index a26ff6b7..270ae5ef 100644 --- a/setup.py +++ b/setup.py @@ -42,11 +42,7 @@ def get_version(rel_path: str) -> str: "python-dateutil", "boto3", "semver", - # FIXME: roll back to main after merging - # "dagshub-annotation-converter>=0.1.12", - "dagshub-annotation-converter @ " - + "git+https://github.com/DagsHub/" - + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter", + "dagshub-annotation-converter>=0.2.0", ] extras_require = {