From 4b1bbaae95a89bcdab4f86fc2b0d1ab0b5f21bf3 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Wed, 25 Mar 2026 13:03:49 +0200
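Subject: [PATCH 1/5] Add COCO annotation import/export support

Besides the COCO importer, `add_coco_annotation` and `export_as_coco`, this
patch also:
- wraps blob hashes in `BlobHashMetadata` and makes `_get_blob` raise
  `BlobDownloadError` instead of returning an error string
- keeps annotations that fail to parse or download as
  `UnsupportedMetadataAnnotations` / `ErrorMetadataAnnotations`
- registers the `videorectangle` Label Studio annotation type
- bumps the package version to 0.6.7

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>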
---
 dagshub/__init__.py                           |   2 +-
 dagshub/auth/token_auth.py                    |   2 +-
 dagshub/data_engine/annotation/importer.py    |  14 +-
 dagshub/data_engine/annotation/metadata.py    |  84 ++++++-
 dagshub/data_engine/model/datapoint.py        |  54 ++++-
 dagshub/data_engine/model/query_result.py     | 157 +++++++++----
 dagshub/data_engine/util/__init__.py          |   0
 dagshub/data_engine/util/not_implemented.py   |  48 ++++
 .../res/audio_annotation.json                 |  82 +++++++
 .../test_annotation_parsing.py                | 103 ++++++++-
 .../annotation_import/test_coco.py            | 218 ++++++++++++++++++
 tests/data_engine/conftest.py                 |   3 +-
 tests/mocks/repo_api.py                       |   4 +
 13 files changed, 693 insertions(+), 78 deletions(-)
 create mode 100644 dagshub/data_engine/util/__init__.py
 create mode 100644 dagshub/data_engine/util/not_implemented.py
 create mode 100644 tests/data_engine/annotation_import/res/audio_annotation.json
 create mode 100644 tests/data_engine/annotation_import/test_coco.py

diff --git a/dagshub/__init__.py b/dagshub/__init__.py
index 7f4d765d..b14e2564 100644
--- a/dagshub/__init__.py
+++ b/dagshub/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.5"
+__version__ = "0.6.7"
 from .logger import DAGsHubLogger, dagshub_logger
 from .common.init import init
 from .upload.wrapper import upload_files
diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py
index 31ec32ac..7ba3a70a 100644
--- a/dagshub/auth/token_auth.py
+++ b/dagshub/auth/token_auth.py
@@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]:
 
     def can_renegotiate(self):
         # Env var tokens cannot renegotiate, every other token type can
-        return not type(self._token) is EnvVarDagshubToken
+        return type(self._token) is not EnvVarDagshubToken
 
     def renegotiate_token(self):
         if not self._token_storage.is_valid_token(self._token, self._host):
diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index c19212de..80e62468 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -3,6 +3,7 @@
 from tempfile import TemporaryDirectory
 from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List
 
+from dagshub_annotation_converter.converters.coco import load_coco_from_file
 from dagshub_annotation_converter.converters.cvat import load_cvat_from_zip
 from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs
 from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
@@ -16,7 +17,7 @@
 if TYPE_CHECKING:
     from dagshub.data_engine.model.datasource import Datasource
 
-AnnotationType = Literal["yolo", "cvat"]
+AnnotationType = Literal["yolo", "cvat", "coco"]
 AnnotationLocation = Literal["repo", "disk"]
 
 
@@ -85,6 +86,8 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
                 )
             elif self.annotations_type == "cvat":
                 annotation_dict = load_cvat_from_zip(annotations_file)
+            elif self.annotations_type == "coco":
+                annotation_dict, _ = load_coco_from_file(annotations_file)
 
             return annotation_dict
 
@@ -92,7 +95,6 @@ def download_annotations(self, dest_dir: Path):
         log_message("Downloading annotations from repository")
         repoApi = self.ds.source.repoApi
         if self.annotations_type == "cvat":
-            # Download just the annotation file
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "yolo":
             # Download the dataset .yaml file and the images + annotations
@@ -104,6 +106,8 @@ def download_annotations(self, dest_dir: Path):
             # Download the annotation data
             assert context.path is not None
             repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True)
+        elif self.annotations_type == "coco":
+            repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
 
     @staticmethod
     def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]) -> AnnotationLocation:
@@ -153,8 +157,10 @@ def remap_annotations(
                 )
                 continue
             for ann in anns:
-                assert ann.filename is not None
-                ann.filename = remap_func(ann.filename)
+                if ann.filename is not None:
+                    ann.filename = remap_func(ann.filename)
+                else:
+                    ann.filename = new_filename
             remapped[new_filename] = anns
 
         return remapped
diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py
index 8b5d632c..0b080e0f 100644
--- a/dagshub/data_engine/annotation/metadata.py
+++ b/dagshub/data_engine/annotation/metadata.py
@@ -1,25 +1,32 @@
-from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union, Literal, Dict
+from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Tuple, Union
 
-from dagshub_annotation_converter.formats.label_studio.task import parse_ls_task, LabelStudioTask
-from dagshub_annotation_converter.formats.yolo import import_lookup, import_yolo_result, YoloContext
+from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask, parse_ls_task
+from dagshub_annotation_converter.formats.yolo import YoloContext, import_lookup, import_yolo_result
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.ir.image import (
-    IRBBoxImageAnnotation,
     CoordinateStyle,
-    IRSegmentationImageAnnotation,
-    IRSegmentationPoint,
+    IRBBoxImageAnnotation,
     IRPoseImageAnnotation,
     IRPosePoint,
+    IRSegmentationImageAnnotation,
+    IRSegmentationPoint,
 )
 from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase, IRImageAnnotationBase
 
 from dagshub.common.api import UserAPI
 from dagshub.common.helpers import log_message
+from dagshub.data_engine.util.not_implemented import NotImplementedMeta
 
 if TYPE_CHECKING:
-    from dagshub.data_engine.model.datapoint import Datapoint
     import ultralytics.engine.results
 
+    from dagshub.data_engine.model.datapoint import Datapoint
+
+from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation
+from dagshub_annotation_converter.formats.label_studio.task import task_lookup as _task_lookup
+
+_task_lookup["videorectangle"] = VideoRectangleAnnotation
+
 
 class AnnotationMetaDict(dict):
     def __init__(self, annotation: "MetadataAnnotations", *args, **kwargs):
@@ -269,6 +276,28 @@ def add_image_pose(
         self.annotations.append(ann)
         self._update_datapoint()
 
+    def add_coco_annotation(self, coco_json: str):
+        """
+        Parse a COCO-format JSON string and attach its annotations to this datapoint.
+
+        Every parsed annotation gets its ``filename`` overwritten with this datapoint's path,
+        whichever group the loader returned it in.
+
+        Args:
+            coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys.
+        """
+        from dagshub_annotation_converter.converters.coco import load_coco_from_json_string
+
+        annotations_by_group, _ = load_coco_from_json_string(coco_json)
+        # Flatten the loader's groups, everything ends up on this one datapoint
+        imported = [ann for group in annotations_by_group.values() for ann in group]
+        for ann in imported:
+            ann.filename = self.datapoint.path
+
+        self.annotations.extend(imported)
+        log_message(f"Added {len(imported)} COCO annotation(s) to datapoint {self.datapoint.path}")
+        self._update_datapoint()
+
     def add_yolo_annotation(
         self,
         annotation_type: Literal["bbox", "segmentation", "pose"],
@@ -315,3 +344,44 @@ def _generate_yolo_context(annotation_type, categories: Dict[int, str]) -> YoloC
         for cat_id, cat_name in categories.items():
             cats.add(cat_name, cat_id)
         return YoloContext(annotation_type=annotation_type, categories=cats)
+
+
+class UnsupportedMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta):
+    """
+    Holds an annotation value of unrecognized type; ``value`` and ``to_ls_task()`` return ``_original_value``.
+    Other public ``MetadataAnnotations`` members raise ``NotImplementedError`` (see ``NotImplementedMeta``).
+    """
+
+    def __init__(self, datapoint: "Datapoint", field: str, original_value: bytes):
+        super().__init__(datapoint, field, None, None, original_value)
+
+    @property
+    def value(self) -> Optional[bytes]:
+        return self._original_value
+
+    def to_ls_task(self) -> Optional[bytes]:
+        return self._original_value
+
+    def __repr__(self):
+        return "Label Studio annotations of unrecognized type"
+
+
+class ErrorMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta):
+    """
+    Placeholder for an annotation that could not be downloaded.
+    Reading ``value`` or calling ``to_ls_task()`` raises ``ValueError`` with the stored error message.
+    """
+
+    def __init__(self, datapoint: "Datapoint", field: str, error_message: str):
+        super().__init__(datapoint, field, None, None, None)
+        self._error_message = error_message
+
+    @property
+    def value(self) -> Optional[bytes]:
+        raise ValueError(self._error_message)
+
+    def to_ls_task(self) -> Optional[bytes]:
+        raise ValueError(self._error_message)
+
+    def __repr__(self):
+        return f"Label Studio annotation download error: {self._error_message}"
diff --git a/dagshub/data_engine/model/datapoint.py b/dagshub/data_engine/model/datapoint.py
index b7aa89b5..f0c31925 100644
--- a/dagshub/data_engine/model/datapoint.py
+++ b/dagshub/data_engine/model/datapoint.py
@@ -3,14 +3,14 @@
 from dataclasses import dataclass
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union, List, Dict, Any, Callable, TYPE_CHECKING, Literal, Sequence
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Sequence, Union
 
-from tenacity import Retrying, stop_after_attempt, wait_exponential, before_sleep_log, retry_if_exception_type
+from tenacity import Retrying, before_sleep_log, retry_if_exception_type, stop_after_attempt, wait_exponential
 
 from dagshub.common.download import download_files
 from dagshub.common.helpers import http_request
 from dagshub.data_engine.annotation import MetadataAnnotations
-from dagshub.data_engine.client.models import MetadataSelectFieldSchema, DatapointHistoryResult
+from dagshub.data_engine.client.models import DatapointHistoryResult, MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType
 
 if TYPE_CHECKING:
@@ -25,6 +25,27 @@
 logger = logging.getLogger(__name__)
 
 
+@dataclass(frozen=True)
+class BlobHashMetadata:
+    """Wraps the hash of a blob, so that it can be told apart from a regular string metadata value."""
+
+    hash: str
+
+    def __str__(self) -> str:
+        return self.hash
+
+    def __repr__(self) -> str:
+        return f"BlobHashMetadata(hash={self.hash!r})"
+
+
+class BlobDownloadError(Exception):
+    """Raised when downloading a blob fails; the description is also kept in ``message``."""
+
+    def __init__(self, message):
+        super().__init__(message)
+        self.message = message
+
+
 @dataclass
 class Datapoint:
     datapoint_id: int
@@ -128,6 +145,7 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel
 
         float_fields = {f.name for f in fields if f.valueType == MetadataFieldType.FLOAT}
         date_fields = {f.name for f in fields if f.valueType == MetadataFieldType.DATETIME}
+        blob_fields = {f.name for f in fields if f.valueType == MetadataFieldType.BLOB}
 
         for meta_dict in edge["node"]["metadata"]:
             key = meta_dict["key"]
@@ -138,6 +156,8 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel
                 if key in date_fields:
                     timezone = meta_dict.get("timeZone")
                     value = _datetime_from_timestamp(value / 1000, timezone or "+00:00")
+                elif key in blob_fields and isinstance(value, str):
+                    value = BlobHashMetadata(value)
             res.metadata[key] = value
         return res
 
@@ -164,7 +184,7 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
         if type(current_value) is bytes:
             # Bytes - it's already there!
             return current_value
-        if isinstance(current_value, Path):
+        elif isinstance(current_value, Path):
             # Path - assume the path exists and is already downloaded,
             #   because it's unlikely that the user has set it themselves
             with current_value.open("rb") as f:
@@ -173,18 +193,16 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
                 self.metadata[column] = content
             return content
 
-        elif type(current_value) is str:
-            # String - This is probably the hash of the blob, get that from dagshub
-            blob_url = self.blob_url(current_value)
-            blob_location = self.blob_cache_location / current_value
+        elif isinstance(current_value, BlobHashMetadata):
+            # Blob hash metadata - download blob from DagsHub
+            blob_url = self.blob_url(current_value.hash)
+            blob_location = self.blob_cache_location / current_value.hash
 
             # Make sure that the cache location exists
             if cache_on_disk:
                 self.blob_cache_location.mkdir(parents=True, exist_ok=True)
 
             content = _get_blob(blob_url, blob_location, self.datasource.source.repoApi.auth, cache_on_disk, True)
-            if type(content) is str:
-                raise RuntimeError(f"Error while downloading blob: {content}")
 
             if store_value:
                 self.metadata[column] = content
@@ -192,6 +210,11 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
                 self.metadata[column] = blob_location
 
             return content
+        elif isinstance(current_value, MetadataAnnotations):
+            ls_task = current_value.to_ls_task()
+            if ls_task is None:
+                return b""
+            return ls_task
         else:
             raise ValueError(f"Can't extract blob metadata from value {current_value} of type {type(current_value)}")
 
@@ -274,10 +297,17 @@ def _get_blob(
     """
     Args:
         url: url to download the blob from
-        cache_path: where the cache for the blob is (laods from it if exists, stores there if it doesn't)
+        cache_path: where the cache for the blob is (loads from it if exists, stores there if it doesn't)
         auth: auth to use for getting the blob
         cache_on_disk: whether to store the downloaded blob on disk. If False we also turn off the cache checking
         return_blob: if True returns the blob of the downloaded data, if False returns the path to the file with it
+        path_format: if return_blob is False, controls path representation. "path" returns Path, "str" returns str
+
+    Returns:
+        bytes, Path, or str path on success.
+
+    Raises:
+        BlobDownloadError on download failure.
     """
     if url is None:
         return None
@@ -313,7 +343,7 @@ def get():
             with attempt:
                 content = get()
     except Exception as e:
-        return f"Error while downloading binary blob: {e}"
+        raise BlobDownloadError(str(e)) from e
 
     if cache_on_disk:
         with cache_path.open("wb") as f:
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 6c326eab..6031e0bf 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -15,6 +15,8 @@
 import dacite
 import dagshub_annotation_converter.converters.yolo
 import rich.progress
+from dagshub_annotation_converter.converters.coco import export_to_coco_file
+from dagshub_annotation_converter.formats.coco import CocoContext
 from dagshub_annotation_converter.formats.yolo import YoloContext
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.formats.yolo.common import ir_mapping
@@ -30,6 +32,7 @@
 from dagshub.common.rich_util import get_rich_progress
 from dagshub.common.util import lazy_load, multi_urljoin
 from dagshub.data_engine.annotation import MetadataAnnotations
+from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
 from dagshub.data_engine.annotation.voxel_conversion import (
     add_ls_annotations,
     add_voxel_annotations,
@@ -37,7 +40,13 @@
 from dagshub.data_engine.client.loaders.base import DagsHubDataset
 from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType
-from dagshub.data_engine.model.datapoint import Datapoint, _generated_fields, _get_blob
+from dagshub.data_engine.model.datapoint import (
+    BlobDownloadError,
+    BlobHashMetadata,
+    Datapoint,
+    _generated_fields,
+    _get_blob,
+)
 from dagshub.data_engine.model.schema_util import dacite_config
 from dagshub.data_engine.voxel_plugin_server.utils import set_voxel_envvars
 
@@ -389,10 +398,9 @@ def get_blob_fields(
         for dp in self.entries:
             for fld in fields:
                 field_value = dp.metadata.get(fld)
-                # If field_value is a blob or a path, then ignore, means it's already been downloaded
-                if not isinstance(field_value, str):
+                if not isinstance(field_value, BlobHashMetadata):
                     continue
-                download_task = (dp, fld, dp.blob_url(field_value), dp.blob_cache_location / field_value)
+                download_task = (dp, fld, dp.blob_url(field_value.hash), dp.blob_cache_location / field_value.hash)
                 to_download.append(download_task)
 
         progress = get_rich_progress(rich.progress.MofNCompleteColumn())
@@ -402,8 +410,6 @@ def get_blob_fields(
 
         def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
             blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format)
-            if isinstance(blob_or_path, str) and path_format != "str":
-                logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}:{blob_or_path}")
             dp.metadata[field] = blob_or_path
 
         with progress:
@@ -415,7 +421,7 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
                         logger.warning(f"Got exception {type(exc)} while downloading blob: {exc}")
                     progress.update(task, advance=1)
 
-        self._convert_annotation_fields(*fields, load_into_memory=load_into_memory)
+        self._convert_annotation_fields(*fields)
 
         # Convert any downloaded document fields
         document_fields = [f for f in fields if f in self.document_fields]
@@ -424,49 +430,63 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
         if document_fields:
             for dp in self:
                 for fld in document_fields:
-                    if fld in dp.metadata:
-                        # Override the load_into_memory flag, because we need the contents
-                        if not load_into_memory:
-                            dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes()
-                        dp.metadata[fld] = dp.metadata[fld].decode("utf-8")
+                    if fld not in dp.metadata:
+                        continue
+                    try:
+                        content = dp.get_blob(fld)
+                        dp.metadata[fld] = content.decode("utf-8")
+                    except BlobDownloadError as e:
+                        logger.warning(f"Failed to download document field '{fld}' for datapoint '{dp.path}': {e}")
 
         return self
 
-    def _convert_annotation_fields(self, *fields, load_into_memory):
+    def _convert_annotation_fields(self, *fields):
         # Convert any downloaded annotation column
         annotation_fields = [f for f in fields if f in self.annotation_fields]
+        if not annotation_fields:
+            return
 
+        # List of datapoints with annotations that couldn't be parsed
         bad_annotations = defaultdict(list)
 
-        if annotation_fields:
-            # Convert them
-            for dp in self:
-                for fld in annotation_fields:
-                    if fld in dp.metadata:
-                        # Already loaded - skip
-                        if isinstance(dp.metadata[fld], MetadataAnnotations):
-                            continue
-                        # Override the load_into_memory flag, because we need the contents
-                        if not load_into_memory:
-                            dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes()
-                        try:
-                            dp.metadata[fld] = MetadataAnnotations.from_ls_task(
-                                datapoint=dp, field=fld, ls_task=dp.metadata[fld]
-                            )
-                        except ValidationError:
-                            bad_annotations[fld].append(dp.path)
-                    else:
-                        dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld)
+        for dp in self:
+            for fld in annotation_fields:
+                metadata_value = dp.metadata.get(fld)
+                # No value - create empty annotation container
+                if metadata_value is None:
+                    dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld)
+                    continue
+                # Already loaded - skip
+                elif isinstance(metadata_value, MetadataAnnotations):
+                    continue
+                # Parse annotation from the content of the field
+                else:
+                    try:
+                        annotation_content = dp.get_blob(fld)
+                        dp.metadata[fld] = MetadataAnnotations.from_ls_task(
+                            datapoint=dp, field=fld, ls_task=annotation_content
+                        )
+                    except BlobDownloadError as e:
+                        dp.metadata[fld] = ErrorMetadataAnnotations(datapoint=dp, field=fld, error_message=e.message)
+                        bad_annotations[fld].append(dp.path)
+                    except ValidationError:
+                        dp.metadata[fld] = UnsupportedMetadataAnnotations(
+                            datapoint=dp, field=fld, original_value=annotation_content
+                        )
+                        bad_annotations[fld].append(dp.path)
 
         if bad_annotations:
             log_message(
-                "Warning: The following datapoints had invalid annotations, "
-                "any annotation-related operations will not work on these:"
+                "Warning: The following datapoints had unsupported or invalid annotations, "
+                "convenience functions like `add_bounding_box` won't work on these:"
             )
             err_msg = ""
             for fld, dps in bad_annotations.items():
-                err_msg += f'Field "{fld}" in datapoints:\n\t'
-                err_msg += "\n\t".join(dps)
+                err_msg += f'\nField "{fld}" in datapoints:\n\t'
+                if len(dps) > 10:
+                    err_msg += "\n\t".join(dps[:10]) + f"\n\t... and {len(dps) - 10} more"
+                else:
+                    err_msg += "\n\t".join(dps)
             log_message(err_msg)
 
     def download_binary_columns(
@@ -760,6 +780,16 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB
                 annotations.extend(dp.metadata[annotation_field].annotations)
         return annotations
 
+    def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str:
+        """Pick the annotation field to use: the given one, or else the first one in sorted name order."""
+        if annotation_field is not None:
+            return annotation_field
+        candidates = sorted(f.name for f in self.fields if f.is_annotation())
+        if not candidates:
+            raise ValueError("No annotation fields found in the datasource")
+        log_message(f"Using annotations from field {candidates[0]}")
+        return candidates[0]
+
     def export_as_yolo(
         self,
         download_dir: Optional[Union[str, Path]] = None,
@@ -785,12 +815,7 @@ def export_as_yolo(
         Returns:
             The path to the YAML file with the metadata. Pass this path to ``YOLO.train()`` to train a model.
         """
-        if annotation_field is None:
-            annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()])
-            if len(annotation_fields) == 0:
-                raise ValueError("No annotation fields found in the datasource")
-            annotation_field = annotation_fields[0]
-            log_message(f"Using annotations from field {annotation_field}")
+        annotation_field = self._resolve_annotation_field(annotation_field)
 
         if download_dir is None:
             download_dir = Path("dagshub_export")
@@ -843,6 +868,54 @@ def export_as_yolo(
         log_message(f"Done! Saved YOLO Dataset, YAML file is at {yaml_path.absolute()}")
         return yaml_path
 
+    def export_as_coco(
+        self,
+        download_dir: Optional[Union[str, Path]] = None,
+        annotation_field: Optional[str] = None,
+        output_filename: str = "annotations.json",
+        classes: Optional[Dict[int, str]] = None,
+    ) -> Path:
+        """
+        Download the datapoint files and write the annotations of one field as a COCO JSON file.
+
+        Args:
+            download_dir: Export root; files are requested into ``<download_dir>/data``. Defaults to ``./dagshub_export``
+            annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field.
+            output_filename: Name of the COCO JSON file, placed inside ``download_dir``.
+            classes: Category mapping for the COCO dataset as ``{id: name}``.
+                If ``None``, the export context's categories are left at their defaults.
+
+        Returns:
+            Path to the exported COCO JSON file.
+
+        Raises:
+            ValueError: If ``annotation_field`` is None and the datasource has no annotation fields.
+            RuntimeError: If there are no annotations to export.
+        """
+        field_name = self._resolve_annotation_field(annotation_field)
+        export_root = Path("dagshub_export") if download_dir is None else Path(download_dir)
+
+        annotations = self._get_all_annotations(field_name)
+        if not annotations:
+            raise RuntimeError("No annotations found to export")
+
+        context = CocoContext()
+        if classes is not None:
+            context.categories = dict(classes)
+
+        # NOTE(review): this rewrites ``filename`` on the annotation objects in place - confirm repeated exports are safe
+        source_prefix = self.datasource.source.source_prefix
+        for ann in annotations:
+            ann.filename = os.path.join(source_prefix, ann.filename)
+
+        log_message("Downloading image files...")
+        self.download_files(export_root / "data")
+
+        log_message("Exporting COCO annotations...")
+        result_path = export_to_coco_file(annotations, export_root / output_filename, context=context)
+        log_message(f"Done! Saved COCO annotations to {result_path.absolute()}")
+        return result_path
+
     def to_voxel51_dataset(self, **kwargs) -> "fo.Dataset":
         """
         Creates a voxel51 dataset that can be used with\
diff --git a/dagshub/data_engine/util/__init__.py b/dagshub/data_engine/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dagshub/data_engine/util/not_implemented.py b/dagshub/data_engine/util/not_implemented.py
new file mode 100644
index 00000000..d9a81285
--- /dev/null
+++ b/dagshub/data_engine/util/not_implemented.py
@@ -0,0 +1,48 @@
+def _not_implemented_property(prop_name):
+    """Build a property whose getter, setter and deleter all raise NotImplementedError."""
+
+    def fail(*args):
+        raise NotImplementedError(f"Property '{prop_name}' not implemented")
+
+    return property(fail, fail, fail)
+
+
+def _not_implemented_method(method_name):
+    """Build a replacement method that raises NotImplementedError however it is called."""
+
+    def not_impl(*args, **kwargs):
+        # No explicit ``self`` parameter: the stub must also raise NotImplementedError (not TypeError)
+        # when it is called on the class itself with keyword arguments only
+        raise NotImplementedError(f"Method '{method_name}' not implemented")
+
+    return not_impl
+
+
+class NotImplementedMeta(type):
+    """
+    A metaclass that replaces all parent class methods and properties that aren't overridden in the subclass
+    with NotImplementedError.
+
+    Names starting with an underscore and non-callable class attributes are inherited unchanged.
+    The stubs are put into the new class' namespace, where they shadow the inherited implementations.
+    """
+
+    def __new__(mcs, name, bases, namespace, **kwargs):
+        for base in bases:
+            # dir() also lists what the base itself inherits, so the whole ancestry is covered
+            for attr_name in dir(base):
+                if attr_name.startswith("_"):
+                    continue
+
+                # Skip if already defined in subclass (or already stubbed for an earlier base)
+                if attr_name in namespace:
+                    continue
+
+                base_attr = getattr(base, attr_name)
+                if isinstance(base_attr, property):
+                    namespace[attr_name] = _not_implemented_property(attr_name)
+                elif callable(base_attr):
+                    namespace[attr_name] = _not_implemented_method(attr_name)
+
+        # Forward class keyword arguments so that bases relying on __init_subclass__ keep working
+        return super().__new__(mcs, name, bases, namespace, **kwargs)
diff --git a/tests/data_engine/annotation_import/res/audio_annotation.json b/tests/data_engine/annotation_import/res/audio_annotation.json
new file mode 100644
index 00000000..adc356e2
--- /dev/null
+++ b/tests/data_engine/annotation_import/res/audio_annotation.json
@@ -0,0 +1,82 @@
+{
+  "id": 41,
+  "data": {
+    "audio": "https://example.com/some-non-existent-file.mp3",
+    "media type": "audio/mpeg",
+    "size": 111699
+  },
+  "meta": {
+    "datapoint_id": 12345678,
+    "datasource_id": 6565
+  },
+  "created_at": "2025-12-20T13:44:02.316027Z",
+  "updated_at": "2026-01-26T15:00:13.046967Z",
+  "is_labeled": true,
+  "project": 1,
+  "annotations": [
+    {
+      "completed_by": 1,
+      "result": [
+        {
+          "type": "choices",
+          "value": {
+            "choices": [
+              "true"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "further_utterance",
+          "id": "deadbeef1"
+        },
+        {
+          "type": "rating",
+          "value": {
+            "rating": 1
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "difficulty",
+          "id": "deadbeef1"
+        },
+        {
+          "type": "textarea",
+          "value": {
+            "text": [
+              "kirill@dagshub.com"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "email_address",
+          "id": "deadbeef1"
+        },
+        {
+          "to_name": "audio",
+          "from_name": "first_name",
+          "id": "Qzu1dR2RQ8",
+          "type": "textarea",
+          "value": {
+            "text": [
+              "Kirill"
+            ]
+          },
+          "origin": "manual"
+        },
+        {
+          "type": "textarea",
+          "value": {
+            "text": [
+              "Bolashev"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "last_name",
+          "id": "deadbeef1"
+        }
+      ],
+      "ground_truth": false
+    }
+  ]
+}
diff --git a/tests/data_engine/annotation_import/test_annotation_parsing.py b/tests/data_engine/annotation_import/test_annotation_parsing.py
index 66840ecb..c04b0d51 100644
--- a/tests/data_engine/annotation_import/test_annotation_parsing.py
+++ b/tests/data_engine/annotation_import/test_annotation_parsing.py
@@ -1,19 +1,24 @@
 import json
+from os import PathLike
 from pathlib import Path
+from typing import Union
 from unittest.mock import MagicMock
 
 import pytest
 from dagshub_annotation_converter.ir.image import IRSegmentationImageAnnotation
+from pytest import MonkeyPatch
 
 from dagshub.data_engine.annotation import MetadataAnnotations
+from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
 from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
-from dagshub.data_engine.model import query_result
+from dagshub.data_engine.model import datapoint, query_result
+from dagshub.data_engine.model.datapoint import BlobDownloadError, BlobHashMetadata
 from dagshub.data_engine.model.datasource import Datasource
+from dagshub.data_engine.model.query_result import QueryResult
 from tests.data_engine.util import add_metadata_field
 
 _annotation_field_name = "annotation"
 _dp_path = "data/sample_datapoint.jpg"
-_annotation_hash = "annotation1"  # Corresponds to a resource JSON
 _res_folder = Path(__file__).parent / "res"
 
 
@@ -51,17 +56,24 @@ def mock_annotation_query_result(
     return query_result.QueryResult.from_gql_query(data_dict, ds)
 
 
-def mock_get_blob(*args, **kwargs) -> bytes:
+def mock_get_blob(*args, **kwargs) -> Union[bytes, PathLike]:
     download_url: str = args[0]
     blob_hash = download_url.split("/")[-1]
+    load_into_memory = args[4]
     blob_path = _res_folder / f"{blob_hash}.json"
-    if not blob_path.exists():
-        raise FileNotFoundError(f"Mock blob file not found: {blob_path}")
-    return blob_path.read_bytes()
 
+    try:
+        if not blob_path.exists():
+            raise FileNotFoundError(f"Blob with hash {blob_hash} not found in res folder")
+        if load_into_memory:
+            return blob_path.read_bytes()
+        else:
+            return blob_path
+    except Exception as e:
+        raise BlobDownloadError(str(e)) from e
 
-@pytest.fixture
-def ds_with_document_annotation(ds, monkeypatch):
+
+def _ds_with_annotation(ds: "Datasource", monkeypatch: MonkeyPatch, annotation_hash: str):
     add_metadata_field(
         ds,
         _annotation_field_name,
@@ -70,18 +82,89 @@ def ds_with_document_annotation(ds, monkeypatch):
     )
 
     ds.source.client.get_datapoints = MagicMock(
-        return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, _annotation_hash)
+        return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, annotation_hash)
     )
 
     monkeypatch.setattr(query_result, "_get_blob", mock_get_blob)
+    monkeypatch.setattr(datapoint, "_get_blob", mock_get_blob)
 
-    yield ds
+    return ds
+
+
+@pytest.fixture
+def ds_with_document_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, "annotation1")  # blob hash; the mock reads res/annotation1.json
 
 
 def test_annotation_with_document_are_parsed_as_annotation(ds_with_document_annotation):
     qr = ds_with_document_annotation.all()
+    _test_annotation(qr)
+
+
+def test_double_loading_annotation_works(ds_with_document_annotation):
+    loaded = ds_with_document_annotation.all()
+    loaded.get_blob_fields(_annotation_field_name)  # fetch the annotation field again on top of all()
+    _test_annotation(loaded)
+
+
+def _test_annotation(qr: QueryResult):
     annotation: MetadataAnnotations = qr[0].metadata[_annotation_field_name]
     assert isinstance(annotation, MetadataAnnotations)
     # Check that the annotation got parsed correctly, the JSON should have one segmentation annotation in it
     assert len(annotation.annotations) == 1
     assert isinstance(annotation.annotations[0], IRSegmentationImageAnnotation)
+
+
+@pytest.fixture
+def ds_with_unsupported_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, "audio_annotation")  # the mock reads res/audio_annotation.json
+
+
+def test_handling_unsupported_annotation(ds_with_unsupported_annotation):
+    """The audio annotation blob loads as UnsupportedMetadataAnnotations exposing the raw file bytes."""
+    first_dp = ds_with_unsupported_annotation.all()[0]
+    annotation: MetadataAnnotations = first_dp.metadata[_annotation_field_name]
+
+    assert isinstance(annotation, UnsupportedMetadataAnnotations)
+    # The unsupported variant must remain a MetadataAnnotations subclass: the logic that
+    # checks whether annotation metadata was already parsed depends on it, so a failure
+    # here means that logic has to change as well
+    assert isinstance(annotation, MetadataAnnotations)
+
+    with pytest.raises(NotImplementedError):
+        annotation.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080)
+
+    raw_bytes = _res_folder.joinpath("audio_annotation.json").read_bytes()
+    assert annotation.value == raw_bytes
+    assert annotation.to_ls_task() == raw_bytes
+
+
+@pytest.fixture
+def ds_with_nonexistent_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, "nonexistent_annotation")  # no such res file: mocked download fails
+
+
+def test_nonexistent_annotation(ds_with_nonexistent_annotation):
+    """A blob whose download fails becomes ErrorMetadataAnnotations that raises when accessed."""
+    qr = ds_with_nonexistent_annotation.all(load_documents=False, load_annotations=False)
+    qr.get_annotations()
+    annotation: MetadataAnnotations = qr[0].metadata[_annotation_field_name]
+    download_error = "Blob with hash nonexistent_annotation not found in res folder"
+
+    assert isinstance(annotation, ErrorMetadataAnnotations)
+    # The error variant must remain a MetadataAnnotations subclass: the logic that
+    # checks whether annotation metadata was already parsed depends on it, so a failure
+    # here means that logic has to change as well
+    assert isinstance(annotation, MetadataAnnotations)
+
+    with pytest.raises(NotImplementedError):
+        annotation.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080)
+    with pytest.raises(ValueError, match=download_error):
+        _ = annotation.value
+    with pytest.raises(ValueError, match=download_error):
+        annotation.to_ls_task()
+
+
+def test_blob_metadata_is_wrapped_from_backend(ds_with_document_annotation):
+    first_dp = ds_with_document_annotation.all(load_documents=False, load_annotations=False)[0]
+    assert isinstance(first_dp.metadata[_annotation_field_name], BlobHashMetadata)
diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py
new file mode 100644
index 00000000..9b238fd1
--- /dev/null
+++ b/tests/data_engine/annotation_import/test_coco.py
@@ -0,0 +1,218 @@
+import datetime
+import json
+from pathlib import PurePosixPath
+from unittest.mock import patch, PropertyMock
+
+import pytest
+from dagshub_annotation_converter.ir.image import (
+    IRBBoxImageAnnotation,
+    CoordinateStyle,
+)
+
+from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError
+from dagshub.data_engine.annotation.metadata import MetadataAnnotations
+from dagshub.data_engine.client.models import MetadataSelectFieldSchema
+from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
+from dagshub.data_engine.model.datapoint import Datapoint
+from dagshub.data_engine.model.query_result import QueryResult
+
+
+@pytest.fixture(autouse=True)
+def mock_source_prefix(ds):
+    with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()):
+        yield
+
+
+# --- import ---
+
+
+def test_import_coco_from_file(ds, tmp_path):
+    """A COCO file written to disk imports as a single bbox annotation under its image file name."""
+    _write_coco(tmp_path, _make_coco_json())
+    coco_file = tmp_path / "annotations.json"
+    imported = AnnotationImporter(ds, "coco", coco_file, load_from="disk").import_annotations()
+    assert "image1.jpg" in imported
+    assert len(imported["image1.jpg"]) == 1
+    assert isinstance(imported["image1.jpg"][0], IRBBoxImageAnnotation)
+
+
+def test_import_coco_nonexistent_raises(ds, tmp_path):
+    missing = AnnotationImporter(ds, "coco", tmp_path.joinpath("nope.json"), load_from="disk")
+    with pytest.raises(AnnotationsNotFoundError):  # nope.json was never written to tmp_path
+        missing.import_annotations()
+
+
+def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth):
+    """convert_to_ls_tasks yields, per file name, a JSON task string with a non-empty annotations list."""
+    normalized_box = IRBBoxImageAnnotation(
+        filename="test.jpg", categories={"cat": 1.0},
+        top=0.1, left=0.1, width=0.2, height=0.2,
+        image_width=640, image_height=480,
+        coordinate_style=CoordinateStyle.NORMALIZED,
+    )
+    converter = AnnotationImporter(ds, "coco", tmp_path / "ann.json", load_from="disk")
+    tasks = converter.convert_to_ls_tasks({"test.jpg": [normalized_box]})
+    assert "test.jpg" in tasks
+    task_json = json.loads(tasks["test.jpg"])
+    assert "annotations" in task_json
+    assert len(task_json["annotations"]) > 0
+
+
+# --- add_coco_annotation ---
+
+
+def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth):
+    dp = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={})
+    meta_ann = MetadataAnnotations(datapoint=dp, field="ann")
+    meta_ann.add_coco_annotation(json.dumps(_make_coco_json()))
+
+    assert len(meta_ann.annotations) == 1
+    assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation)
+    assert meta_ann.annotations[0].filename == "my_images/photo.jpg"
+
+
+# --- _resolve_annotation_field ---
+
+
+def test_resolve_explicit_field(ds):
+    """An explicitly passed field name is returned as-is, not the tagged "my_ann" field."""
+    assert _make_qr(ds, [], ann_field="my_ann")._resolve_annotation_field("explicit") == "explicit"
+
+
+def test_resolve_auto_field(ds):
+    """With no field name given, the single annotation-tagged field is resolved."""
+    assert _make_qr(ds, [], ann_field="my_ann")._resolve_annotation_field(None) == "my_ann"
+
+
+def test_resolve_no_fields_raises(ds):
+    fieldless_qr = _make_qr(ds, [], ann_field=None)  # query result without any annotation field
+    with pytest.raises(ValueError, match="No annotation fields"):
+        fieldless_qr._resolve_annotation_field(None)
+
+
+def test_resolve_picks_alphabetically_first(ds):
+    """With several annotation fields and no explicit name, the alphabetically first one wins."""
+    fields = [
+        MetadataSelectFieldSchema(
+            asOf=int(datetime.datetime.now().timestamp()),
+            autoGenerated=False, originalName=name,
+            multiple=False, valueType=MetadataFieldType.BLOB,
+            name=name, tags={ReservedTags.ANNOTATION.value},
+        ) for name in ("zebra_ann", "alpha_ann")
+    ]
+    assert QueryResult(datasource=ds, _entries=[], fields=fields)._resolve_annotation_field(None) == "alpha_ann"
+
+
+# --- export_as_coco ---
+
+
+def test_export_coco_bbox_coordinates(ds, tmp_path):
+    """The exported COCO bbox is [left, top, width, height] in pixel values."""
+    box = IRBBoxImageAnnotation(
+        filename="images/test.jpg", categories={"cat": 1.0},
+        top=20.0, left=10.0, width=30.0, height=40.0,
+        image_width=640, image_height=480,
+        coordinate_style=CoordinateStyle.DENORMALIZED,
+    )
+    dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={})
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[box])
+    qr = _make_qr(ds, [dp], ann_field="ann")
+    with patch.object(qr, "download_files"):
+        coco_path = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann")
+
+    first_annotation = json.loads(coco_path.read_text())["annotations"][0]
+    assert first_annotation["bbox"] == [10.0, 20.0, 30.0, 40.0]
+
+
+def test_export_coco_no_annotations_raises(ds, tmp_path):
+    """Exporting a datapoint whose annotation list is empty raises RuntimeError."""
+    empty_dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={})
+    empty_dp.metadata["ann"] = MetadataAnnotations(datapoint=empty_dp, field="ann", annotations=[])
+    qr = _make_qr(ds, [empty_dp], ann_field="ann")
+    with pytest.raises(RuntimeError, match="No annotations found"):
+        qr.export_as_coco(download_dir=tmp_path, annotation_field="ann")
+
+
+def test_export_coco_explicit_classes(ds, tmp_path):
+    """The "cat" class from an explicit classes mapping is present in the exported categories."""
+    dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={})
+    bbox = _make_image_bbox("images/test.jpg")
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[bbox])
+    class_map = {1: "cat", 2: "dog"}
+
+    qr = _make_qr(ds, [dp], ann_field="ann")
+    with patch.object(qr, "download_files"):
+        coco_path = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann", classes=class_map)
+
+    exported = json.loads(coco_path.read_text())
+    category_names = {category["name"] for category in exported["categories"]}
+    assert "cat" in category_names
+
+
+def test_export_coco_custom_filename(ds, tmp_path):
+    """The path returned by the export is named after the requested output_filename."""
+    dp = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={})
+    bbox = _make_image_bbox("images/test.jpg")
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[bbox])
+
+    qr = _make_qr(ds, [dp], ann_field="ann")
+    export_kwargs = dict(download_dir=tmp_path, annotation_field="ann", output_filename="custom.json")
+    # download_files is patched out for the duration of the export call
+    with patch.object(qr, "download_files"):
+        coco_path = qr.export_as_coco(**export_kwargs)
+
+    assert coco_path.name == "custom.json"
+
+
+def test_export_coco_multiple_datapoints(ds, tmp_path):
+    """Two annotated datapoints yield two images and two annotations in the export."""
+    dps = []
+    for dp_id, image_name in enumerate(("a.jpg", "b.jpg")):
+        dp = Datapoint(datasource=ds, path=image_name, datapoint_id=dp_id, metadata={})
+        bbox = _make_image_bbox(image_name)
+        dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[bbox])
+        dps.append(dp)
+
+    qr = _make_qr(ds, dps, ann_field="ann")
+    with patch.object(qr, "download_files"):
+        coco_path = qr.export_as_coco(download_dir=tmp_path, annotation_field="ann")
+
+    exported = json.loads(coco_path.read_text())
+    assert len(exported["annotations"]) == 2
+    assert len(exported["images"]) == 2
+
+
+# --- helpers ---
+
+
+def _make_coco_json():
+    """Build a minimal COCO dict: one "cat" category, one 640x480 image, one bbox annotation."""
+    categories = [{"id": 1, "name": "cat"}]
+    images = [{"id": 1, "width": 640, "height": 480, "file_name": "image1.jpg"}]
+    annotations = [{"id": 1, "image_id": 1, "category_id": 1, "bbox": [10, 20, 30, 40]}]
+    return {"categories": categories, "images": images, "annotations": annotations}
+
+
+def _write_coco(tmp_path, coco):
+    tmp_path.joinpath("annotations.json").write_text(json.dumps(coco))  # JSON-serialized COCO dict
+
+
+def _make_image_bbox(filename="test.jpg") -> IRBBoxImageAnnotation:
+    """Return a denormalized "cat" bbox (left=10, top=20, 30x40) on a 640x480 image."""
+    box_kwargs = dict(top=20.0, left=10.0, width=30.0, height=40.0, image_width=640, image_height=480)
+    return IRBBoxImageAnnotation(
+        filename=filename, categories={"cat": 1.0},
+        coordinate_style=CoordinateStyle.DENORMALIZED, **box_kwargs,
+    )
+
+
+def _make_qr(ds, datapoints, ann_field=None):
+    """Build a QueryResult over datapoints, with one annotation-tagged blob field if ann_field is truthy."""
+    if not ann_field:
+        return QueryResult(datasource=ds, _entries=datapoints, fields=[])
+    annotation_schema = MetadataSelectFieldSchema(
+        asOf=int(datetime.datetime.now().timestamp()), autoGenerated=False,
+        originalName=ann_field, name=ann_field, multiple=False,
+        valueType=MetadataFieldType.BLOB, tags={ReservedTags.ANNOTATION.value},
+    )
+    return QueryResult(datasource=ds, _entries=datapoints, fields=[annotation_schema])
diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py
index e8f0c70a..e57d1e83 100644
--- a/tests/data_engine/conftest.py
+++ b/tests/data_engine/conftest.py
@@ -5,7 +5,7 @@
 from dagshub.common.api import UserAPI
 from dagshub.common.api.responses import UserAPIResponse
 from dagshub.data_engine import datasources
-from dagshub.data_engine.client.models import MetadataSelectFieldSchema, PreprocessingStatus
+from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema, PreprocessingStatus
 from dagshub.data_engine.model.datapoint import Datapoint
 from dagshub.data_engine.model.datasource import DatasetState, Datasource
 from dagshub.data_engine.model.query_result import QueryResult
@@ -26,6 +26,7 @@ def other_ds(mocker, mock_dagshub_auth) -> Datasource:
 
 def _create_mock_datasource(mocker, id, name) -> Datasource:
     ds_state = datasources.DatasourceState(id=id, name=name, repo="kirill/repo")
+    ds_state.source_type = DatasourceType.REPOSITORY
     ds_state.path = "repo://kirill/repo/data/"
     ds_state.preprocessing_status = PreprocessingStatus.READY
     mocker.patch.object(ds_state, "client")
diff --git a/tests/mocks/repo_api.py b/tests/mocks/repo_api.py
index d457d161..22b6c94c 100644
--- a/tests/mocks/repo_api.py
+++ b/tests/mocks/repo_api.py
@@ -113,6 +113,10 @@ def generate_content_api_entry(path, is_dir=False, versioning="dvc") -> ContentA
     def default_branch(self) -> str:
         return self._default_branch
 
+    @property
+    def id(self) -> int:
+        return 1  # fixed id for the mocked repo
+
     def get_connected_storages(self) -> List[StorageAPIEntry]:
         return self.storages
 

From e68048a4ea48d8fbb1f6176f7d56dc0de9b71d33 Mon Sep 17 00:00:00 2001
From: Kirill Bolashev <kirill@dagshub.com>
Date: Sun, 29 Mar 2026 13:44:10 +0300
Subject: [PATCH 2/5] Test: use the coco_converter branch of the annotation
 converter while PR is WIP

---
 setup.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 6cdef855..a4a08913 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
-import setuptools
 import os.path
 
+import setuptools
+
 
 # Thank you pip contributors
 def read(rel_path: str) -> str:
@@ -41,7 +42,11 @@ def get_version(rel_path: str) -> str:
     "python-dateutil",
     "boto3",
     "semver",
-    "dagshub-annotation-converter>=0.1.12",
+    # FIXME: roll back to main after merging
+    # "dagshub-annotation-converter>=0.1.12",
+    "dagshub-annotation-converter @ "
+    + "git+https://github.com/DagsHub/"
+    + "dagshub-annotation-converter@coco_converter#egg=dagshub-annotation-converter",
 ]
 
 extras_require = {

From 4f830e466ab76dc9bca4d44d16d2d4c46a582172 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Sun, 29 Mar 2026 15:06:41 +0300
Subject: [PATCH 3/5] Address review comments on COCO import/export

---
 dagshub/__init__.py                           |  2 +-
 dagshub/auth/token_auth.py                    |  2 +-
 dagshub/data_engine/annotation/importer.py    |  8 +++----
 dagshub/data_engine/annotation/metadata.py    | 22 -------------------
 dagshub/data_engine/model/query_result.py     |  5 ++++-
 .../annotation_import/test_coco.py            | 22 +------------------
 tests/data_engine/conftest.py                 |  3 +++
 7 files changed, 14 insertions(+), 50 deletions(-)

diff --git a/dagshub/__init__.py b/dagshub/__init__.py
index 10f3c0cb..7f68de54 100644
--- a/dagshub/__init__.py
+++ b/dagshub/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.10"
+__version__ = "0.6.9"
 from .logger import DAGsHubLogger, dagshub_logger
 from .common.init import init
 from .upload.wrapper import upload_files
diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py
index 7ba3a70a..31ec32ac 100644
--- a/dagshub/auth/token_auth.py
+++ b/dagshub/auth/token_auth.py
@@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]:
 
     def can_renegotiate(self):
         # Env var tokens cannot renegotiate, every other token type can
-        return type(self._token) is not EnvVarDagshubToken
+        return type(self._token) is not EnvVarDagshubToken
 
     def renegotiate_token(self):
         if not self._token_storage.is_valid_token(self._token, self._host):
diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index 80e62468..90661df1 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -95,6 +95,7 @@ def download_annotations(self, dest_dir: Path):
         log_message("Downloading annotations from repository")
         repoApi = self.ds.source.repoApi
         if self.annotations_type == "cvat":
+            # Download just the annotation file
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "yolo":
             # Download the dataset .yaml file and the images + annotations
@@ -107,6 +108,7 @@ def download_annotations(self, dest_dir: Path):
             assert context.path is not None
             repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "coco":
+            # Download just the annotation file
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
 
     @staticmethod
@@ -157,10 +159,8 @@ def remap_annotations(
                 )
                 continue
             for ann in anns:
-                if ann.filename is not None:
-                    ann.filename = remap_func(ann.filename)
-                else:
-                    ann.filename = new_filename
+                assert ann.filename is not None
+                ann.filename = remap_func(ann.filename)
             remapped[new_filename] = anns
 
         return remapped
diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py
index 140f7733..06f7bc28 100644
--- a/dagshub/data_engine/annotation/metadata.py
+++ b/dagshub/data_engine/annotation/metadata.py
@@ -271,28 +271,6 @@ def add_image_pose(
         self.annotations.append(ann)
         self._update_datapoint()
 
-    def add_coco_annotation(
-        self,
-        coco_json: str,
-    ):
-        """
-        Add annotations from a COCO-format JSON string.
-
-        Args:
-            coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys.
-        """
-        from dagshub_annotation_converter.converters.coco import load_coco_from_json_string
-
-        grouped, _ = load_coco_from_json_string(coco_json)
-        new_anns: list[IRAnnotationBase] = []
-        for anns in grouped.values():
-            for ann in anns:
-                ann.filename = self.datapoint.path
-                new_anns.append(ann)
-        self.annotations.extend(new_anns)
-        log_message(f"Added {len(new_anns)} COCO annotation(s) to datapoint {self.datapoint.path}")
-        self._update_datapoint()
-
     def add_yolo_annotation(
         self,
         annotation_type: Literal["bbox", "segmentation", "pose"],
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 6031e0bf..ddec542c 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -900,7 +900,10 @@ def export_as_coco(
 
         context = CocoContext()
         if classes is not None:
-            context.categories = dict(classes)
+            categories = Categories()
+            for category_id, category_name in classes.items():
+                categories.add(category_name, category_id)
+            context.categories = categories
 
         # Add the source prefix to all annotations
         for ann in annotations:
diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py
index 9b238fd1..0db9cf8f 100644
--- a/tests/data_engine/annotation_import/test_coco.py
+++ b/tests/data_engine/annotation_import/test_coco.py
@@ -1,7 +1,6 @@
 import datetime
 import json
-from pathlib import PurePosixPath
-from unittest.mock import patch, PropertyMock
+from unittest.mock import patch
 
 import pytest
 from dagshub_annotation_converter.ir.image import (
@@ -17,12 +16,6 @@
 from dagshub.data_engine.model.query_result import QueryResult
 
 
-@pytest.fixture(autouse=True)
-def mock_source_prefix(ds):
-    with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()):
-        yield
-
-
 # --- import ---
 
 
@@ -58,19 +51,6 @@ def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth):
     assert len(task_json["annotations"]) > 0
 
 
-# --- add_coco_annotation ---
-
-
-def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth):
-    dp = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={})
-    meta_ann = MetadataAnnotations(datapoint=dp, field="ann")
-    meta_ann.add_coco_annotation(json.dumps(_make_coco_json()))
-
-    assert len(meta_ann.annotations) == 1
-    assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation)
-    assert meta_ann.annotations[0].filename == "my_images/photo.jpg"
-
-
 # --- _resolve_annotation_field ---
 
 
diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py
index e57d1e83..02ee8331 100644
--- a/tests/data_engine/conftest.py
+++ b/tests/data_engine/conftest.py
@@ -1,4 +1,6 @@
 import datetime
+from pathlib import PurePosixPath
+from unittest.mock import PropertyMock
 
 import pytest
 
@@ -34,6 +36,7 @@ def _create_mock_datasource(mocker, id, name) -> Datasource:
     mocker.patch.object(ds_state, "get_from_dagshub")
     # Stub out root path so all the content_path/etc work without also mocking out RepoAPI
     mocker.patch.object(ds_state, "_root_path", return_value="http://example.com")
+    mocker.patch.object(type(ds_state), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath())
     ds_state.repoApi = MockRepoAPI("kirill/repo")
     return Datasource(ds_state)
 

From eb2c6437eb9f635ed0913a030fc41c03ff30f464 Mon Sep 17 00:00:00 2001
From: Dean P <dean@dagshub.com>
Date: Mon, 13 Apr 2026 12:33:22 +0300
Subject: [PATCH 4/5] Point annotation converter dependency at main branch

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a4a08913..a26ff6b7 100644
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,7 @@ def get_version(rel_path: str) -> str:
     # "dagshub-annotation-converter>=0.1.12",
     "dagshub-annotation-converter @ "
     + "git+https://github.com/DagsHub/"
-    + "dagshub-annotation-converter@coco_converter#egg=dagshub-annotation-converter",
+    + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter",
 ]
 
 extras_require = {

From 15289ed4f54c97e155a83516959779db96a6c1b0 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 14 Apr 2026 12:20:10 +0300
Subject: [PATCH 5/5] bump converter version

---
 setup.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index a26ff6b7..270ae5ef 100644
--- a/setup.py
+++ b/setup.py
@@ -42,11 +42,7 @@ def get_version(rel_path: str) -> str:
     "python-dateutil",
     "boto3",
     "semver",
-    # FIXME: roll back to main after merging
-    # "dagshub-annotation-converter>=0.1.12",
-    "dagshub-annotation-converter @ "
-    + "git+https://github.com/DagsHub/"
-    + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter",
+    "dagshub-annotation-converter>=0.2.0",
 ]
 
 extras_require = {