Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,11 @@ from clarifai_datautils import ImageAnnotations
#import from folder
coco_dataset = ImageAnnotations.import_from(path='folder_path',format= 'coco_detection')

#clarifai dataset loader object
coco_dataset.dataloader

#Using clarifai SDK to upload to Clarifai Platform
#export CLARIFAI_PAT={your personal access token} # set PAT as env variable
from clarifai.client.dataset import Dataset
dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
dataset.upload_dataset(dataloader=coco_dataset.dataloader)

#info about loaded dataset
coco_dataset.get_info()
Expand Down
4 changes: 3 additions & 1 deletion clarifai_datautils/constants/annotations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
IMAGE_ANNOTATION_FORMATS = [
'coco_segmentation', 'voc_detection', 'yolo', 'cifar', 'coco_detection', 'cvat', 'imagenet',
'kitti', 'label_me', 'mnist', 'open_images', 'vgg_face2', 'lfw', 'cityscapes', 'ade20k2017'
'kitti', 'label_me', 'mnist', 'open_images', 'vgg_face2', 'lfw', 'cityscapes', 'ade20k2017',
'clarifai'
]

IMAGE_ANNOTATION_TASKS = ['visual_classification', 'visual_detection', 'visual_segmentation']
Expand All @@ -11,6 +12,7 @@
'mnist': 'visual_classification',
'vgg_face2': 'visual_classification',
'lfw': 'visual_classification',
'clarifai': 'visual_detection',
'voc_detection': 'visual_detection',
'yolo': 'visual_detection',
'coco_detection': 'visual_detection',
Expand Down
24 changes: 20 additions & 4 deletions clarifai_datautils/image/annotation_conversion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ from clarifai_datautils import ImageAnnotations
#import from folder
coco_dataset = ImageAnnotations.import_from(path='folder_path',format= 'coco_detection')

#clarifai dataset loader object
coco_dataset.dataloader


#info about loaded dataset
coco_dataset.get_info()
Expand All @@ -24,7 +21,7 @@ coco_dataset.export_to('voc_detection')
```


### With Clarifai Python SDK
### Upload using Clarifai Python SDK
```python
from clarifai_datautils import ImageAnnotations
coco_dataset = ImageAnnotations.import_from(path='folder_path',format= 'coco_detection')
Expand All @@ -38,6 +35,24 @@ dataset.upload_dataset(dataloader=coco_dataset.dataloader)
```


### Export to other formats from Clarifai Platform
```python

#clarifai SDK
#export CLARIFAI_PAT={your personal access token} # set PAT as env variable
from clarifai.client.dataset import Dataset
dataset = Dataset(user_id="user_id", app_id="app_id", dataset_id="dataset_id")
dataset.export(save_path='output.zip',split='train')

#Extract the zip file and pass the folder to ImageAnnotations
from clarifai_datautils import ImageAnnotations
clarifai_dataset = ImageAnnotations.import_from(path='folder_path',format= 'clarifai')

#export to other formats
clarifai_dataset.export_to(path='output_path',format='coco_detection',save_images=True)

```

## Supported Formats

| Annotation format | Format | TASK |
Expand All @@ -54,6 +69,7 @@ dataset.upload_dataset(dataloader=coco_dataset.dataloader)
| [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) | kitti | detection |
| [LabelMe](http://labelme.csail.mit.edu/Release3.0) | label_me | detection |
| [Open Images](https://storage.googleapis.com/openimages/web/download.html) | open_images | detection |
| [Clarifai](https://github.com/Clarifai/examples/tree/main/Data_Utils) | clarifai | detection |
| [COCO(segmentation)](http://cocodataset.org/#format-data) | coco_segmentation | segmentation |
| [Cityscapes](https://www.cityscapes-dataset.com/) | cityscapes | segmentation |
| [ADE](https://groups.csail.mit.edu/vision/datasets/ADE20K/) | ade20k2017 | segmentation |
Expand Down
37 changes: 30 additions & 7 deletions clarifai_datautils/image/annotation_conversion/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from clarifai_datautils.image.annotation_conversion.loaders import (ClassificationDataLoader,
DetectionDataLoader,
SegmentationDataLoader)
from clarifai_datautils.image.annotation_conversion.utils import Clarifai_to_Datumaro


class ImageAnnotations():
Expand Down Expand Up @@ -56,11 +57,15 @@ def import_from(cls, path: str, format: str) -> Dataset:
#task of the dataset
task = IMAGE_ANNOTATION_FORMATS_TO_TASKS[format]

try:
format_name = IMAGE_FORMAT_MAP[format]
dataset = Dataset.import_from(path, format_name)
except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex:
raise AnnotationsDatasetError(ex)
#import dataset
if format == 'clarifai':
dataset = Clarifai_to_Datumaro(path).convert()
else:
try:
format_name = IMAGE_FORMAT_MAP[format]
dataset = Dataset.import_from(path, format_name)
except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex:
raise AnnotationsDatasetError(ex)

return ImageAnnotations(dataset, format, task)

Expand All @@ -84,12 +89,13 @@ def get_info(self,) -> Dict[str, Any]:
'categories': list(self._dataset.get_categories_info())
}

def export_to(self, path: str, format: str) -> None:
def export_to(self, path: str, format: str, save_images: bool = False) -> None:
"""Exports a dataset to a given path and format.
Args:
path (str): The path to the dataset.
format (str): The format of the dataset.
save_images (bool): Whether to save the images or not.
Example:
>>> from clarifai_datautils import ImageAnnotations
Expand All @@ -99,9 +105,13 @@ def export_to(self, path: str, format: str) -> None:
if format not in IMAGE_ANNOTATION_FORMATS:
raise AnnotationsFormatError('Invalid format')

if format == 'clarifai':
raise AnnotationsFormatError(
'Cannot export to clarifai format. Use clarifai SDK to upload the dataset.')

try:
format_name = IMAGE_FORMAT_MAP[format]
self._dataset.export(path, format_name)
self._dataset.export(path, format_name, save_media=save_images)
except Exception as ex:
raise AnnotationsDatasetError(ex)

Expand Down Expand Up @@ -130,6 +140,19 @@ def detect_format(path: str) -> str:
raise AnnotationsFormatError('Given folder does not contain a supported dataset format')
return dataset_format

@staticmethod
def list_formats() -> list:
    """Lists the supported formats.

    Returns:
        A new list holding the names of the supported annotation formats.

    Example:
        >>> from clarifai_datautils import ImageAnnotations
        >>> ImageAnnotations.list_formats()
    """
    # Hand out a copy: returning the module-level list itself would let a
    # caller's append/remove silently change the formats accepted elsewhere.
    return list(IMAGE_ANNOTATION_FORMATS)

@property
def dataloader(self) -> ClarifaiDataLoader:
"""Returns a Clarifai Dataloader Object to pass to SDK Dataset Upload Functionality.
Expand Down
98 changes: 98 additions & 0 deletions clarifai_datautils/image/annotation_conversion/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import json
import os
from typing import Tuple

import PIL
from datumaro.components.annotation import Bbox
from datumaro.components.dataset import Dataset
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.media import Image

from clarifai_datautils.errors import AnnotationsDatasetError, AnnotationsFormatError


class Clarifai_to_Datumaro():
    """Converts an exported Clarifai dataset folder to a Datumaro dataset.

    The folder must contain an ``inputs`` sub-folder holding ``.png`` images
    and an ``annotations`` sub-folder holding ``.json`` files. The annotation
    file of an image shares its base name (``inputs/a.png`` ->
    ``annotations/a.json``). Only annotations that carry a
    ``regionInfo.boundingBox`` are converted.
    """

    def __init__(
            self,
            main_path: str,
    ):
        """Records the dataset location and lists its contents.

        Missing folders are not an error here: they yield empty listings so
        that ``check_folder`` can report them with a descriptive exception.

        Args:
            main_path (str): The path to the clarifai dataset.
        """
        self.main_path = main_path
        self.image_list = self._list_subfolder('inputs')
        self.annotations_list = self._list_subfolder('annotations')
        # Maps label name -> integer label index, filled while items are created.
        self.label_map = {}

    def _list_subfolder(self, name: str) -> list:
        """Returns the sorted entries of ``main_path/name`` (empty if absent)."""
        folder = os.path.join(self.main_path, name)
        if not os.path.isdir(folder):
            return []
        # Sorted so item order and label indices do not depend on the
        # arbitrary order in which os.listdir reports entries.
        return sorted(os.listdir(folder))

    def convert(self) -> 'Dataset':
        """Check folder format and creates a Datumaro Dataset.

        Returns:
            A Datumaro dataset object.

        Raises:
            AnnotationsDatasetError: If ``main_path`` does not exist.
            AnnotationsFormatError: If the folder layout is not valid.
        """
        self.check_folder()
        # Build the items first: create_item() registers labels in
        # self.label_map as a side effect, so the category list is only
        # complete once every image has been processed.
        items = [self.create_item(path) for path in self.image_list]
        return Dataset.from_iterable(
            iterable=items, media_type=Image, categories=list(self.label_map))

    def create_item(self, image_path: str) -> 'DatasetItem':
        """Creates a Datumaro item from an image path.

        Args:
            image_path (str): File name of the image inside ``inputs``.

        Returns:
            A DatasetItem whose id is the file name without extension and
            whose annotations are the bounding boxes found in the matching
            annotation file (none if that file does not exist).
        """
        # ``import PIL`` alone does not guarantee the Image submodule is loaded.
        import PIL.Image

        # splitext only strips the final extension; splitting on '.png'
        # would truncate names such as 'a.png_copy.png'.
        item_id = os.path.splitext(image_path)[0]
        image_full_path = os.path.join(self.main_path, 'inputs', image_path)
        image_data = Image.from_file(image_full_path)
        # Context manager closes the file handle once the size is read.
        with PIL.Image.open(image_full_path) as pil_image:
            width, height = pil_image.size

        annotation_path = os.path.join(self.main_path, 'annotations', item_id + '.json')
        try:
            with open(annotation_path, 'r') as file:
                item_data = json.load(file)
        except FileNotFoundError:
            # An image without an annotation file is kept, just unlabeled.
            item_data = []

        annotations = []
        for annot in item_data:
            # Only annotations that have a bounding box are converted.
            bounding_box = annot.get('regionInfo', {}).get('boundingBox')
            if bounding_box is None:
                continue
            x, y, w, h = self.clarifai_bbox_to_datumaro_bbox(bounding_box, width, height)
            label = annot['data']['concepts'][0]['name']
            # First occurrence of a label gets the next free index.
            value = self.label_map.setdefault(label, len(self.label_map))
            annotations.append(Bbox(x=x, y=y, w=w, h=h, label=value))

        return DatasetItem(id=item_id, media=image_data, annotations=annotations)

    def clarifai_bbox_to_datumaro_bbox(self, clarifai_bbox, width,
                                       height) -> Tuple[float, float, float, float]:
        """Scales a Clarifai box by the image size and returns (x, y, w, h).

        Args:
            clarifai_bbox: Mapping with 'leftCol', 'topRow', 'rightCol' and
                'bottomRow' values, each multiplied by width or height.
            width: Image width.
            height: Image height.

        Returns:
            Tuple (left, top, box_width, box_height).
        """
        left_col = clarifai_bbox['leftCol'] * width
        top_row = clarifai_bbox['topRow'] * height
        right_col = clarifai_bbox['rightCol'] * width
        bottom_row = clarifai_bbox['bottomRow'] * height

        return (left_col, top_row, right_col - left_col, bottom_row - top_row)

    def check_folder(self):
        """Checks the clarifai folder format.

        Raises:
            AnnotationsDatasetError: If ``main_path`` does not exist.
            AnnotationsFormatError: If ``inputs`` or ``annotations`` is
                missing, or either contains files of an unexpected type.
        """
        if not os.path.exists(self.main_path):
            raise AnnotationsDatasetError(f'Folder not found at {self.main_path}')

        if not os.path.isdir(os.path.join(self.main_path, 'inputs')):
            raise AnnotationsFormatError(
                f'Folder does not contain an "inputs" folder at {self.main_path}')
        if not os.path.isdir(os.path.join(self.main_path, 'annotations')):
            raise AnnotationsFormatError(
                f'Folder does not contain an "annotations" folder at {self.main_path}')

        if not all(img.endswith('.png') for img in self.image_list):
            raise AnnotationsFormatError(
                f'Folder should only contain images at {self.main_path}/inputs')
        if not all(ann.endswith('.json') for ann in self.annotations_list):
            raise AnnotationsFormatError(
                f'Folder should only contain annotations at {self.main_path}/annotations')
11 changes: 9 additions & 2 deletions tests/annotations/test_clarifai_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
KITTI_PATH = get_asset_path('kitti_detection')
LABEL_ME_PATH = get_asset_path('labelme_dataset')
OPEN_IMAGES_PATH = get_asset_path('openimages_dataset')
CLARIFAI_PATH = get_asset_path('clarifai_dataset')

COCO_SEGMENTATION_PATH = get_asset_path('coco_segmentation')
CITYSCAPES_PATH = get_asset_path('cityscapes_dataset')
Expand Down Expand Up @@ -83,8 +84,6 @@ def test_coco_detection_loader(self,):
dataloader = annotation_object.dataloader
assert dataloader.task == 'visual_detection'
assert len(dataloader) == 2
assert dataloader[0].labels == ['b']
assert dataloader[0].id == 'a'
assert isinstance(dataloader[0].image_bytes, bytes)

def test_cvat_loader(self,):
Expand Down Expand Up @@ -121,6 +120,14 @@ def test_open_images_loader(self,):
assert dataloader[1].id == 'aa'
assert isinstance(dataloader[0].image_bytes, bytes)

def test_clarifai_loader(self,):
    """A Clarifai export yields a one-item detection dataloader."""
    imported = ImageAnnotations.import_from(path=CLARIFAI_PATH, format='clarifai')
    loader = imported.dataloader
    # Task and size of the loader.
    assert loader.task == 'visual_detection'
    assert len(loader) == 1
    # The single input keeps the image base name as id and exposes raw bytes.
    assert loader[0].id == '000464'
    assert isinstance(loader[0].image_bytes, bytes)

def test_coco_segmentation_loader(self,):
annotation_object = ImageAnnotations.import_from(
path=COCO_SEGMENTATION_PATH, format='coco_segmentation')
Expand Down
8 changes: 8 additions & 0 deletions tests/annotations/test_import_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
KITTI_PATH = get_asset_path('kitti_detection')
LABEL_ME_PATH = get_asset_path('labelme_dataset')
OPEN_IMAGES_PATH = get_asset_path('openimages_dataset')
CLARIFAI_PATH = get_asset_path('clarifai_dataset')

COCO_SEGMENTATION_PATH = get_asset_path('coco_segmentation')
CITYSCAPES_PATH = get_asset_path('cityscapes_dataset')
Expand Down Expand Up @@ -118,6 +119,13 @@ def test_open_images_import(self,):
assert annotation_object.task == 'visual_detection'
assert len(annotation_object._dataset._data) == 2 # 2 images

def test_clarifai_import(self,):
    """Importing the Clarifai fixture gives one image with two annotations."""
    imported = ImageAnnotations.import_from(path=CLARIFAI_PATH, format='clarifai')
    assert imported.annotation_format == 'clarifai'
    assert imported.task == 'visual_detection'
    datumaro_dataset = imported._dataset
    assert len(datumaro_dataset._data) == 1  # 1 image
    assert datumaro_dataset.get_annotations() == 2  # 2 annotations

def test_coco_segmentation_import(self,):
annotation_object = ImageAnnotations.import_from(
path=COCO_SEGMENTATION_PATH, format='coco_segmentation')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"id": "c72ffee676ad6ae88acfc72791bdae14", "regionInfo": {"boundingBox": {"topRow": 0.502, "leftCol": 0.18666667, "bottomRow": 0.626, "rightCol": 0.5733333}}, "data": {"concepts": [{"id": "id-cow", "name": "cow", "value": 1.0, "appId": "demo_train_1402"}]}}, {"id": "b75d9daca8d6b1f07d2e4a39f36bb4c7", "regionInfo": {"boundingBox": {"topRow": 0.402, "leftCol": 0.152, "bottomRow": 0.588, "rightCol": 0.64}}, "data": {"concepts": [{"id": "id-cow", "name": "cow", "value": 1.0, "appId": "demo_train_1402"}]}}]
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.