In [29]:
import labelbox as lb
import os
from uuid import uuid4

from lb_scripts.lb_import_files import check_missed_data_rows, upload_data_row, create_ontology, get_global_ids, import_local_files

from dotenv import load_dotenv, find_dotenv
from pprint import pprint

# Locate a .env file and load its variables into the process environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Shared Labelbox client used by the cells below.
# NOTE(review): no api_key is passed, so the client presumably reads its key
# from the environment populated above — confirm.
client = lb.Client()

In [3]:
# Get-or-create: reuse the project named "animal_ML" when the lookup finds
# one, otherwise create it as an image project.
PROJECT_NAME = "animal_ML"

project_matches = client.get_projects(where=lb.Project.name == PROJECT_NAME)
proj = project_matches.get_one() or client.create_project(
    name=PROJECT_NAME,
    media_type=lb.MediaType.Image,
)

In [4]:
# Get-or-create for the dataset named "animal_dataset".
DATASET_NAME = "animal_dataset"

dataset_matches = client.get_datasets(where=lb.Dataset.name == DATASET_NAME)
dataset: lb.Dataset = dataset_matches.get_one()

if not dataset:
    # No dataset yet: gather the local files first, then create the dataset
    # and upload the files into it.
    assets = import_local_files("raw-img")
    dataset = client.create_dataset(name=DATASET_NAME, iam_integration=None)
    # NOTE(review): 1000 is presumably the upload batch size — confirm
    # against upload_data_row in lb_scripts.lb_import_files.
    upload_data_row(assets, dataset, 1000)

In [None]:
# Run the project helper on the dataset.
# NOTE(review): judging by its name it looks for data rows that were missed
# during upload — confirm in lb_scripts.lb_import_files.
check_missed_data_rows(dataset)

In [41]:
# Obtain the "animal_class" ontology from the project helper.
# NOTE(review): the helper is expected to create the ontology when it does
# not exist and to return the existing one otherwise — confirm in
# lb_scripts.lb_import_files.
ontology = create_ontology("raw-img", "animal_class", client)

# Attach the ontology unless the project already uses it. The project's
# current ontology is looked up once and reused for both checks, instead of
# querying the relationship twice.
current_ontology = proj.ontology()
if not current_ontology or current_ontology.uid != ontology.uid:
    proj.setup_editor(ontology)

In [None]:
# Queue the dataset for labeling once: "main_batch" is only created when the
# project does not have any batch yet.
batches = proj.batches()
first_batch = batches.get_one()

if not first_batch:
    # NOTE(review): the helper's name suggests it returns global keys, yet the
    # values are passed as `data_rows` — verify what get_global_ids returns
    # and which create_batch argument it belongs in.
    data_rows = get_global_ids(dataset)
    proj.create_batch(name="main_batch", data_rows=data_rows)

In [14]:
# One-off repair: add a global key to the data row that was missing one.
MISSING_KEY_DATA_ROW_ID = "clp7osdpk1yol070107jj29ke"
data_row = client.get_data_row(MISSING_KEY_DATA_ROW_ID)

# Only assign a key when none is set, so re-running this cell does not
# replace an existing global key with a fresh random one.
if not data_row.global_key:
    data_row.update(global_key=str(uuid4()))

In [24]:
# Add the custom class metadata to the data row that was missing it.
mdo = client.get_data_row_metadata_ontology()

# Resolve the custom metadata schema by id so the field can be addressed by
# its name.
enum_schema = mdo.fields_by_id["clp7eyhcj043p07zue0jiefah"]

class_field = lb.DataRowMetadataField(name=enum_schema.name, value="spider")
metadata_payload = lb.DataRowMetadata(
    global_key=data_row.global_key,
    fields=[class_field],
)

# Kept as the cell's final expression so the upsert's return value is shown.
mdo.bulk_upsert([metadata_payload])

animal_class


[]

In [37]:
# Export the dataset's data rows together with performance details, label
# details and metadata fields.
export_task = dataset.export_v2(
    params={
        "performance_details": True,
        "label_details": True,
        "metadata_fields": True,
    }
)

# The export runs as a task: wait for it to finish and stop on a failed
# export rather than reading an unfinished or broken result downstream.
export_task.wait_till_done()
if export_task.errors:
    raise RuntimeError(f"Dataset export failed: {export_task.errors}")

# The exported records; later cells index and iterate over this.
export_json = export_task.result

In [38]:
# Pretty-print the first exported record to inspect the structure of an
# export entry.
pprint(export_json[0], indent=2)

{ 'data_row': { 'external_id': './raw-img/ragno/OIP-_0YC0pbR0XIolgpTaswWRwHaFj.jpeg',
                'global_key': '267977cc-84e4-4673-8165-9e159305e199',
                'id': 'clp7fey2a0zqp0778b4vst8r9',
                'row_data': 'https://storage.labelbox.com/cloordigw0091073kc6pohyab%2F91322050-f258-d298-051d-f40b955c9d66-OIP-_0YC0pbR0XIolgpTaswWRwHaFj.jpeg?Expires=1700667681839&KeyName=labelbox-assets-key-3&Signature=i1pRI8ti-fsVgL8biRx_LeLSx2o'},
  'media_attributes': { 'exif_rotation': '1',
                        'height': 225,
                        'mime_type': 'image/jpeg',
                        'width': 300},
  'metadata_fields': [ { 'schema_id': 'clp7eyhcj043p07zue0jiefah',
                         'schema_kind': 'CustomMetadataEnum',
                         'schema_name': 'animal_class',
                         'value': [ { 'schema_id': 'clp7eyhcj043z07zubnpq775e',
                                      'schema_kind': 'CustomMetadataEnumOption',
                    

In [40]:
# Explore metadata fields and global keys.
CLASS_FIELD_NAME = "animal_class"


def _animal_class(record):
    """Return the class option name of an exported record, or None.

    The class is read from the metadata field whose schema name is
    CLASS_FIELD_NAME, so the result does not depend on the order of the
    record's metadata fields. None is returned when the record has no such
    field or the field holds no option.
    """
    for field in record.get("metadata_fields") or []:
        if field.get("schema_name") != CLASS_FIELD_NAME:
            continue
        options = field.get("value") or []
        if options:
            return options[0]["schema_name"]
    return None


global_keys = []   # global keys of every record that has one
class_dict = {}    # class name -> number of records with that class

for data in export_json:
    row_info = data["data_row"]

    global_key = row_info.get("global_key")
    if global_key:
        global_keys.append(global_key)
    else:
        print(f"Global Key Missing: {row_info['external_id']}")

    # A record without a global key is still counted towards its class.
    class_name = _animal_class(data)
    if class_name is None:
        # Report the record instead of crashing on the missing metadata.
        print(f"Class Missing: {row_info['external_id']}")
        continue
    class_dict[class_name] = class_dict.get(class_name, 0) + 1

pprint(class_dict, indent=2)
print(sum(class_dict.values()))

{ 'butterfly': 2112,
  'cat': 1668,
  'chicken': 3098,
  'cow': 1866,
  'dog': 4863,
  'elephant': 1446,
  'horse': 2623,
  'sheep': 1820,
  'spider': 4821,
  'squirrel': 1862}
26179
