In [69]:
import labelbox as lb
import labelbox.types as lb_types
import os
from uuid import uuid4
import time

from lb_scripts.lb_import_files import check_missed_data_rows, upload_data_row, create_ontology, get_global_ids, import_local_files
from lb_scripts.translate import translate

from dotenv import load_dotenv, find_dotenv
from pprint import pprint

_=load_dotenv(find_dotenv())

client = lb.Client()

In [3]:
proj = client.get_projects(where=lb.Project.name == "animal_ML").get_one()
if not proj:
    proj = client.create_project(
        name="animal_ML",
        media_type=lb.MediaType.Image
        )

In [4]:
dataset: lb.Dataset = client.get_datasets(where=lb.Dataset.name=="animal_dataset").get_one()

if not dataset:
    assets = import_local_files("raw-img")
    dataset = client.create_dataset(
            name="animal_dataset",
            iam_integration=None
            )
    upload_data_row(assets, dataset, 1000)

In [None]:
check_missed_data_rows(dataset)

In [41]:
#create ontology if one does not exist if not return associated ontology
ontology = create_ontology("raw-img", "animal_class", client)

#check if project has ontology attached
if not proj.ontology() or proj.ontology().uid != ontology.uid:
    proj.setup_editor(ontology)

In [None]:
#check if project has batch
batches = proj.batches()
    
if not batches.get_one():
    data_rows = get_global_ids(dataset)
    proj.create_batch(name="main_batch", data_rows=data_rows)

In [14]:
#add global key to data row that was missing one
data_row = client.get_data_row("clp7osdpk1yol070107jj29ke")
data_row.update(
    global_key=str(uuid4()),

)

In [24]:
#adding custom metadata field to data row missing class
mdo = client.get_data_row_metadata_ontology()
enum_schema = mdo.fields_by_id["clp7eyhcj043p07zue0jiefah"]
metadata_payload = lb.DataRowMetadata(
    global_key=data_row.global_key,
    fields=[lb.DataRowMetadataField(
        name=enum_schema.name,
        value="spider"
    )]
)
mdo.bulk_upsert([metadata_payload])

animal_class


[]

In [37]:
export_json = dataset.export_v2(
  params={
  "performance_details": True,
  "label_details": True,
  "metadata_fields": True
  }
)

export_json = export_json.result

In [38]:
pprint(export_json[0], indent=2)

{ 'data_row': { 'external_id': './raw-img/ragno/OIP-_0YC0pbR0XIolgpTaswWRwHaFj.jpeg',
                'global_key': '267977cc-84e4-4673-8165-9e159305e199',
                'id': 'clp7fey2a0zqp0778b4vst8r9',
                'row_data': 'https://storage.labelbox.com/cloordigw0091073kc6pohyab%2F91322050-f258-d298-051d-f40b955c9d66-OIP-_0YC0pbR0XIolgpTaswWRwHaFj.jpeg?Expires=1700667681839&KeyName=labelbox-assets-key-3&Signature=i1pRI8ti-fsVgL8biRx_LeLSx2o'},
  'media_attributes': { 'exif_rotation': '1',
                        'height': 225,
                        'mime_type': 'image/jpeg',
                        'width': 300},
  'metadata_fields': [ { 'schema_id': 'clp7eyhcj043p07zue0jiefah',
                         'schema_kind': 'CustomMetadataEnum',
                         'schema_name': 'animal_class',
                         'value': [ { 'schema_id': 'clp7eyhcj043z07zubnpq775e',
                                      'schema_kind': 'CustomMetadataEnumOption',
                    

In [40]:
#explore metadata fields and global keys
global_keys = []
for data in export_json:
    global_key = data["data_row"]["global_key"]
    if not global_key:
        print(f"Global Key Missing: {data['data_row']['external_id']}")
        continue
    global_keys.append(global_key)
class_dict = {}
for data in export_json:
    class_name = data["metadata_fields"][0]["value"][0]["schema_name"]
    class_dict[class_name] = class_dict.get(class_name, 0) + 1

pprint(class_dict, indent=2)
print(sum(class_dict.values()))

{ 'butterfly': 2112,
  'cat': 1668,
  'chicken': 3098,
  'cow': 1866,
  'dog': 4863,
  'elephant': 1446,
  'horse': 2623,
  'sheep': 1820,
  'spider': 4821,
  'squirrel': 1862}
26179


In [52]:
slice_ids = [
    "clp8le5np00no07ycblgjgzlc",
    "clp8lcyyj00mi07wn3soq9fm8",
    "clp8lc7to00j807x61qpjhl6b",
    "clp8lbxnr00eu07yj3or2cugo",
    "clp8lbl9s00eb07yj2pw6cq5q",
    "clp8lb9u400b707yrguy58u4s",
    "clp8laus500dl07ymadhtgphb",
    "clp8kxz89004p07zy82gb14gw",
    "clp8kxdv4007i07wnfwpmdj6t",
    "clp8kwzz8004t07ym3jl03r79"
    ]

catalog_slices = [client.get_catalog_slice(slice_id) for slice_id in slice_ids]


for catalog_slice in catalog_slices:
    proj.create_batch(
        f"{catalog_slice.name}-batch",
        catalog_slice.get_data_row_ids()
    )
    time.sleep(30)

In [73]:
classifications = ontology.classifications()
options = classifications[0].options
ontology_lst = []

for option in options:
    ontology_lst.append(option.value)

options_dict = {translate[animal]: animal for animal in ontology_lst}
pprint(options_dict, indent=2)

{ 'butterfly': 'farfalla',
  'cat': 'gatto',
  'chicken': 'gallina',
  'cow': 'mucca',
  'dog': 'cane',
  'elephant': 'elefante',
  'horse': 'cavallo',
  'sheep': 'pecora',
  'spider': 'ragno',
  'squirrel': 'scoiattolo'}


In [119]:
batches: list[lb.Batch] = proj.batches()

labels = []

for batch in batches:

    print(batch.name)
    for data_row in batch.export_data_rows(include_metadata=True):
        data_row: lb.DataRow = data_row

        animal_name = options_dict[data_row.metadata_fields[0]["value"]]
        global_key = data_row.global_key
        annotations = [
            lb_types.ClassificationAnnotation(
                name="checklist_question",
                value=lb_types.Checklist(answer=[lb_types.ClassificationAnswer(name=animal_name)])
            )
        ]
        labels.append(lb_types.Label(data=lb_types.ImageData(global_key=global_key), annotations=annotations))


Dogs-batch
Spiders-batch
Chickens-batch
Horses-batch
Butterflies-batch
Cows-batch
Squirrels-batch
Sheep-batch
Cats-batch
Elephants-batch
uid=None data=ImageData(im_bytes=None,file_path=None,url=None,arr=None) annotations=[ClassificationAnnotation(confidence=None, name='checklist_question', feature_schema_id=None, extra={}, value=Checklist(confidence=None, name='checklist', answer=[ClassificationAnswer(confidence=None, name='cane', feature_schema_id=None, extra={}, keyframe=None, classifications=[])]), message_id=None)] extra={}


In [120]:

upload_job = lb.LabelImport.create_from_objects(
    client=client,
    project_id=proj.uid,
    name="label_import_job"+str(uuid4()),
    labels=labels
)
upload_job.wait_until_done(sleep_time_seconds=10)
pprint(f"Errors: {upload_job.errors}", indent=2)
pprint(f"Status of uploads: {upload_job.statuses}", indent=2)


'Errors: []'
("Status of uploads: [{'uuid': '7a8f64db-94df-46ff-bb3e-b892e186f277', "
 "'dataRow': {'id': 'clp7g7cf406un0786012wnalb', 'globalKey': "
 "'18921056-1568-4e6d-a156-eff4c9f3c420'}, 'status': 'SUCCESS'}, {'uuid': "
 "'afd4d39d-1d64-4f23-a323-1ac75050fd93', 'dataRow': {'id': "
 "'clp7g7cf406um0786unp409ko', 'globalKey': "
 "'f8fe686c-c3ff-496d-aee0-3dfbf3ceb27e'}, 'status': 'SUCCESS'}, {'uuid': "
 "'80292c8e-776d-4363-8b55-46d15ce091e7', 'dataRow': {'id': "
 "'clp7g7cf406ul0786a7o0pnf3', 'globalKey': "
 "'007eb06c-af9d-40cd-9836-757482cfceb4'}, 'status': 'SUCCESS'}, {'uuid': "
 "'d478d29f-cee6-4439-a03e-9363c9d31c7e', 'dataRow': {'id': "
 "'clp7g7cf406uk07862rmn6565', 'globalKey': "
 "'9b74a6cd-2e91-4ed6-9b65-5790ab309aea'}, 'status': 'SUCCESS'}, {'uuid': "
 "'fdd646ab-f0bb-4c22-9f4a-9c7e5debf197', 'dataRow': {'id': "
 "'clp7g7cf406uj0786f527wnzp', 'globalKey': "
 "'d11ad0d4-a582-4cba-a24d-07e01b89cbb9'}, 'status': 'SUCCESS'}, {'uuid': "
 "'35cf4c0c-cef7-46c6-b72b-b25fe87e9c3