In [3]:
from labelbox import Client, Project, Dataset
from labelbox import LabelingFrontend
import json
from collections import defaultdict
from PIL import Image
import cv2
import datetime
import os
import uuid
import numpy as np
from tqdm import tqdm_gui

In [79]:
#Download data from here: https://beerys.github.io/CaltechCameraTraps/
#We used the small dataset
# Read through a context manager so the file handle is closed as soon as parsing finishes.
with open('CaltechCameraTrapsECCV18.json') as annotation_file:
    data = json.load(annotation_file)

In [80]:
# Re-key the category records by their 'id' so they can be looked up directly.
categories_by_id = {}
for category in data['categories']:
    categories_by_id[category['id']] = category
data['categories'] = categories_by_id
# Missing keys start out as an empty list.
annotations = defaultdict(list)

In [None]:
# Keep only the first image seen for each seq_id, so that every kept image
# comes from a different sequence (the same animal should not end up in
# both the train and eval set by chance).

images = {}
ids = set()
for record in data['images']:
    sequence = record['seq_id']
    if sequence not in ids:
        ids.add(sequence)
        images[record['id']] = record
# From here on data['images'] is a dict keyed by image id, not the original list.
data['images'] = images

In [82]:
# Report how many image records are currently held in data['images'].
kept_image_count = len(data['images'])
print(kept_image_count)

22719


In [83]:
# Group annotations that carry a bbox by image id. An annotation without a
# bbox instead removes its image from data['images'] (if it is still there).
for ann in data['annotations']:
    has_box = ann.get('bbox') is not None
    image_key = ann['image_id']
    if has_box:
        annotations[image_key].append(ann)
    elif image_key in data['images']:
        del data['images'][image_key]

In [84]:

def process_image(image):
    """Load one image and rescale its bounding boxes to the loaded pixel size.

    An image is rejected (``None`` is returned) when any of these hold:
      * it was captured outside hours 8-17, or during hour 12;
      * it has no annotations;
      * any annotation's category is not one of the target animals;
      * any box lies within 50 units of an image edge.

    Args:
        image: Image record from the dataset JSON. The keys read are
            'date_captured', 'id', 'height', 'width', 'file_name' and
            'location'.

    Returns:
        ``None`` if the image is rejected, otherwise a tuple
        ``(pixels, box_coords, location)`` where ``pixels`` is the image as a
        numpy array, ``box_coords`` is a list of ``[start_pt, end_pt]`` pairs
        of ``(x, y)`` integer tuples in the loaded image's pixel space, and
        ``location`` is ``image['location']``.
    """
    captured = datetime.datetime.strptime(image['date_captured'], '%Y-%m-%d %H:%M:%S')
    # Only train on day time images. Hour 12 is rejected as well; the reason
    # is not documented in this notebook.
    if not (18 > captured.hour > 7) or captured.hour == 12:
        return None

    # .get() avoids inserting an empty list into the module-level mapping.
    annots = annotations.get(image['id'], [])
    if not annots:
        # Nothing to label, and there would be no pixel array to save downstream.
        return None

    target_names = ('dog', 'cat', 'deer', 'bobcat', 'fox')
    margin = 50
    h, w = image['height'], image['width']

    # Validate every annotation before touching the image file, so rejected
    # images are never loaded from disk.
    accepted_boxes = []
    for annot in annots:
        if data['categories'][annot['category_id']]['name'] not in target_names:
            return None

        bbox = annot.get('bbox')
        assert bbox is not None
        # bbox is treated as [x, y, width, height], matching the margin checks below.
        x, y, box_w, box_h = bbox[:4]

        # Reject boxes that start too close to the left/top edge ...
        if x < margin or y < margin:
            return None
        # ... or end too close to the right/bottom edge.
        if (w - (x + box_w)) < margin or (h - (y + box_h)) < margin:
            return None

        accepted_boxes.append((x, y, box_w, box_h))

    # np.array() forces the pixel data to be read before the file is closed.
    with Image.open(os.path.join('eccv_18_all_images_sm', image['file_name'])) as image_file:
        im = np.array(image_file)
    new_h, new_w = im.shape[:2]

    def scale(x, y):
        # x is horizontal and scales with the width ratio; y scales with the
        # height ratio.
        return (int((x / w) * new_w), int((y / h) * new_h))

    box_coords = []
    for x, y, box_w, box_h in accepted_boxes:
        start_pt = scale(x, y)
        end_pt = scale(x + box_w, y + box_h)
        box_coords.append([start_pt, end_pt])
    return im, box_coords, image['location']


In [85]:
# Run process_image over every remaining image record, with a progress bar.
image_records = list(data['images'].values())
examples = [process_image(record) for record in tqdm_gui(image_records)]

Using matplotlib backend: MacOSX


  examples = [process_image(ex) for ex in tqdm_gui(list(data['images'].values()))]


In [86]:
# Drop the None entries and report how many examples remain.
examples = list(filter(lambda candidate: candidate is not None, examples))
print(len(examples))

1045


In [87]:
# Create the output folders up front so this cell also works on a fresh run;
# exist_ok keeps re-running the cell harmless.
os.makedirs("uploaded_images", exist_ok=True)
os.makedirs("labels", exist_ok=True)
image_paths = []
# Wrap the list itself (not the enumerate object) so the progress bar knows the total.
for idx, example in enumerate(tqdm_gui(examples)):
    imm, coords, location = example
    image_path = os.path.join("uploaded_images", f"{idx}.jpg")
    image_paths.append(image_path)
    Image.fromarray(imm).save(image_path)
    # The label file shares the image's index so the two can be matched up by name.
    with open(os.path.join("labels", f"{idx}.json"), 'w') as file:
        json.dump({'coords' : coords, 'location' : location}, file)


  for idx, example in tqdm_gui(enumerate(examples)):


In [5]:
# Client() is constructed without explicit arguments here.
client = Client()
# Look the project up by name; indexing [0] raises IndexError when nothing matches.
matching_projects = list(client.get_projects(where = Project.name == "animal_demo_proj"))
project = matching_projects[0]
#dataset = list(client.get_datasets(where = Dataset.name == "animal_demo_ds"))[0]

In [93]:
# Create a project and a dataset, attach the dataset to the project,
# then register the saved image paths as data rows of that dataset.
# Note: this rebinds `project` to the newly created project.
project = client.create_project(name = "animal_demo_proj")
dataset = client.create_dataset(name = "animal_demo_ds")
project.datasets.connect(dataset)
# Last expression of the cell: its return value is shown as the cell output.
dataset.create_data_rows(image_paths)

<Task ID: ckm4y6s2a28dq0712h0ytuedz>

In [12]:
# Rectangle tool named 'animal'.
animal_box_tool = {
    'schemaNodeId': 'ckm3se1a5010x0y6r3hpsb0wc',
    'featureSchemaId': 'ckm3se1a5010y0y6rcdc0bvvl',
    'required': False,
    'name': 'animal',
    'tool': 'rectangle',
    'color': '#1CE6FF',
    'classifications': [],
}

# The two options offered by the 'image' checklist.
difficult_option = {
    'schemaNodeId': 'ckm3se1aw010z0y6rdgaof1fu',
    'featureSchemaId': 'ckm3se1aw01100y6rf8b59u51',
    'label': 'difficult',
    'value': 'difficult',
}
contains_human_option = {
    'schemaNodeId': 'ckm3se1aw01110y6rfx3benbx',
    'featureSchemaId': 'ckm3se1aw01120y6r7zkcd93v',
    'label': 'contains_human',
    'value': 'contains_human',
}

# Checklist classification named 'image'.
image_checklist = {
    'schemaNodeId': 'ckm3se1a4010v0y6rfe3b9s8t',
    'featureSchemaId': 'ckm3se1a4010w0y6rd579gyl3',
    'required': False,
    'instructions': 'image',
    'name': 'image',
    'type': 'checklist',
    'options': [difficult_option, contains_human_option],
}

# Full ontology: one tool and one classification.
ontology = {
    'tools': [animal_box_tool],
    'classifications': [image_checklist],
}

In [92]:
# Pick the first labeling frontend named "Editor" and configure the project
# with it and the ontology.
editor_frontends = list(client.get_labeling_frontends(where=LabelingFrontend.name == "Editor"))
editor = editor_frontends[0]
project.setup(editor, ontology)

In [94]:
def get_labels(dr):
    """Load the locally saved label file that belongs to a data row.

    The file name is the last '/'-separated component of ``dr.external_id``
    with '.jpg' replaced by '.json', looked up inside the 'labels' directory.

    Args:
        dr: Object exposing an ``external_id`` string attribute.

    Returns:
        The decoded JSON content of the label file.
    """
    label_name = dr.external_id.split('/')[-1].replace('.jpg', '.json')
    label_name = f"labels/{label_name}"
    # Context manager closes the handle instead of leaving it to the garbage collector.
    with open(label_name) as label_file:
        return json.load(label_file)

In [95]:
# Materialise all data rows of the project's first dataset.
first_dataset = list(project.datasets())[0]
datarows = list(first_dataset.data_rows())

In [96]:
# Sanity check: display the stored labels for the first data row.
first_row_labels = get_labels(datarows[0])
first_row_labels

{'coords': [[[679, 276], [821, 398]]], 'location': 38}

In [13]:
# Schema id of the first tool in the ontology.
first_tool = ontology['tools'][0]
featureSchemaId = first_tool['featureSchemaId']

In [14]:
# Build one bounding-box annotation per stored box.
boxes = []
for datarow in datarows:
    # A label file can hold several boxes. Emit all of them rather than only
    # the first; an empty list then simply yields no annotations.
    for start_pt, end_pt in get_labels(datarow)['coords']:
        left, top = start_pt
        right, bottom = end_pt
        row = {
            'uuid' : str(uuid.uuid4()),
            'schemaId' : featureSchemaId,
            # The data row's id goes here, not the datarow object itself.
            'dataRow' : {'id' : datarow.uid},
            'bbox' : {
                'top' : top,
                'left' : left,
                'height' : bottom - top,
                'width' : right - left
            }
        }
        boxes.append(row)
    
#Had a heart attack because I thought validation was wrong. But it saved me 30 min
#Passed in datarow instead of data_id.



NameError: name 'datarows' is not defined

In [101]:
# Submit the boxes under the name "upload-2"; the returned object is shown as the cell output.
project.upload_annotations(annotations=boxes, name="upload-2")

<BulkImportRequest ID: 52e66228-6c37-4887-99b9-0c5dd13afc38>

In [103]:
from labelbox.schema.bulk_import_request import BulkImportRequest

In [104]:
# Look up the import that this notebook submits under the name "upload-2"
# (the earlier "upload-1" name no longer matches it) and block until it finishes.
upload_job = BulkImportRequest.from_name(client, project.uid, name = "upload-2")
upload_job.wait_until_done()