# Create training data using Labelbox
* Download images and animal annotations
* Upload them to Labelbox using model-assisted labeling (MAL)
* Label to add any missing classes

In [1]:
from labelbox import Client, Project, Dataset
from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
from labelbox import LabelingFrontend
import json
from collections import defaultdict
from PIL import Image
import cv2
import datetime
import os
import uuid
import numpy as np

## Download Images and Annotations
* The dataset contains images of animals in the wild and corresponding bounding boxes
* Read more about the dataset here: https://beerys.github.io/CaltechCameraTraps/

In [2]:
# Fetch and unpack the image archive; both steps are skipped when the
# extracted 'eccv_18_all_images_sm' directory already exists.
# Dataset page: https://beerys.github.io/CaltechCameraTraps/
# This file is 6GB so this might take a little while
if not os.path.exists('eccv_18_all_images_sm'):
    !wget http://www.vision.caltech.edu/~sbeery/datasets/caltechcameratraps18/eccv_18_all_images_sm.tar.gz
    !tar -zxf eccv_18_all_images_sm.tar.gz

In [3]:
# Fetch and unpack the annotation archive; both steps are skipped when the
# extracted 'eccv_18_annotation_files' directory already exists.
if not os.path.exists('eccv_18_annotation_files'):
    !wget http://www.vision.caltech.edu/~sbeery/datasets/caltechcameratraps18/eccv_18_annotations.tar.gz
    !tar -zxf eccv_18_annotations.tar.gz

## Preprocess Data
* Select only day time images and a subset of possible animals
* Since the images come from sequences of camera frames, we keep a single frame per sequence so that near-identical frames of the same animal cannot end up in both the train and eval datasets.
* Convert the data into a format that is compatible with Labelbox

In [4]:
# Load the training annotations; the context manager makes sure the file
# handle is closed as soon as the JSON has been parsed.
with open('eccv_18_annotation_files/train_annotations.json') as annotation_file:
    data = json.load(annotation_file)
# Re-key the category list by category id so a category can be looked up directly.
data['categories'] = {d['id'] : d for d in data['categories']}
# image_id -> list of annotation dicts (populated in a later cell).
annotations = defaultdict(list)

In [5]:
# Keep a single frame per sequence (seq_id) so that the same animal isn't in the
# train and eval set by chance, and so that every kept image comes from a
# different sequence (not the same animal).

# After this cell has run once, data['images'] is the id-keyed dict built below.
# Iterate over its values in that case so the cell can be re-run safely.
candidate_images = data['images'].values() if isinstance(data['images'], dict) else data['images']

images = {}
ids = set()
for img in candidate_images:
    if img['seq_id'] in ids:
        # A frame from this sequence was already kept.
        continue
    ids.add(img['seq_id'])
    images[img['id']] = img
data['images'] = images

# Start from an empty mapping so a re-run does not append every annotation twice.
annotations.clear()
for annotation in data['annotations']:
    if annotation.get('bbox') is None:
        # An annotation without a bounding box disqualifies its whole image.
        data['images'].pop(annotation['image_id'], None)
        continue
    annotations[annotation['image_id']].append(annotation)

In [6]:
target_classes = {'dog', 'cat', 'deer', 'bobcat', 'fox'}


def process_image(image, min_bbox_height_px = 50 , min_bbox_width_px = 50):
    """Load one image and rescale its bounding boxes to the stored image size.

    Args:
        image: Image metadata dict. Reads the keys 'id', 'date_captured'
            ('%Y-%m-%d %H:%M:%S'), 'height', 'width', 'file_name' and 'location'.
        min_bbox_height_px: Minimum distance, in metadata pixels, required between
            the box and the top / bottom image edges.
        min_bbox_width_px: Minimum distance, in metadata pixels, required between
            the box and the left / right image edges.

    Returns:
        ``None`` when the image is rejected, otherwise a tuple
        ``(pixels, box_coords, location)`` where ``pixels`` is the image as a numpy
        array and ``box_coords`` is a list of ``[(x0, y0), (x1, y1)]`` corner pairs
        expressed in pixels of the loaded image.

    An image is rejected when it was captured outside hours 8-17 (or during hour
    12), when it has no annotations, when any annotation's category is not in
    ``target_classes``, or when any box fails the edge-margin checks.
    """
    date_time_obj = datetime.datetime.strptime(image['date_captured'], '%Y-%m-%d %H:%M:%S')
    # Only train on day time images.
    # NOTE(review): hour 12 is excluded as well; the reason is not visible here - confirm it is intended.
    if (not ((18 > date_time_obj.hour > 7)) or (date_time_obj.hour == 12)):
        return None

    annots = annotations[image['id']]
    if not annots:
        # Nothing to pre-label; returning a tuple with no pixels would break the
        # later save / upload steps.
        return None

    h, w = image['height'], image['width']

    # Validate every annotation before touching the image file so that rejected
    # images are never read from disk.
    for annot in annots:
        if not (data['categories'][annot['category_id']]['name'] in target_classes):
            return None

        bbox = annot.get('bbox')
        assert bbox is not None

        # bbox is used as [x, y, width, height] below, so these two checks compare
        # the box's left / top offsets: boxes closer than the minimum to the left
        # or top edge are rejected.
        # NOTE(review): the parameter names suggest a box-size filter was intended - confirm.
        if bbox[0] < min_bbox_width_px or bbox[1] < min_bbox_height_px:
            return None

        # Same margin requirement for the right and bottom edges.
        if (w - (bbox[0] + bbox[2])) < min_bbox_width_px or (h - (bbox[1] + bbox[3])) < min_bbox_height_px:
            return None

    im = np.array(Image.open(os.path.join('eccv_18_all_images_sm', image['file_name'])))
    new_h, new_w = im.shape[:2]

    def to_loaded_px(x, y):
        # The loaded image may have a different resolution than the metadata
        # reports: x scales with the width ratio and y with the height ratio.
        return (int((x / w) * new_w), int((y / h) * new_h))

    box_coords = []
    for annot in annots:
        x, y, box_w, box_h = annot['bbox'][:4]
        box_coords.append([to_loaded_px(x, y), to_loaded_px(x + box_w, y + box_h)])
    return im, box_coords, image['location']


In [7]:
# Run every remaining image through process_image and keep only the ones that
# were not rejected (rejected images come back as None).
examples = [
    result
    for result in map(process_image, data['images'].values())
    if result is not None
]
print(len(examples))

191


* Write the data to file so that we can reference it later for uploads and metadata

In [8]:
# Make sure both output directories exist before writing into them.
for out_dir in ("uploaded_images", "labels"):
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

# Save each example as <idx>.jpg plus a matching <idx>.json holding its boxes
# and location, remembering the image paths for the upload step.
image_paths = []
for idx, (imm, coords, location) in enumerate(examples):
    image_path = os.path.join("uploaded_images", f"{idx}.jpg")
    image_paths.append(image_path)
    Image.fromarray(imm).save(image_path)
    label_path = os.path.join("labels", f"{idx}.json")
    with open(label_path, 'w') as label_file:
        json.dump({'coords' : coords, 'location' : location}, label_file)


## Upload to Labelbox
* Setup a project
* Add the images to label
* Upload annotations using MAL
-----
For more information on this process, check out the example notebooks covering MAL:
https://github.com/Labelbox/labelbox-python/tree/develop/examples#model-assisted-labeling

In [9]:
# Read the API key from the environment so that it is never stored in the notebook.
# NOTE: any key that was previously committed with this notebook should be revoked.
api_key = os.environ.get("LABELBOX_API_KEY")
if not api_key:
    raise RuntimeError("Set the LABELBOX_API_KEY environment variable before running this cell.")
client = Client(api_key = api_key)

In [10]:
# Create the project and the dataset and attach the dataset to the project.
project = client.create_project(name = "animal_demo_proj")
dataset = client.create_dataset(name = "animal_demo_ds")
project.datasets.connect(dataset)
# create_data_rows uploads asynchronously and returns a task; wait for it so
# that all data rows exist before they are listed further down.
upload_task = dataset.create_data_rows(image_paths)
upload_task.wait_till_done()
project.enable_model_assisted_labeling()

True

In [11]:
# Pick the labeling frontend named "Editor".
editor = next(client.get_labeling_frontends(where=LabelingFrontend.name == "Editor"))

# One bounding-box tool per kind of object to label.
bbox_tools = [Tool(tool=Tool.Type.BBOX, name=tool_name) for tool_name in ("person", "animal")]
ontology_builder = OntologyBuilder(tools=bbox_tools)

project.setup(editor, ontology_builder.asdict())

# Read the ontology back from the API to get all of the ids, then index the
# feature schema ids by tool name.
ontology = ontology_builder.from_project(project)
schema_lookup = {}
for tool in ontology.tools:
    schema_lookup[tool.name] = tool.feature_schema_id

In [12]:
def get_labels(dr):
    """Load the stored label dict that belongs to a data row.

    The label file is found by taking the last '/'-separated component of
    ``dr.external_id``, replacing '.jpg' with '.json', and reading that file
    from the 'labels' directory.

    Args:
        dr: Object exposing an ``external_id`` string attribute.

    Returns:
        The parsed JSON content of the label file.
    """
    image_name = dr.external_id.split('/')[-1]
    label_name = image_name.replace('.jpg', '.json')
    label_name = f"labels/{label_name}"
    # Use a context manager so the file handle is closed right after parsing.
    with open(label_name) as label_file:
        return json.load(label_file)

In [13]:
# Collect every data row of the first dataset attached to the project.
first_dataset = list(project.datasets())[0]
datarows = list(first_dataset.data_rows())

In [14]:
# Build one MAL bounding-box annotation per stored box. Every box of an image
# is uploaded (not only the first one), and an image without boxes simply
# contributes nothing.
boxes = []
for datarow in datarows:
    for start_pt, end_pt in get_labels(datarow)['coords']:
        # Each stored box is a [start, end] pair of (x, y) corner points.
        left, top = start_pt
        right, bottom = end_pt
        boxes.append({
            'uuid' : str(uuid.uuid4()),
            'schemaId' : schema_lookup['animal'],
            'dataRow' : {'id' : datarow.uid},
            'bbox' : {
                'top' : top,
                'left' : left,
                'height' : bottom - top,
                'width' : right - left
            }
        })
    

In [16]:
# Send the prepared boxes under a unique upload name and block until the
# import has finished.
upload_name = f"upload-{uuid.uuid4()}"
upload = project.upload_annotations(name=upload_name, annotations=boxes)
upload.wait_until_done()

## Go to labelbox and label
* Most of the data is prelabeled so we just need to go through and make sure everything is correct
* None of the people in the images have been labeled so we are also going to add those annotations

In [17]:
# Show a direct link to the project's overview page in the Labelbox app.
project_url = f"https://app.labelbox.com/projects/{project.uid}/overview"
print(project_url)

https://app.labelbox.com/projects/ckq6zvmwm8sko0ya4fevdgsbf/overview
