# Create training data using Labelbox
* Download images and animal annotations
* Upload them to Labelbox using Model-Assisted Labeling (MAL)

In [None]:
import labelbox as lb
import json
from collections import defaultdict
from PIL import Image
import datetime
import os
import uuid
import numpy as np

## Download Images and Annotations
* The dataset contains images of animals in the wild and corresponding bounding boxes
* Read more about the dataset here: https://beerys.github.io/CaltechCameraTraps/

In [None]:
# Download data from here: https://beerys.github.io/CaltechCameraTraps/
# This file is 6GB so this might take a little while
# The download is skipped when 'eccv_18_all_images_sm' already exists in the
# working directory, so re-running the cell does not fetch the archive again.
if not os.path.exists('eccv_18_all_images_sm'):
    # Lines starting with `!` are notebook shell escapes: fetch the archive, then unpack it.
    !wget http://www.vision.caltech.edu/~sbeery/datasets/caltechcameratraps18/eccv_18_all_images_sm.tar.gz
    !tar -zxf eccv_18_all_images_sm.tar.gz

In [None]:
# Download the annotations
# Skipped when 'eccv_18_all_annotations' already exists in the working directory.
# NOTE(review): a later cell opens 'CaltechCameraTrapsECCV18.json' from the
# working directory — confirm where the archive actually places that file.
if not os.path.exists('eccv_18_all_annotations'):
    # Lines starting with `!` are notebook shell escapes: fetch the archive, then unpack it.
    !wget http://www.vision.caltech.edu/~sbeery/datasets/caltechcameratraps18/eccv_18_all_annotations.tar.gz
    !tar -zxf eccv_18_all_annotations.tar.gz

## Preprocess Data
* Sample images from video sequences
* Select only day time images and a subset of possible animals
* Convert the data into a format that is compatible with Labelbox

In [None]:
# Parse the dataset annotation file. The context manager closes the file
# handle as soon as parsing is finished instead of leaving it to the GC.
with open('CaltechCameraTrapsECCV18.json') as annotation_file:
    data = json.load(annotation_file)

# Re-key the category list by category id so a category can be looked up directly.
data['categories'] = {d['id']: d for d in data['categories']}

# Container for per-image annotation lists; a missing key yields a fresh empty list.
annotations = defaultdict(list)

In [None]:
# One image per video sequence to reduce correlation between training/testing images.
# Only the first image encountered for each 'seq_id' is kept, keyed by image id.
images = {}
ids = set()
for frame in data['images']:
    sequence = frame['seq_id']
    if sequence not in ids:
        ids.add(sequence)
        images[frame['id']] = frame
data['images'] = images

# Group annotations that carry a bounding box by their image id. An annotation
# without a 'bbox' instead removes its image from the candidate pool (if present).
for entry in data['annotations']:
    owner = entry['image_id']
    if entry.get('bbox') is not None:
        annotations[owner].append(entry)
    else:
        data['images'].pop(owner, None)

In [None]:
# These ids correspond to locations with a lot of people in the images that we can label
target_locations = {0, 125, 120}
target_classes = {'dog', 'cat', 'deer', 'bobcat', 'fox'}
# Minimum gap (in annotation-space pixels) required between a box and every image edge.
min_border_distance = 50


def process_image(image):
    """Filter one image record and rescale its boxes to the image on disk.

    Reads the module-level ``annotations`` and ``data`` mappings.

    Args:
        image: dict with the keys ``date_captured`` (``'%Y-%m-%d %H:%M:%S'``),
            ``location``, ``id``, ``height``, ``width`` and ``file_name``.

    Returns:
        ``None`` when the image is rejected, i.e. when it was captured outside
        hours 8-17 or during hour 12, its location is not in
        ``target_locations``, it has no annotations, any annotation's category
        is outside ``target_classes``, or any box is closer than
        ``min_border_distance`` to an image edge.
        Otherwise ``(im, box_coords, location)`` where ``im`` is the image as
        a numpy array and ``box_coords`` is a list of
        ``[(x0, y0), (x1, y1)]`` corner pairs in the loaded image's pixels.
    """
    captured = datetime.datetime.strptime(image['date_captured'], '%Y-%m-%d %H:%M:%S')
    # Only train on day time images; hour 12 is excluded as in the original filter.
    if not (7 < captured.hour < 18) or captured.hour == 12:
        return None

    if image['location'] not in target_locations:
        return None

    # `.get` avoids inserting empty entries into the defaultdict for unseen ids.
    annots = annotations.get(image['id'])
    if not annots:
        # Nothing to prelabel; previously this returned (None, [], location),
        # which broke the later image-saving and upload steps.
        return None

    h, w = image['height'], image['width']

    # Validate every annotation before touching the disk so rejected images
    # never pay for an image decode.
    raw_boxes = []
    for annot in annots:
        if data['categories'][annot['category_id']]['name'] not in target_classes:
            return None

        bbox = annot.get('bbox')
        assert bbox is not None
        # bbox layout is [x, y, box_width, box_height].
        x, y, box_w, box_h = bbox[:4]

        # Don't train on images where the animal is on the edge of the image
        if x < min_border_distance or y < min_border_distance:
            return None
        if (w - (x + box_w)) < min_border_distance or (h - (y + box_h)) < min_border_distance:
            return None

        raw_boxes.append((x, y, box_w, box_h))

    # Load the (possibly resized) image once and release the file handle right away.
    with Image.open(os.path.join('eccv_18_all_images_sm', image['file_name'])) as pil_image:
        im = np.array(pil_image)
    new_h, new_w = im.shape[:2]

    def scale(px, py):
        # Horizontal coordinates scale with the width ratio and vertical ones
        # with the height ratio (the original had the two axes swapped).
        return int((px / w) * new_w), int((py / h) * new_h)

    box_coords = [
        [scale(x, y), scale(x + box_w, y + box_h)]
        for x, y, box_w, box_h in raw_boxes
    ]
    return im, box_coords, image['location']


In [None]:
# Run every remaining image through process_image and keep only the ones
# that were not rejected (i.e. did not come back as None).
examples = []
for candidate in data['images'].values():
    processed = process_image(candidate)
    if processed is not None:
        examples.append(processed)
print(len(examples))

* Write the data to file so that we can reference it later for uploads and metadata

In [None]:
# Create the output folders. exist_ok makes re-running the cell safe without
# a separate check-then-create step.
os.makedirs("uploaded_images", exist_ok=True)
os.makedirs("labels", exist_ok=True)

# Paths of the saved images, in the same order as `examples`.
image_paths = []

for idx, (imm, coords, location) in enumerate(examples):
    # Image and label share the same index so they can be matched up later.
    image_path = os.path.join("uploaded_images", f"{idx}.jpg")
    image_paths.append(image_path)
    Image.fromarray(imm).save(image_path)
    with open(os.path.join("labels", f"{idx}.json"), 'w') as label_file:
        json.dump({'coords': coords, 'location': location}, label_file)


## Upload to Labelbox
* Setup a project
* Add the images to label
* Upload annotations using MAL
-----
For more information on this process, check out the example notebooks covering MAL:
https://github.com/Labelbox/labelbox-python/tree/master/examples#model-assisted-labeling

In [None]:
# Create the Labelbox API client used by the cells below.
# NOTE(review): no API key is passed here; presumably the SDK reads it from the
# environment (e.g. LABELBOX_API_KEY) — confirm before running.
client = lb.Client()

In [None]:
# Create the image project and the dataset that will hold the saved images.
project = client.create_project(name = "animal_demo_proj", media_type=lb.MediaType.Image)
dataset = client.create_dataset(name = "animal_demo_ds")

# create_data_rows returns a task that runs asynchronously; wait for it to
# finish so the batch creation below actually sees the uploaded rows.
upload_task = dataset.create_data_rows(image_paths)
upload_task.wait_till_done()

# Queue the dataset's rows in the project and turn on model-assisted labeling.
project.create_batches_from_dataset("batch", dataset.uid)
project.enable_model_assisted_labeling()

In [None]:
# Take the first labeling frontend whose name is "Editor".
editor = next(client.get_labeling_frontends(where=lb.LabelingFrontend.name == "Editor"))

# One bounding-box tool per class we want to label, in this order.
ontology_builder = lb.OntologyBuilder(
    tools=[
        lb.Tool(tool=lb.Tool.Type.BBOX, name=tool_name)
        for tool_name in ("person", "animal")
    ]
)

# Attach the editor and the ontology definition to the project.
project.setup(editor, ontology_builder.asdict())

# fetch ontology from api to get all of the ids
ontology = ontology_builder.from_project(project)

In [None]:
def get_labels(dr):
    """Load the saved label file that belongs to a data row.

    The last ``/``-separated component of ``dr.external_id`` is taken as the
    image file name; its ``.jpg`` is swapped for ``.json`` and the result is
    read from the ``labels/`` folder.

    Args:
        dr: object exposing an ``external_id`` string attribute.

    Returns:
        The parsed JSON content of the matching label file.
    """
    label_name = dr.external_id.split('/')[-1].replace('.jpg', '.json')
    label_name = f"labels/{label_name}"
    # Context manager so the file handle is closed deterministically.
    with open(label_name) as label_file:
        return json.load(label_file)

In [None]:
# Flatten the exported data rows of every batch in the project into one list.
datarows = [
    exported_row
    for batch in project.batches()
    for exported_row in batch.export_data_rows()
]

In [None]:
# Build one 'animal' bounding-box annotation per saved box.
boxes = []
for datarow in datarows:
    # Emit every box stored for the image, not just the first one; a row with
    # no saved boxes simply contributes nothing instead of raising IndexError.
    for start_pt, end_pt in get_labels(datarow)['coords']:
        # Each point is stored as (x, y): x maps to 'left', y maps to 'top'.
        left, top = start_pt
        right, bottom = end_pt
        boxes.append({
            'name' : 'animal',
            'dataRow' : {'id' : datarow.uid},
            'bbox' : {
                'top' : top,
                'left' : left,
                'height' : bottom - top,
                'width' : right - left,
            },
        })
    

In [None]:
# Send the prepared boxes to the project as an import; the random uuid in the
# name keeps the name different on every run of this cell.
upload = project.upload_annotations(name = f"upload-{uuid.uuid4()}", annotations = boxes)
# NOTE(review): presumably blocks until the import has finished; errors are not
# inspected here — consider checking the upload's status/errors afterwards.
upload.wait_until_done()

## Go to Labelbox and Label
* Most of the animal data is prelabeled; we want to go through it and make sure everything is correct
    * Make sure to use the hot keys to label quickly!
    * 'e' submits the image
    * '1' selects the person bounding box
    * '2' selects the animal bounding box
    * There are other helpful ones too! Check out the keyboard shortcuts panel in the top right of the editor.
* None of the people in the images have been labeled so we are also going to add those annotations

In [None]:
# Print a direct link to this project's overview page in the Labelbox web app.
print(f"https://app.labelbox.com/projects/{project.uid}/overview")