# Labelbox Export and Convert

This notebook will: 
- download the labels from a Labelbox project
- convert them into a format that works for training

## Labelbox Export

In [13]:
# Imports and configuration: the FiftyOne package plus the two paths the later cells read.
import fiftyone as fo

labelboxExportJson =  "./export-2021-01-28T14-03-02.769Z.json" # Labelbox JSON export; download a fresh export and update this path
# Directory handed to the Labelbox importer as its download location for media files
mediaDir = "./media"

In [24]:
# Do the groundwork for importing: create (or reopen) the dataset and declare the Labelbox ID field
import fiftyone.utils.labelbox as foul
from uuid import uuid4
labelbox_id_field = "labelbox_id"
dataset_name = "skyscan"

# Reuse the persistent dataset when it already exists, so this cell can be re-run
# (e.g. after a kernel restart) instead of failing on the name collision.
if fo.dataset_exists(dataset_name):
    dataset = fo.load_dataset(dataset_name)
else:
    dataset = fo.Dataset(name=dataset_name)
dataset.persistent = True

# It looks like the importer needs `labelbox_id_field` to already be part of the dataset schema.
# The workaround is a placeholder sample that carries the field.
# It is only added while the field is missing from the schema: a re-run therefore never adds a
# second placeholder and never overwrites the Labelbox IDs of samples that were already imported.
# NOTE(review): the placeholder stays in the dataset, so later cells will process it like any other sample.
if labelbox_id_field not in dataset.get_field_schema():
    placeholder = fo.Sample(filepath="./sample.jpg")
    dataset.add_samples([placeholder])
    print(placeholder)
    placeholder[labelbox_id_field] = str(uuid4())
    placeholder.save()

<Sample: {
    'id': '6012ce974909229d4aef6675',
    'media_type': 'image',
    'filepath': '/Users/lberndt/Projects/Plane-Tracker/SkyScan/voxel51-import/sample.jpg',
    'tags': BaseList([]),
    'metadata': None,
}>
 100% |████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3798/3798 [30.1m elapsed, 0s remaining, 2.2 samples/s]      


In [None]:
# Load the Labelbox export into the Voxel51 dataset; `mediaDir` is passed as the download directory
foul.import_from_labelbox(
    dataset,
    labelboxExportJson,
    download_dir=mediaDir,
    labelbox_id_field=labelbox_id_field,
)

In [60]:
# It looks like the importer is very naive: it downloads each file and names it after the last part of the URL.
# That part still carries the URL's query string ("?..."), so the saved file ends up without a usable extension.
# For every affected sample this cell copies the file to a name without the query string, points the
# sample at the copy, records the MIME type in the sample metadata, and saves the sample to the DB.
# The original (badly named) file is left in place.
# There is still some sort of error when using the UI, but all the other functions seem to work.

import mimetypes
import os
import shutil
from pathlib import Path

for sample in dataset:
    originalPath = Path(sample.filepath)
    if "?" not in originalPath.name:
        continue  # nothing to strip; this also makes the cell safe to re-run

    # Everything before the first "?" is the real filename
    correctFilename = originalPath.name.split("?")[0]
    print("Correct Filename is: " + correctFilename)
    correctFilepath = str(originalPath.parent / correctFilename)
    print("Correct Filepath is: " + correctFilepath)
    shutil.copyfile(sample.filepath, correctFilepath)
    sample["filepath"] = correctFilepath

    # Derive the MIME type from the restored extension; fall back to JPEG when it cannot be guessed
    mimeType = mimetypes.guess_type(correctFilepath)[0] or "image/jpeg"

    # Metadata is None until it has been computed, so create it rather than indexing into None
    metadata = sample.metadata
    if metadata is None:
        metadata = fo.ImageMetadata(mime_type=mimeType)
    else:
        metadata.mime_type = mimeType
    sample.metadata = metadata
    sample.save()

In [43]:
# Add a label & tag that captures whether the image was accepted (there was a plane)
# or skipped (there was no plane).
from fiftyone import ViewField as F
label_field = "plane_ground_truth" 


def _label_and_tag(view, label, tag):
    """Store a Classification with `label` in `label_field` and add `tag` to every sample in `view`.

    The tag is appended only when it is not already present, so re-running the
    cell does not accumulate duplicate tags on a sample.
    """
    for sample in view:
        sample[label_field] = fo.Classification(label=label)
        if tag not in sample.tags:
            sample.tags.append(tag)
        sample.save()


# Samples that have a value in the "model" field count as containing a plane
model_view = dataset.exists("model")
_label_and_tag(model_view, "plane", "plane")

# Every other sample: "model" is either absent from the document OR stored as None.
# The two conditions must be OR-ed; AND-ing them only matches documents where the
# field is absent and silently leaves samples with an explicit None unlabelled.
skipped_view = dataset.match(
    {"$or": [{"model": {"$exists": False}}, {"model": {"$eq": None}}]}
)
_label_and_tag(skipped_view, "noplane", "noPlane")

In [59]:
# Export the images in a layout that Keras is happy with: a directory tree keyed on the
# plane_ground_truth label, so images with a plane land in one directory and images
# without a plane land in another.
export_dir = "./export"  # destination root, for example

dataset.export(
    label_field=label_field,
    dataset_type=fo.types.ImageClassificationDirectoryTree,
    export_dir=export_dir,
)

 100% |████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3798/3798 [6.2s elapsed, 0s remaining, 603.4 samples/s]      
