In [1]:
# !pip install labelbox

In [1]:
import requests
from PIL import Image
import numpy as np
from io import BytesIO
from typing import Tuple
import json
import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2 as cv

In [2]:
def get_image(uri: str, timeout: float = 60.0) -> np.ndarray:
    """Download the image at ``uri`` and decode it into a NumPy array.

    Args:
        uri: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The decoded image as produced by ``np.array`` on the PIL image
        (shape and dtype depend on the image mode).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        PIL.UnidentifiedImageError: If the payload is not a readable image.
    """
    response = requests.get(uri, timeout=timeout)
    # Fail with a clear HTTP error instead of handing an error page to PIL.
    response.raise_for_status()
    # The context manager releases the PIL handle once the pixels are copied.
    with Image.open(BytesIO(response.content)) as img:
        return np.array(img)

In [3]:
def get_layer(uri: str) -> np.ndarray:
    """Download a single mask layer and decode it into a NumPy array.

    A layer is fetched and decoded exactly like a regular image, so this
    delegates to ``get_image`` rather than keeping a second copy of the
    download logic.

    Args:
        uri: URL of the layer image.

    Returns:
        The decoded layer as a NumPy array.
    """
    return get_image(uri)

In [4]:
def get_mask(objects: list) -> dict:
    """Fetch the layer of every annotated object, keyed by object title.

    Args:
        objects: Iterable of mappings, each providing a ``'title'`` and an
            ``'instanceURI'`` entry.

    Returns:
        Dict mapping each title to the array returned by ``get_layer`` for
        that object's ``'instanceURI'``.
    """
    # NOTE(review): objects that share a title overwrite one another (the last
    # one wins) -- confirm each title occurs at most once per label.
    return dict(
        (entry['title'], get_layer(entry['instanceURI']))
        for entry in objects
    )

In [10]:
def download(exports: list) -> Tuple[list, list]:
    """Download the image and mask layers of every labelled export row.

    Rows whose ``'Label'`` entry is falsy are ignored. A row whose image or
    layers cannot be downloaded is skipped with a warning, so one broken row
    does not abort the whole run and ``images`` / ``masks`` stay aligned.

    Args:
        exports: Export rows; each labelled row provides ``'Label'`` (with an
            ``'objects'`` list) and ``'Labeled Data'`` (the image URL).

    Returns:
        ``(images, masks)``: two lists of equal length where ``masks[i]`` is
        the ``get_mask`` result belonging to ``images[i]``.
    """
    import warnings  # local import keeps this cell self-contained

    masks = []
    images = []
    for export in tqdm(exports):
        label = export['Label']
        if not label:
            continue
        objects = label['objects']
        img_uri = export['Labeled Data']
        try:
            mask = get_mask(objects)
            image = get_image(img_uri)
        except Exception as exc:
            # ``Exception`` (not a bare ``except``) lets Ctrl-C / kernel
            # interrupt stop a long download, and the warning makes dropped
            # rows visible instead of silently losing them.
            warnings.warn(f"Skipping {img_uri}: {exc!r}", RuntimeWarning)
            continue
        # Append only after both downloads succeeded to keep the lists in sync.
        masks.append(mask)
        images.append(image)
    return images, masks

In [11]:
# Read the export file. JSON is UTF-8 by specification, so the encoding is
# stated explicitly instead of relying on the platform's locale default.
with open('export-2021-06-14T15_34_43.759Z.json', 'r', encoding='utf-8') as file:
    exports = json.load(file)

# Fetches every labelled row over the network; the recorded run below took
# about 42 minutes for 763 rows.
images, masks = download(exports)

100%|██████████████████████████████████████████████████████████████████████████████████████████| 763/763 [42:08<00:00,  3.31s/it]


In [12]:
def get_sparse_mask(layers: dict) -> np.ndarray:
    """Collapse per-class layers into a single class-index mask.

    Every layer is converted to grayscale, scaled so that its brightest pixel
    equals 1 and multiplied by its class index (``oil`` = 0, ``emulsion`` = 1,
    ``water`` = 2). The weighted layers are summed element-wise and a trailing
    channel axis is appended.

    A layer without any non-zero pixel cannot be normalised; it is replaced by
    an all-NaN layer, which makes the whole resulting mask NaN so callers can
    detect the sample with ``np.isnan``.

    Args:
        layers: Mapping from class title to a 4-channel (RGBA) image array,
            as required by ``cv.COLOR_RGBA2GRAY``. All layers are expected to
            share the same height and width.

    Returns:
        Float array of shape ``(H, W, 1)``. For an empty ``layers`` mapping
        the sum degenerates to ``0.0`` and the result has shape ``(1,)``.

    Raises:
        KeyError: If a title is not ``'oil'``, ``'emulsion'`` or ``'water'``.
    """
    indexes = {'oil': 0, 'emulsion': 1, 'water': 2}
    # NOTE(review): 'oil' has weight 0, so oil pixels end up as 0 just like
    # pixels covered by no layer, and overlapping layers add their indices
    # (emulsion + water -> 3) -- confirm both are intended.
    new_layers = []
    for layer_name, layer in layers.items():
        class_index = indexes[layer_name]
        grayscale_layer = cv.cvtColor(layer, cv.COLOR_RGBA2GRAY)
        max_value = grayscale_layer.max()
        if max_value != 0:
            new_layer = grayscale_layer / max_value * class_index
        else:
            # Use a full-size NaN layer rather than a scalar NaN: a scalar
            # mixed with 2-D layers forms a ragged list that np.sum rejects on
            # NumPy >= 1.24, and it would change the output shape when every
            # layer is empty.
            new_layer = np.full(grayscale_layer.shape, np.nan)
        new_layers.append(new_layer)
    mask = np.expand_dims(np.sum(new_layers, axis=0), axis=-1)
    return mask

In [13]:
# Convert every layer dict into a single class-index mask.
sparse_masks = list(map(get_sparse_mask, tqdm(masks)))

# Collect the positions of masks that contain at least one NaN value.
indexes_to_drop = []
for i, mask in enumerate(tqdm(sparse_masks)):
    if np.isnan(mask).any():
        indexes_to_drop.append(i)
print(indexes_to_drop)

100%|█████████████████████████████████████████████████████████████████████████████████████████| 763/763 [00:01<00:00, 667.38it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████| 763/763 [00:00<00:00, 14387.50it/s]

[]





In [14]:
# Leave out the samples listed in ``indexes_to_drop`` (masks containing NaN)
# so they never reach the saved dataset; images and masks are filtered with
# the same positions to keep them paired.
dropped = set(indexes_to_drop)
kept = [i for i in range(len(sparse_masks)) if i not in dropped]

# np.save appends the '.npy' suffix to these names.
# NOTE(review): np.array(...) assumes all images (and all masks) share one
# shape -- confirm against the dataset.
np.save('images', np.array([images[i] for i in kept]))
np.save('masks', np.array([sparse_masks[i] for i in kept]))