# Download and Convert Labels from Scale into VOC Format

This notebook looks for Scale task files such as `task-5e53f31f4e39270010025f6f.json`, then downloads each task's label mask, names it after its source image, and converts it to a Pascal VOC format label, consistent with the existing dataset.

In [None]:
import os
import cv2
import json
import glob
import urllib.request

import os.path as osp

import numpy as np
import PIL.Image

In [None]:
def lblsave(filename, lbl):
    """Save a pixel-wise class label array as an indexed-color (mode 'P') PNG.

    Args:
        filename: Destination path. '.png' is appended when the path has any
            other extension (or none).
        lbl: Array (or array-like) of class labels. All values must satisfy
            -1 <= value < 255; the data is cast to uint8 before saving.

    Raises:
        ValueError: If `lbl` is empty or contains values outside the
            supported range.
    """
    if osp.splitext(filename)[1] != '.png':
        filename += '.png'

    lbl = np.asarray(lbl)

    # Validate before touching imgviz/PIL so bad input fails fast with a
    # clear message instead of an opaque reduction error on an empty array.
    if lbl.size == 0:
        raise ValueError(
            '[%s] Cannot save an empty label array as PNG.' % filename
        )

    # Assume label ranges [-1, 254] for int32,
    # and [0, 255] for uint8 as VOC.
    # Written as `not (...)` so NaN values (for which every comparison is
    # False) are rejected as well.
    if not (lbl.min() >= -1 and lbl.max() < 255):
        raise ValueError(
            '[%s] Cannot save the pixel-wise class label as PNG. '
            'Please consider using the .npy format.' % filename
        )

    # Imported lazily: only needed for the colormap once input is known-good.
    import imgviz

    lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P')
    colormap = imgviz.label_colormap()
    lbl_pil.putpalette(colormap.flatten())
    lbl_pil.save(filename)

In [None]:
# Directories used on earlier runs, kept for reference (the trailing number
# is presumably the task count found there -- TODO confirm).
#scale_tasks_path = '/scratch/ssd/gallowaa/cciw/dataset_raw_v0-2-x/Test/GLNI/land/scale/' # 18
#scale_tasks_path = '/scratch/ssd/gallowaa/cciw/dataset_raw_v0-2-x/Test/GLNI/port/scale/' # 4
#scale_tasks_path = '/scratch/ssd/gallowaa/cciw/dataset_raw_v0-2-x/Test/WHERD/scale/' # 6
#scale_tasks_path = '/scratch/ssd/gallowaa/cciw/dataset_raw_v0-2-x/Test/Tripod/503/' # 1
#scale_tasks_path = '/scratch/ssd/gallowaa/cciw/dataset_raw/Train/2017-08-bak-16-good-labels/' # 3
#scale_tasks_path = '/scratch/ssd/gallowaa/cciw/dataset_raw/Test/Lab' # 3
#scale_tasks_path = '/scratch/ssd/gallowaa/cciw/dataset_raw/Train/2018-06/land' # 14

# Active directory; requires the DATA_PATH environment variable to be set.
#scale_tasks_path = osp.join(os.environ['DATA_PATH'], 'cciw/dataset_raw/Train/2016-07/')
scale_tasks_path = osp.join(os.environ['DATA_PATH'], 'cciw/dataset_raw/Train/2017-06/to_label/')

# glob's result order depends on the file system; sort so the task order is
# reproducible from run to run.
task_list = sorted(glob.glob(osp.join(scale_tasks_path, 'task-*.json')))
print(len(task_list))
task_list

In [None]:
# For every Scale task file: download its combined annotation image and write
# a VOC-style indexed-color label PNG named after the source image.
for i, task_path in enumerate(task_list):

    with open(task_path, 'r') as f:
        att = json.load(f)

    # Output name = last path component of the attachment, cut at its first '.'.
    source_image_name = att['params']['attachment']
    source_image_root = source_image_name.split('/')[-1].split('.')[0]

    outfile = osp.join(scale_tasks_path, source_image_root)
    scale_mask_path = outfile + '_scale.png'

    # Download the pink and green mask from Scale. It is kept on disk as
    # '<name>_scale.png'; the VOC format mask is written to '<name>.png'.
    urllib.request.urlretrieve(
        att['response']['annotations']['combined']['image'], filename=scale_mask_path)

    np_img = cv2.imread(scale_mask_path) # in BlueGreenRed (BGR) format
    if np_img is None:
        # cv2.imread reports an unreadable file by returning None, not raising.
        raise IOError('Could not read downloaded mask: %s' % scale_mask_path)

    # Pixels whose red channel (index 2 in BGR) equals 60 become class 1;
    # all other pixels stay 0.
    lbl = np.zeros(np_img.shape[:2], dtype=np.uint8)
    lbl[np_img[:, :, 2] == 60] = 1

    lblsave(outfile + '.png', lbl) # save as indexed color RGB image
    #cv2.imwrite(outfile + '.png', np_img) # this is incorrect and saves as a normal RGB image
    print('Saved item %d' % i, outfile)