In [1]:
from itertools import groupby

import numpy as np

In [2]:
# Synthetic 1000x1000 boolean mask used as the benchmark input: three
# 100x100 squares of True placed along the main diagonal, False elsewhere.
binary_mask = np.zeros(shape=(1000, 1000), dtype=np.bool_)
for corner in (100, 300, 700):
    square = slice(corner, corner + 100)
    binary_mask[square, square] = True

# Specific libraries

## python-rle

In [3]:
%pip install --quiet python-rle

In [4]:
from rle import encode as rle_encode, decode as rle_decode

In [5]:
%timeit rle = rle_encode(binary_mask.ravel())

1.02 s ± 32.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# Encode once outside %timeit so the decode benchmark in the next cell has an
# input and measures decoding only.
rle = rle_encode(binary_mask.ravel())

In [7]:
%timeit rle_decode(*rle)

956 ms ± 27.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%pip uninstall --quiet --yes python-rle 

## pycocotools

In [9]:
%pip install --quiet pycocotools

In [10]:
from pycocotools.mask import encode as pycoco_encode, decode as pycoco_decode

In [11]:
%timeit pycoco_encode(np.asfortranarray(binary_mask))

1.22 ms ± 208 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [12]:
# Encode once up front; the decode timing in the next cell reuses this result.
# NOTE: this rebinds `rle`, which previously held the python-rle encoding.
rle = pycoco_encode(np.asfortranarray(binary_mask))

In [13]:
%timeit pycoco_decode(rle)

759 µs ± 103 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [14]:
%pip uninstall --quiet --yes pycocotools

# Solutions on the interweb

In [15]:
# https://www.kaggle.com/hackerpoet/even-faster-run-length-encoder

def binary_array_to_rle_0(img):
    """Run-length encode a binary image as 1-indexed starts and run lengths.

    The image is flattened in row-major (C) order and thresholded at 0.5.
    Runs that touch the first or last pixel of the flattened image are
    handled as well (without padding they would be silently dropped and the
    start/end arrays would no longer line up).

    Args:
        img: Array-like image; values strictly greater than 0.5 count as
            foreground.

    Returns:
        Tuple ``(starts_ix, lengths)`` of integer arrays of equal length:
        the 1-based position of the first pixel of each foreground run in
        the flattened image, and the number of pixels in that run.
    """
    foreground = np.asarray(img).ravel() > 0.5

    # Surround the data with background so every run has both a rising and a
    # falling edge, including runs at the very beginning or end.
    padded = np.concatenate(([False], foreground, [False]))

    rising = ~padded[:-1] & padded[1:]
    falling = padded[:-1] & ~padded[1:]

    # Edge index i in the padded array corresponds to flattened pixel i
    # (0-based); the +1 converts to the 1-based convention of the encoding.
    # For falling edges this yields the exclusive end of the run.
    starts_ix = np.flatnonzero(rising) + 1
    ends_ix = np.flatnonzero(falling) + 1
    lengths = ends_ix - starts_ix

    return starts_ix, lengths


%timeit binary_array_to_rle_0(binary_mask)

4.31 ms ± 407 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [16]:
# https://stackoverflow.com/questions/49494337/encode-numpy-array-using-uncompressed-rle-for-coco-dataset/49547872#49547872
def binary_array_to_rle_1(binary_mask):
    """Encode a binary mask as uncompressed RLE using ``itertools.groupby``.

    The mask is read in column-major (Fortran) order. ``counts`` holds the
    lengths of consecutive runs; when the first pixel equals 1 a leading 0 is
    emitted so the list always begins with the length of a run of zeros.

    Args:
        binary_mask: NumPy array holding the mask.

    Returns:
        Dict with ``"counts"`` (list of run lengths) and ``"size"`` (the mask
        shape as a list).
    """
    flat = binary_mask.ravel(order="F")
    # Each group is materialised as a list purely to measure its length.
    run_lengths = [len(list(run)) for _, run in groupby(flat)]
    if flat.size and flat[0] == 1:
        run_lengths.insert(0, 0)
    return {"counts": run_lengths, "size": list(binary_mask.shape)}


%timeit binary_array_to_rle_1(binary_mask)

36.7 ms ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
# https://stackoverflow.com/questions/49494337/encode-numpy-array-using-uncompressed-rle-for-coco-dataset/62208173#62208173
def binary_array_to_rle_2(binary_mask):
    """Encode a binary mask as uncompressed RLE with a plain Python loop.

    The mask is scanned pixel by pixel in column-major (Fortran) order. The
    scan starts in the "zero" state with an empty run, so a mask whose first
    pixel is non-zero gets a leading count of 0.

    Args:
        binary_mask: NumPy array holding the mask.

    Returns:
        Dict with ``"counts"`` (list of run lengths) and ``"size"`` (the mask
        shape as a list).
    """
    run_lengths = []
    previous = 0
    current_run = 0

    for pixel in binary_mask.ravel(order="F"):
        if pixel != previous:
            # Value changed: close the current run and start a new one.
            run_lengths.append(current_run)
            current_run = 0
            previous = pixel
        current_run += 1

    # Flush the run that was still open when the scan ended.
    run_lengths.append(current_run)

    return {"counts": run_lengths, "size": list(binary_mask.shape)}


%timeit binary_array_to_rle_2(binary_mask)

1.06 s ± 62 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Variation for multiclass mask

(that I tried a long time ago, and very likely inspired our library)

In [18]:
# Synthetic 1000x1000 uint8 label mask: three 100x100 squares on the main
# diagonal carrying class labels 1, 2 and 4; every other pixel is 0.
mask = np.zeros(shape=(1000, 1000), dtype=np.uint8)
for corner, label in ((100, 1), (300, 2), (700, 4)):
    square = slice(corner, corner + 100)
    mask[square, square] = label

In [19]:
def array_to_rle_0(mask: np.ndarray) -> dict:
    """Run-length encode a multi-class mask with ``itertools.groupby``.

    The mask is flattened in row-major (C) order. This variant builds the
    full list of ``(value, count)`` pairs, materialising every run as a list
    to measure its length.

    Args:
        mask: Array of class labels.

    Returns:
        Dict with ``"values"`` (tuple with the label of each run),
        ``"counts"`` (tuple with the length of each run) and ``"size"`` (the
        original shape). For an empty mask both tuples are empty.
    """
    shape = mask.shape
    flat = np.ravel(mask)
    # [(val1, cnt1), (val2, cnt2), ...]
    runs = [(val, len(list(group))) for val, group in groupby(flat)]
    if not runs:
        # zip(*[]) yields nothing, so there would be no columns to unpack.
        return {"values": (), "counts": (), "size": shape}
    # (val1, val2, ...), (cnt1, cnt2, ...)
    values, counts = zip(*runs)
    return {"values": values, "counts": counts, "size": shape}


%timeit array_to_rle_0(mask)

64.4 ms ± 6.52 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
def array_to_rle_1(mask: np.ndarray) -> dict:
    """Run-length encode a multi-class mask with ``itertools.groupby``.

    The mask is flattened in row-major (C) order. This variant feeds the
    ``(value, count)`` pairs to ``zip`` through a generator instead of first
    building a list of pairs; each run is still materialised as a list to
    measure its length.

    Args:
        mask: Array of class labels.

    Returns:
        Dict with ``"values"`` (tuple with the label of each run),
        ``"counts"`` (tuple with the length of each run) and ``"size"`` (the
        original shape). For an empty mask both tuples are empty.
    """
    shape = mask.shape
    flat = np.ravel(mask)
    # (val1, cnt1), (val2, cnt2), ... produced lazily
    runs = ((val, len(list(group))) for val, group in groupby(flat))
    # [(val1, val2, ...), (cnt1, cnt2, ...)] -- or [] when there are no runs
    columns = list(zip(*runs))
    if not columns:
        return {"values": (), "counts": (), "size": shape}
    values, counts = columns
    return {"values": values, "counts": counts, "size": shape}


%timeit array_to_rle_1(mask)

73.3 ms ± 2.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
def array_to_rle_2(mask: np.ndarray) -> dict:
    """Run-length encode a multi-class mask with ``itertools.groupby``.

    The mask is flattened in row-major (C) order. This variant counts each
    run by iterating over it (``sum(1 for ...)``) rather than materialising
    it as a list, to reduce memory consumption.

    Args:
        mask: Array of class labels.

    Returns:
        Dict with ``"values"`` (tuple with the label of each run),
        ``"counts"`` (tuple with the length of each run) and ``"size"`` (the
        original shape). For an empty mask both tuples are empty.
    """
    shape = mask.shape
    flat = np.ravel(mask)
    # (val1, cnt1), (val2, cnt2), ... produced lazily
    runs = ((val, sum(1 for _ in group)) for val, group in groupby(flat))
    # [(val1, val2, ...), (cnt1, cnt2, ...)] -- or [] when there are no runs
    columns = list(zip(*runs))
    if not columns:
        return {"values": (), "counts": (), "size": shape}
    values, counts = columns
    return {"values": values, "counts": counts, "size": shape}


%timeit array_to_rle_2(mask)

110 ms ± 12.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [22]:
def array_to_rle_3(mask_array: np.ndarray) -> dict:
    rle = {"size": mask_array.shape}
    mask_array = mask_array.ravel(order='F')
    pad_array = mask_array
    pad_array = np.append([pad_array[0] + 1], pad_array)
    pad_array = np.append(pad_array, [pad_array[-1] + 1])
    start = np.where(pad_array[1:] != pad_array[:-1])[0]
    rle["values"] = tuple(mask_array[start[:-1]].tolist())
    rle["counts"] = tuple((start[1:] - start[:-1]).tolist())
    return rle

%timeit array_to_rle_3(mask)

6.07 ms ± 391 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Checking our library

In [23]:
from multiclass_rle import array_to_multi_class_rle, multi_class_rle_to_array

In [24]:
%timeit array_to_multi_class_rle(mask)

6.16 ms ± 235 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [25]:
# Encode once outside %timeit so the decode benchmark in the next cell
# measures decoding only.
rle = array_to_multi_class_rle(mask)

In [26]:
%timeit multi_class_rle_to_array(rle)

381 µs ± 43.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


So… on my home computer… 

1. multi-class encoding is 5 times slower than the fastest binary encoding (pycocotools)  
   BUT if you have N classes and use binary encoding, then you will encode N times (once per class): so for N>5, our approach will be faster.
2. multi-class decoding is 2 times faster than the fastest binary decoding (pycocotools)  
   AND decoding is the most critical part, since it is done in the training loop (one decoding per epoch), while the encoding is done only once, during dataset preparation. 

So these are quite good results we've got here!