In [5]:
from itertools import groupby

import numpy as np

binary_mask = np.zeros((1000, 1000), dtype=bool)
binary_mask[100:200, 100:200] = 1
binary_mask[300:400, 300:400] = 1
binary_mask[700:800, 700:800] = 1

# Specific library

In [6]:
from rle import encode

%timeit encode(binary_mask.ravel())

961 ms ± 15.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
from pycocotools.mask import encode

%timeit encode(np.asfortranarray(binary_mask))

1.5 ms ± 18.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Solutions on the interweb

In [8]:
# https://www.kaggle.com/hackerpoet/even-faster-run-length-encoder

def binary_array_to_rle(img):
    flat_img = img.flatten()
    flat_img = np.where(flat_img > 0.5, 1, 0).astype(np.uint8)

    starts = np.array((flat_img[:-1] == 0) & (flat_img[1:] == 1))
    ends = np.array((flat_img[:-1] == 1) & (flat_img[1:] == 0))
    starts_ix = np.where(starts)[0] + 2
    ends_ix = np.where(ends)[0] + 2
    lengths = ends_ix - starts_ix

    return starts_ix, lengths


%timeit binary_array_to_rle(binary_mask)

2.11 ms ± 66.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [9]:
# https://stackoverflow.com/questions/49494337/encode-numpy-array-using-uncompressed-rle-for-coco-dataset/49547872#49547872
def binary_array_to_rle(binary_mask):
    rle = {"counts": [], "size": list(binary_mask.shape)}
    counts = rle.get("counts")
    for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order="F"))):
        if i == 0 and value == 1:
            counts.append(0)
        counts.append(len(list(elements)))
    return rle


%timeit binary_array_to_rle(binary_mask)

18.3 ms ± 331 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
# https://stackoverflow.com/questions/49494337/encode-numpy-array-using-uncompressed-rle-for-coco-dataset/62208173#62208173
def binary_array_to_rle(binary_mask):
    rle = {"counts": [], "size": list(binary_mask.shape)}
    counts = rle.get("counts")

    last_elem = 0
    running_length = 0

    for i, elem in enumerate(binary_mask.ravel(order="F")):
        if elem == last_elem:
            pass
        else:
            counts.append(running_length)
            running_length = 0
            last_elem = elem
        running_length += 1

    counts.append(running_length)

    return rle


%timeit binary_array_to_rle(binary_mask)

884 ms ± 14.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Variation for multiclass mask

In [11]:
mask = np.zeros((1000, 1000), dtype=np.uint8)
mask[100:200, 100:200] = 1
mask[300:400, 300:400] = 2
mask[700:800, 700:800] = 4

In [12]:
def array_to_rle(mask: np.array) -> dict:
    shape = mask.shape
    mask = np.ravel(mask)
    # [(val1, cnt1), (val2, cnt2), …]
    val_cnt = [(val, len(list(cnt))) for val, cnt in groupby(mask)]
    # [(val1, val2), (cnt1, cnt2), …]
    val_cnt = list(zip(*val_cnt))
    return {"values": val_cnt[0], "counts": val_cnt[1], "size": shape}


%timeit array_to_rle(mask)

51.6 ms ± 211 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
def array_to_rle(mask: np.array) -> dict:
    shape = mask.shape
    mask = np.ravel(mask)
    # [(val1, cnt1), (val2, cnt2), …]
    val_cnt = ((val, len(list(cnt))) for val, cnt in groupby(mask))  # generator
    # [(val1, val2), (cnt1, cnt2), …]
    val_cnt = list(zip(*val_cnt))
    return {"values": val_cnt[0], "counts": val_cnt[1], "size": shape}


%timeit array_to_rle(mask)

53.2 ms ± 1.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
def array_to_rle(mask: np.array) -> dict:
    shape = mask.shape
    mask = np.ravel(mask)
    # [(val1, cnt1), (val2, cnt2), …]
    val_cnt = (
        (val, sum(1 for _ in cnt)) for val, cnt in groupby(mask)
    )  # for loop to reduce mem consumption
    # [(val1, val2), (cnt1, cnt2), …]
    val_cnt = list(zip(*val_cnt))
    return {"values": val_cnt[0], "counts": val_cnt[1], "size": shape}


%timeit array_to_rle(mask)

68.7 ms ± 881 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
def array_to_rle(mask_array: np.ndarray) -> dict:
    # 8.53s!!
    rle = {"size": mask_array.shape}
    mask_array = mask_array.ravel(order='F')
    pad_array = mask_array
    pad_array = np.append([pad_array[0] + 1], pad_array)
    pad_array = np.append(pad_array, [pad_array[-1] + 1])
    start = np.where(pad_array[1:] != pad_array[:-1])[0]
    rle["values"] = tuple(mask_array[start[:-1]].tolist())
    rle["counts"] = tuple((start[1:] - start[:-1]).tolist())
    return rle

%timeit array_to_rle(mask)

3.44 ms ± 80 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
