In [1]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import pycocotools.mask as mask_utils

In [2]:
def rle_decode(mask_rle, shape):
    """
    Decodes run-length encoded segmentation mask string into 2d array

    The string is a whitespace-separated sequence of ``start length`` pairs. Starts are
    1-indexed positions in the flattened mask, counted column by column (top to bottom,
    then left to right).

    Parameters
    ----------
    :param mask_rle (str): Run-length encoded segmentation mask string.
    :param shape (tuple): (height, width) of array to return
    :return mask [numpy.ndarray of shape (height, width)]: Decoded 2d segmentation mask
    :raises ValueError: If the string does not hold an even number of values.
    """
    height, width = shape[0], shape[1]

    # Splits the RLE string into a list of string by whitespaces.
    tokens = mask_rle.split()
    if len(tokens) % 2:
        # An unpaired value would otherwise be broadcast silently into a wrong mask.
        raise ValueError(
            f"RLE string must contain 'start length' pairs, got {len(tokens)} values"
        )

    # Even positions are the run starts, odd positions their respective lengths.
    starts, lengths = [np.asarray(x, dtype=int) for x in (tokens[0::2], tokens[1::2])]

    # Starts are 1-indexed; shift to 0-indexed so that [start, start + length) is the run.
    starts -= 1
    ends = starts + lengths

    # Create a 1D array of size H*W of zeros
    mask = np.zeros(height * width, dtype=np.uint8)

    # Fill this array with ones in the positions where there is a mask using the RLE information
    for start, end in zip(starts, ends):
        mask[start:end] = 1

    # The flat index runs down the columns, so the buffer is laid out as (width, height);
    # transposing yields the (height, width) mask. Reshaping straight to (height, width)
    # and transposing is only correct for square images.
    return mask.reshape((width, height)).T

In [3]:
def binary_mask_to_rle(binary_mask):
    """
    Encode a binary 2d mask as a JSON-serialisable COCO compressed RLE dictionary.

    Checkout: https://cocodataset.org/#format-results
    :param binary_mask [numpy.ndarray of shape (height, width)]: Decoded 2d segmentation mask.
        Any dtype that casts to uint8 (e.g. bool) is accepted.

    This function returns the following dictionary:
    {
        "counts": encoded mask suggested by the official COCO dataset webpage.
        "size": the size of the input mask/image
    }
    """
    # The encoder needs uint8 data in Fortran (column-major) order; casting here lets
    # bool / int masks through instead of failing inside pycocotools.
    binary_mask_fortran = np.asfortranarray(binary_mask, dtype=np.uint8)
    # Encode the mask as specified by the official COCO format
    encoded_mask = mask_utils.encode(binary_mask_fortran)
    return {
        # We must decode the byte encoded string or otherwise we cannot save it as a JSON file
        'counts': encoded_mask["counts"].decode(),
        'size': list(binary_mask_fortran.shape),
    }

In [4]:
def _mask_to_coco_annotation(mask, ann_id, image_id, category_id):
    """Build one COCO annotation dict (compressed RLE, area, bbox) from a binary 2d mask."""
    rle = mask_utils.encode(np.asfortranarray(mask))
    # The encoder returns the counts as bytes, which cannot be written to JSON.
    rle["counts"] = rle["counts"].decode()
    return {
        'id': ann_id,
        'image_id': image_id,
        'category_id': category_id,
        'segmentation': rle,
        'area': int(mask_utils.area(rle)),
        'bbox': list(map(int, mask_utils.toBbox(rle))),
        'iscrowd': 0
    }


def generate_coco_from_df(df, image_height=256, image_width=256, include_background=True, num_classes=4):
    """
    Build a COCO-style dictionary from a table of run-length encoded masks.

    Each row of ``df`` describes one image: ``ImageId`` is stored as the image's
    ``file_name`` and ``EncodedPixels{k}`` holds the RLE string of class ``k``
    (missing column, NaN or empty string means the class is absent in that image).

    :param df (pandas.DataFrame): One row per image. The image id is the row's index
        label + 1, so the index must be integer-valued (and unique for unique ids).
    :param image_height (int): Height assigned to every image and used to decode masks.
    :param image_width (int): Width assigned to every image and used to decode masks.
    :param include_background (bool): If True, add a ``no_object`` category (id 0) and give
        every image without any class mask one full-image annotation of that category.
    :param num_classes (int): Number of classes; columns ``EncodedPixels1`` ..
        ``EncodedPixels{num_classes}`` are read and categories ``class_1`` ..
        ``class_{num_classes}`` are created.
    :return (dict): Dictionary with the ``images``, ``annotations`` and ``categories`` lists.
        Annotation ids start at 1 and increase in creation order.
    """
    class_ids = range(1, num_classes + 1)

    images = []
    annotations = []
    categories = [{'id': class_id, 'name': f'class_{class_id}'} for class_id in class_ids]

    if include_background:
        # The background category uses id 0; it is listed after the real classes.
        categories.append({'id': 0, 'name': 'no_object'})

    ann_id = 1

    for img_idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing Images", ncols=100):
        image_id = img_idx + 1

        images.append({
            'id': image_id,
            'file_name': row['ImageId'],
            'height': image_height,
            'width': image_width
        })

        has_annotation = False

        for class_id in class_ids:
            encoded = row.get(f'EncodedPixels{class_id}')
            if pd.isna(encoded) or encoded == '':
                continue

            has_annotation = True
            mask = rle_decode(encoded, shape=(image_height, image_width))
            annotations.append(_mask_to_coco_annotation(mask, ann_id, image_id, class_id))
            ann_id += 1

        if not has_annotation and include_background:
            # No class present: mark the whole image as 'no_object'.
            full_mask = np.ones((image_height, image_width), dtype=np.uint8)
            annotations.append(_mask_to_coco_annotation(full_mask, ann_id, image_id, 0))
            ann_id += 1

    return {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }

In [5]:
# Image size in pixels, passed to the COCO conversion below.
height, width = 256, 256
# NOTE(review): data_path is not referenced by the other cells shown here.
data_path = '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/train_subimages'

In [6]:
# Load the per-image table; the CSV's first column is used as the DataFrame index.
df = pd.read_csv(
    '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/subimages.csv',
    index_col=0,
)
df.head()

Unnamed: 0,ImageId,EncodedPixels1,EncodedPixels2,EncodedPixels3,EncodedPixels4
0,/home/eas/Enol/pycharm_projects/clipseg_steel_...,,,,
1,/home/eas/Enol/pycharm_projects/clipseg_steel_...,,,,
2,/home/eas/Enol/pycharm_projects/clipseg_steel_...,,,,
3,/home/eas/Enol/pycharm_projects/clipseg_steel_...,,,,
4,/home/eas/Enol/pycharm_projects/clipseg_steel_...,,,64001 64 64257 192 64513 1024,


In [7]:
# Convert every row of the table into COCO images/annotations, adding a
# 'no_object' annotation for images without any encoded mask.
COCO_json = generate_coco_from_df(
    df,
    image_height=height,
    image_width=width,
    include_background=True,
)

Processing Images: 100%|████████████████████████████████████| 84946/84946 [00:15<00:00, 5487.72it/s]


In [8]:
# Write the COCO dictionary to disk as a single JSON document.
with open(
    '/home/eas/Enol/pycharm_projects/clipseg_steel_defect/Severstal/annotations_COCO.json',
    mode='w',
) as f:
    json.dump(COCO_json, fp=f)