In [1]:
import os
import json

from tqdm.auto import tqdm

import numpy as np

import ast
from PIL import Image
from io import BytesIO

In [2]:
# Root directory of the dataset (relative to the notebook's working directory).
# The processing loop walks its immediate sub-directories, reading
# "metadata.json" from each and writing "masked_image.png" next to it.
DATA_DIR = "data/cleaned_aida"

In [3]:
def string_to_bytes(string):
    """Turn the textual form of a bytes literal (e.g. "b'abc'") back into bytes.

    The text is parsed with ``ast.literal_eval``, which only evaluates Python
    literals and never executes arbitrary code.

    Args:
        string: Source text of a Python bytes literal.

    Returns:
        The decoded ``bytes`` object.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when the
            text is not a valid Python literal.
        TypeError: When the text is a valid literal of some other type
            (str, int, list, ...) rather than bytes.
    """
    value = ast.literal_eval(string)
    # literal_eval accepts any literal; fail here with a clear message instead
    # of letting a non-bytes value surface later as an opaque BytesIO error.
    if not isinstance(value, bytes):
        raise TypeError(
            f"expected a bytes literal, got {type(value).__name__}"
        )
    return value

def parse_masks(png_mask_strs):
    """Decode serialized image masks into grayscale pixel data.

    Every entry of ``png_mask_strs`` is turned into raw bytes via
    ``string_to_bytes``, opened as an image from an in-memory buffer,
    converted to mode "L", and reduced to the result of ``getdata()``.

    Args:
        png_mask_strs: Iterable of serialized masks, one string per mask.

    Returns:
        A list holding one ``getdata()`` result per input mask, in input order.
    """
    def _pixels(encoded):
        # BytesIO lets the image library read the payload without a temp file.
        grayscale = Image.open(BytesIO(string_to_bytes(encoded))).convert("L")
        return grayscale.getdata()

    return [_pixels(encoded) for encoded in png_mask_strs]

def construct_masked_image(image_data):
    """Overlay every mask of one sample onto a single grayscale image.

    Args:
        image_data: Mapping with the keys "width", "height" and "png_masks";
            the latter is passed to ``parse_masks``.

    Returns:
        A new mode-"L" image of size (width, height). A pixel keeps the
        non-zero value of the last mask that covers it; pixels that no mask
        covers stay 0.
    """
    width, height = image_data["width"], image_data["height"]
    layers = parse_masks(image_data["png_masks"])

    canvas = Image.new(mode="L", size=(width, height))

    # Flat buffer indexed the same way as the flattened per-mask pixel data.
    composite = [0] * (width * height)
    for layer in layers:
        for position, value in enumerate(layer):
            if value == 0:
                # Zero means "not part of this mask": leave earlier layers intact.
                continue
            composite[position] = value

    canvas.putdata(composite)
    return canvas

In [6]:
# Regenerate "masked_image.png" for every sample directory under DATA_DIR.
for filename in tqdm(os.listdir(DATA_DIR)):
    fp = os.path.join(DATA_DIR, filename)
    if not os.path.isdir(fp):
        # Only sub-directories are samples; skip stray top-level files.
        continue

    # Load the metadata and build the image BEFORE touching any existing
    # output: if either step fails, the previously generated file survives
    # instead of being deleted with nothing to replace it.
    with open(os.path.join(fp, "metadata.json"), encoding="utf-8") as f:
        metadata = json.load(f)

    masked_image = construct_masked_image(metadata['image_data'])

    # save() overwrites an existing file, so no explicit delete is needed.
    masked_image_path = os.path.join(fp, "masked_image.png")
    masked_image.save(masked_image_path)

  0%|          | 0/10003 [00:00<?, ?it/s]