### This notebook creates JSON files from the [Crello dataset](https://github.com/CyberAgentAILab/canvas-vae/blob/main/docs/crello-dataset.md).
- We use the crello-dataset-v2 in this work.
- We provide two demos below: a JSON creator and a JSON parser.

In [1]:
import tensorflow as tf
from typing import Any, Dict
from PIL import Image
import io
import os
import math
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import Image as ipyImage
from ipycanvas import Canvas
import time
import json
from base64 import b64encode, b64decode

JSON_FILE_PATH = '../data/samples/crello_samples'

2023-09-27 04:29:36.395025: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# list_files() shuffles the matched file names by default. The JSON creator
# cell selects a sample by its position in the stream, so the file order must
# be deterministic for a given sampleID to always refer to the same record.
dataset = tf.data.Dataset.list_files('../data/crello-dataset-v2/test-*.tfrecord', shuffle=False)
dataset = tf.data.TFRecordDataset(dataset)

2023-09-27 04:29:38.418926: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2023-09-27 04:29:39.122947: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-27 04:29:39.123648: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:00:04.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.75GiB deviceMemoryBandwidth: 298.08GiB/s
2023-09-27 04:29:39.123711: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.11.0
2023-09-27 04:29:39.127246: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.11
2023-09-27 04:29:39.128827: I tensorflow/stream_executor/platform/default/d

In [3]:
def parse(serialized: bytes) -> Dict[str, tf.Tensor]:
    """Decode one serialized SequenceExample into a single flat feature dict.

    Only the fields listed below are parsed. Context (per-canvas) features and
    sequence (per-element) features are merged into one dictionary; the third
    value returned by ``tf.io.parse_sequence_example`` is discarded.

    Args:
        serialized: The serialized record to decode.

    Returns:
        A dict mapping feature name to the parsed tensor.
    """
    # Reusable feature specs: scalar per-canvas values ...
    ctx_string = tf.io.FixedLenFeature((), tf.string)
    ctx_int = tf.io.FixedLenFeature((), tf.int64)
    # ... and one value per element in the sequence.
    seq_string = tf.io.FixedLenSequenceFeature((), tf.string)
    seq_float = tf.io.FixedLenSequenceFeature((), tf.float32)
    # Colour is the only non-scalar sequence feature: three int64 per element.
    seq_color = tf.io.FixedLenSequenceFeature((3,), tf.int64)

    context_spec = {
        "id": ctx_string,
        "group": ctx_string,
        "format": ctx_string,
        "category": ctx_string,
        "canvas_height": ctx_int,
        "canvas_width": ctx_int,
        "length": ctx_int,
    }
    sequence_spec = {
        "type": seq_string,
        "left": seq_float,
        "top": seq_float,
        "width": seq_float,
        "height": seq_float,
        "color": seq_color,
        "opacity": seq_float,
        "image_bytes": seq_string,
        "text": seq_string,
        "font": seq_string,
        "font_size": seq_float,
        "text_align": seq_string,
        "angle": seq_float,
    }

    parsed = tf.io.parse_sequence_example(
        serialized,
        context_features=context_spec,
        sequence_features=sequence_spec,
    )
    features = dict(parsed[0])
    features.update(parsed[1])
    return features

#### Json Creator

In [4]:
# Export one sample of the parsed dataset as a JSON file.
# `sampleID` is the 1-based position of the sample in the dataset stream.
dataset_parse = dataset.map(parse)
sampleID = 276 # reset sampleID to get more samples

# Make sure the output directory exists so the write below cannot fail with
# FileNotFoundError on a fresh checkout.
os.makedirs(JSON_FILE_PATH, exist_ok=True)

index = 0
for index, example in enumerate(dataset_parse.as_numpy_iterator(), start=1):
    if index != sampleID:
        continue

    elementTypes = [i.decode("utf-8") for i in example['type']]
    # Canvas-level metadata; bytes are decoded and numpy scalars converted so
    # that everything is JSON-serializable.
    canvas = {
        'id': example['id'].decode("utf-8"),
        'group': example['group'].decode("utf-8"),
        'format': example['format'].decode("utf-8"),
        'category': example['category'].decode("utf-8"),
        'canvas_width': int(example['canvas_width']),
        'canvas_height': int(example['canvas_height']),
        'length': int(example['length']),
        'types': elementTypes,
    }
    # One `element_<i>` entry per element, in sequence order.
    for i in range(len(example['type'])):
        element = {
            'type': example['type'][i].decode("utf-8"),
            'left': float(example['left'][i]),
            'top': float(example['top'][i]),
            'width': float(example['width'][i]),
            'height': float(example['height'][i]),
            'color': example['color'][i].tolist(),
            'opacity': float(example['opacity'][i]),
            # Raw image bytes are not valid JSON text, so store them as base64.
            'image_bytes': b64encode(example['image_bytes'][i]).decode("utf-8"),
            'text': example['text'][i].decode("utf-8"),
            'font': example['font'][i].decode("utf-8"),
            'font_size': float(example['font_size'][i]),
            'text_align': example['text_align'][i].decode("utf-8"),
            'angle': float(example['angle'][i]),
            'depth': i,
        }
        canvas[f'element_{i}'] = element
    with open(os.path.join(JSON_FILE_PATH, f'json_sample_{sampleID}.json'), 'w') as outfile:
        json.dump(canvas, outfile)
    # The requested sample has been written; do not read the rest of the dataset.
    break
else:
    # The loop ended without reaching sampleID: fail loudly instead of leaving
    # a stale (or missing) JSON file for the parser cell.
    raise ValueError(
        f'sampleID {sampleID} is out of range: the dataset has only {index} samples'
    )

#### Json parser

In [5]:
# sampleID = 1
def line_break(text, height, font_size):
    """Re-wrap ``text`` into roughly ``height / font_size`` lines.

    Existing newlines are replaced by spaces, the text is split on single
    spaces, and the words are distributed evenly over the estimated number of
    lines.

    Args:
        text: The text to wrap.
        height: Height of the text box, in the same unit as ``font_size``.
        font_size: Font size used to estimate how many lines fit in ``height``.

    Returns:
        The words joined by spaces within a line and by ``'\\n'`` between
        lines, with no trailing separator.
    """
    words = text.replace('\n', ' ').split(' ')
    # A box shorter than half a line rounds to 0 lines, and a zero font size
    # cannot be divided by; both cases fall back to a single line instead of
    # raising ZeroDivisionError.
    lines = max(1, round(height / font_size)) if font_size > 0 else 1
    lineWordNum = math.ceil(len(words) / lines)
    print(f'lines:{lines}, lineWordNum:{lineWordNum}')
    # Group the words into chunks of lineWordNum. Joining the chunks (rather
    # than appending a separator after every word) avoids a trailing '\n' or
    # ' ', which would produce an empty extra line or skew text alignment.
    chunks = [
        ' '.join(words[start:start + lineWordNum])
        for start in range(0, len(words), lineWordNum)
    ]
    return '\n'.join(chunks)

# Load the canvas description for `sampleID`. The file is closed before any
# drawing starts, because rendering below sleeps between elements.
with open(os.path.join(JSON_FILE_PATH, f'json_sample_{sampleID}.json')) as json_file:
    data = json.load(json_file)

canvas_width = data['canvas_width']
canvas_height = data['canvas_height']
canvas_image = Canvas(width=canvas_width, height=canvas_height, sync_image_data=True)

for i in range(data['length']):
    element = data[f'element_{i}']
    # Element geometry is scaled by the canvas size to obtain pixel values.
    x = element['left']*canvas_width
    y = element['top']*canvas_height
    width = element['width']*canvas_width
    height = element['height']*canvas_height
    if element['type'] != 'textElement':
        # Non-text elements are drawn from their base64-encoded image bytes.
        image = ipyImage.from_file(io.BytesIO(b64decode(element['image_bytes'])))
        canvas_image.draw_image(image, x, y, width, height)
        time.sleep(1)
    else:
        text = element['text']
        font = element['font']
        font_size = element['font_size']
        text_align = element['text_align']
        angle = element['angle']

        # Centre of the text box, used as the rotation pivot; computed before
        # `x` is moved to the alignment anchor.
        center_x = x + width/2
        center_y = y + height/2

        # fill_text anchors at x according to text_align, so move x to the
        # right edge / middle of the box for right / center aligned text.
        if text_align == 'right':
            x = x + width
        elif text_align == 'center':
            x = x + width/2

        canvas_image.font = f'{font_size}px {font}'
        color_rgb = element['color']
        color_hex = '#%02x%02x%02x' % (color_rgb[0], color_rgb[1], color_rgb[2])
        canvas_image.stroke_style = color_hex
        canvas_image.fill_style = color_hex
        canvas_image.text_align = text_align
        print(text + ': ' + text_align)

        text = line_break(text, height, font_size)
        texts = text.split('\n')

        # Canvas.rotate is a method; assigning a number to it (as this cell
        # used to do) applies no rotation and overwrites the method. Rotate
        # around the box centre inside save()/restore() so the transform does
        # not leak into later elements.
        # NOTE(review): assumes `angle` is in radians (the previous code
        # converted it from radians to degrees) and that the pivot is the
        # element centre -- confirm against the dataset documentation.
        rotated = angle != 0
        if rotated:
            canvas_image.save()
            canvas_image.translate(center_x, center_y)
            canvas_image.rotate(angle)
            canvas_image.translate(-center_x, -center_y)

        for t, line in enumerate(texts):
            if not line.strip():
                # Nothing visible to draw; skip the draw call and the wait
                # while keeping the vertical position of later lines.
                continue
            canvas_image.fill_text(line, x, y+font_size*(t+1), max_width=width)
            time.sleep(2) # sleep more time when text can not be fully stored

        if rotated:
            canvas_image.restore()
canvas_image

INTERNATIONAL CONFERENCE ON: left
lines:3, lineWordNum:1
Medical and BioSciences: left
lines:2, lineWordNum:2
2019: left
lines:1, lineWordNum:1


Canvas(height=315, sync_image_data=True, width=851)