In [None]:
from pathlib import PurePath as ppth

# Create the install directory. -p makes this a no-op when the directory
# already exists, so re-running the cell does not print an error.
!mkdir -p ./blender
%cd blender

Blender_version_download_link_for_linux = "https://download.blender.org/release/Blender3.4/blender-3.4.0-linux-x64.tar.xz"

# -nc (no-clobber) skips the download when the archive is already present.
!wget -nc $Blender_version_download_link_for_linux

def get_filename(path):
  """Return the final component of ``path`` (a file path or URL) as a string."""
  final_component = ppth(path).name
  return str(final_component)

# Name of the archive file, taken from the last component of the download URL.
filename = get_filename(Blender_version_download_link_for_linux)

# Unpack into the current directory, dropping the archive's top-level folder
# from every extracted path.
!tar -xvf {filename} --strip-components 1

# Step back out of the blender directory.
%cd ..

In [None]:
import os

# dependency for CUDA rendering
# -y answers apt's confirmation prompt so the cell also completes in an
# unattended "Run All", where nobody is there to type "Y".
!apt update
!apt remove -y libtcmalloc-minimal4
!apt install -y libtcmalloc-minimal4

# The fully versioned library file name depends on the distro release, so look
# up what is actually installed instead of assuming 4.3.0. Sorting puts the
# shortest match first. If nothing matches, fall back to the original path.
import glob

_tcmalloc_default = "/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4.3.0"
_tcmalloc_found = sorted(glob.glob("/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4*"))
os.environ["LD_PRELOAD"] = _tcmalloc_found[0] if _tcmalloc_found else _tcmalloc_default

In [None]:
# Fetch the scene-generator project and make its root the working directory;
# later cells take the current directory as the project path.
!git clone https://github.com/GbotHQ/vqa-dataset-scene-generator-test.git
%cd vqa-dataset-scene-generator-test

In [None]:
from pathlib import Path as pth
from pathlib import PurePath as ppth

from random import random, uniform, randint

from src import config


# Project root: the working directory at the time this cell runs.
project_path = pth.cwd()

# Blender executable (absolute path), then the scene file and the driver
# script located inside the project.
blender_path = "/content/blender/blender"
blend_file_path = project_path.joinpath("blender", "scene.blend")
script_path = project_path.joinpath("src", "main.py")

In [None]:
def generator(index):
    """Build the randomized configuration for one render sample.

    Args:
        index: Sample number. It is zero-padded to 8 digits and appended to
            "dataset_render_" to name the sample's output directory under
            ``<project_path>/renders``.

    Returns:
        dict with the keys "general", "ground", "paper", "folds", "camera",
        "hdri" and "lights". "folds" and "lights" are two-element lists of
        dicts; the remaining values are dicts.
    """
    assets_dir = ppth(project_path, "test_assets")
    wood_texture_dir = ppth(assets_dir, "WoodenPlanks05_MR_2K")
    sample_dir = pth(project_path, "renders") / f"dataset_render_{str(index).zfill(8)}"

    # Fixed render settings; only the output location varies per sample.
    general_conf = {
        "resolution": (1024, 1024),
        "render_engine": "cycles",
        "cycles_device": "gpu",
        "cycles_samples": 8,
        "cycles_denoise": True,
        "output_path": str(sample_dir),
    }

    ground_conf = {
        "offset": uniform(-10, 10),
        "texture_rotation": uniform(0, 360),
        "displacement_strength": uniform(0.04, 0.2),
        "subdivisions": 9,
    }
    # Texture role -> file name inside the wood texture directory.
    texture_files = {
        "albedo": "WoodenPlanks05_2K_BaseColor.png",
        "roughness": "WoodenPlanks05_2K_Roughness.png",
        "depth": "WoodenPlanks05_2K_Height.png",
    }
    for kind, file_name in texture_files.items():
        ground_conf[f"texture_path_{kind}"] = str(wood_texture_dir / file_name)

    paper_conf = {
        "text_image_path": str(ppth(assets_dir, "lorem ipsum.psd")),
        "size": (21.0, 29.7),
        "subdivisions": 8,
        "crumpling_strength": uniform(0, 1.5),
        "fold_messiness": uniform(0.03, 0.4),
        "fold_smoothness": uniform(0, 1),
        "texture_rotation": uniform(0, 360),
        "offset": uniform(-10, 10),
    }

    folds_conf = []
    for _ in range(2):
        # A fold keeps a non-zero strength only when this draw exceeds 0.3.
        is_folded = random() > 0.3
        folds_conf.append(
            {
                "strength": uniform(0.1, 0.8) if is_folded else 0.0,
                "angle": uniform(-15, 15),
            }
        )
    # The second fold's angle is shifted by 90 relative to its own draw.
    folds_conf[1]["angle"] += 90

    camera_conf = {
        "focal_length": randint(24, 135),
        "relative_camera_distance": 1.3,
        "depth_of_field": True,
        "fstop": uniform(0.8, 1.8),
        "orbit": (uniform(0, 25), uniform(0, 360)),
        "look_at_2d": (0, 0),
    }

    hdri_conf = {
        "hdri_image_path": str(assets_dir / "canary_wharf_2k.exr"),
        "hdri_strength": uniform(0.02, 0.12),
        "hdri_image_rotation": uniform(0, 360),
    }

    lights_conf = []
    for _ in range(2):
        light = {
            "visible": True,
            "distance": uniform(2, 4),
            "orbit": (uniform(0, 45), uniform(0, 360)),
            "look_at_2d": (uniform(-0.4, 0.4), uniform(-0.4, 0.4)),
            "power": uniform(300, 800),
            "shadow_softness_radius": uniform(0.1, 0.8),
            "light_cone_angle": uniform(30, 90),
        }
        # Three random channels, rescaled so that they sum to 1.
        rgb = [uniform(0.7, 1) for _ in range(3)]
        rgb_sum = sum(rgb)
        light["color"] = [channel / rgb_sum for channel in rgb]
        lights_conf.append(light)

    second_light_on = random() > 0.7
    lights_conf[1]["visible"] = second_light_on
    if second_light_on:
        # both lights are visible, need to reduce light power
        for light in lights_conf:
            light["power"] /= 2

    return {
        "general": general_conf,
        "ground": ground_conf,
        "paper": paper_conf,
        "folds": folds_conf,
        "camera": camera_conf,
        "hdri": hdri_conf,
        "lights": lights_conf,
    }

In [None]:
# Number of sample configurations to produce.
n_samples_to_generate = 2
# One configuration dict per index 0 .. n_samples_to_generate - 1.
samples = list(map(generator, range(n_samples_to_generate)))
# Hand the whole batch to the project's config module.
config.write(samples)

In [None]:
import shutil

# Start from an empty renders directory. When the directory does not exist yet
# (e.g. the first run) there is nothing to delete, so that case is not an error.
try:
    shutil.rmtree("renders")
except FileNotFoundError:
    pass
%mkdir renders

# Run Blender without a UI on the scene file and execute the driver script;
# the arguments after the bare "--" are left for the script itself.
!"$blender_path" "$blend_file_path" --background --factory-startup --threads 0 --engine CYCLES --enable-autoexec --python "$script_path" -- --cycles-device CUDA

In [None]:
import numpy as np
import cv2 as cv
from google.colab.patches import cv2_imshow


def to_uint(img, dtype=np.uint8):
    """Convert a float image to an integer image spanning the dtype's range.

    Values are clipped to [0, 1], multiplied by the largest value ``dtype``
    can hold, and cast to ``dtype`` (the cast truncates, it does not round).
    """
    max_value = np.iinfo(dtype).max
    clipped = np.clip(img, 0, 1)
    return (clipped * max_value).astype(dtype)


def to_float(img, fdtype=np.float32):
    """Convert an integer image to floats by dividing by its dtype's maximum.

    Args:
        img: Array with an integer dtype.
        fdtype: Float dtype the array is cast to before the division.
    """
    full_scale = np.iinfo(img.dtype).max
    as_float = img.astype(fdtype)
    return as_float / full_scale


def imshow(img, downscale_count=1):
    """Display ``img`` in the notebook after shrinking it.

    Args:
        img: Image array; its first two axes are taken as (height, width).
        downscale_count: Divisor applied with floor division to the width
            and height before resizing.
    """
    width_height = np.array(img.shape[:2][::-1], np.int32)
    target_size = width_height // downscale_count
    cv2_imshow(cv.resize(img, target_size))


def imread_coords(path):
    """Load a rendered coordinate image and split it into coordinates and mask.

    Channel 0 of the image is used as the mask ("alpha"). Channel 1 is
    flipped (``1 - value``) and the channels after channel 0 are returned in
    reversed order. NOTE(review): ``remap_point`` compares these coordinates
    with a 2-component point, so the file is presumably 3-channel - confirm.

    Args:
        path: Path of the coordinate image.

    Returns:
        (coords, alpha): ``coords`` holds the reversed channels, with every
        pixel whose alpha is below 1 set to -1; ``alpha`` is channel 0 with a
        trailing axis of length 1.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path`` (``cv.imread``
            returns ``None`` instead of raising).
    """
    # unchanged to read as uint16
    raw = cv.imread(path, cv.IMREAD_UNCHANGED)
    if raw is None:
        # Without this check the failure would surface inside to_float as an
        # AttributeError on None, hiding the real cause.
        raise FileNotFoundError(f"could not read coordinate image (missing or unreadable): {path}")

    coords = to_float(raw)
    alpha = coords[..., 0, None]
    # flip y to match opencv coordinates
    coords[..., 1] = 1 - coords[..., 1]
    # Pixels that are not fully covered get the sentinel value -1.
    coords = np.where(alpha < 1, -1, coords[..., 1:])
    # Reverse the channel order of the remaining channels.
    coords = coords[..., ::-1]

    return coords, alpha


def bbox_to_corners(x0, y0, x1, y1):
    """Expand a two-corner box into its four corner points.

    Returns:
        Tuple of four (x, y) tuples in the order
        (x0, y0), (x1, y0), (x1, y1), (x0, y1).
    """
    first = (x0, y0)
    second = (x1, y0)
    third = (x1, y1)
    fourth = (x0, y1)
    return first, second, third, fourth


def remap_point(point, coords, src_img):
    """Find the pixel of ``coords`` whose stored coordinate is closest to ``point``.

    Args:
        point: (x, y) position in pixels of ``src_img``.
        coords: Array whose last axis holds a normalized coordinate per pixel.
        src_img: Source image; only its height and width are used, to
            normalize ``point``.

    Returns:
        Index tuple (as produced by ``np.unravel_index``) of the pixel in
        ``coords`` with the smallest distance to the normalized point.
    """
    src_width_height = src_img.shape[:2][::-1]
    normalized = np.asarray(point) / src_width_height

    # Per-pixel distance: the larger of the absolute per-axis differences.
    per_axis_gap = np.abs(normalized - coords)
    distance = per_axis_gap.max(axis=-1)

    # Flat index of the closest pixel, converted back to an array index.
    closest_flat = distance.argmin()
    return np.unravel_index(closest_flat, distance.shape)


def remap_bbox(x0, y0, x1, y1, coords, src_img):
    """Remap the four corners of a box with ``remap_point``.

    Returns:
        Array with one row per corner, in the corner order given by
        ``bbox_to_corners``; each row is the index returned by ``remap_point``.
    """
    remapped_corners = []
    for corner in bbox_to_corners(x0, y0, x1, y1):
        remapped_corners.append(remap_point(corner, coords, src_img))
    return np.asarray(remapped_corners)


def draw_rect(img, points):
    """Draw a closed outline through ``points`` and a filled dot on each point.

    Args:
        img: Image to draw on.
        points: Array with one point per row.

    Returns:
        The image returned by the OpenCV drawing calls.
    """
    outline_color = (255, 0, 255)
    img = cv.polylines(img, (points,), True, outline_color, 2)
    for center in points:
        # Thickness -1 asks OpenCV for a filled circle.
        img = cv.circle(img, center, 6, outline_color, -1)
    return img


# Inputs from renders/dataset_render_00000000/: the rendered image and its
# coordinate map, plus the flat source document from the test assets.
img = cv.imread("renders/dataset_render_00000000/image0001.png")
document_img = cv.imread("test_assets/lorem ipsum.png")
coords, alpha = imread_coords("renders/dataset_render_00000000/coordinates0001.png")

# Box on the source document, given as ((x0, y0), (x1, y1)).
bbox = ((180, 256), (370, 512))

# visualize
pts = np.array(bbox_to_corners(*bbox[0], *bbox[1]), np.int32)
# convert from numpy coordinates to opencv by reversing each remapped pair
pts_remapped = remap_bbox(*bbox[0], *bbox[1], coords, document_img)[:, ::-1]

# Source document with the original box.
document_img = draw_rect(document_img, pts)
imshow(document_img, 3)

# Render with the remapped box.
img = draw_rect(img, pts_remapped)
imshow(img, 3)