In [None]:
import os

# dependency for blender CUDA rendering
!apt update
!apt remove libtcmalloc-minimal4
!apt install libtcmalloc-minimal4
os.environ["LD_PRELOAD"] = "/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4.3.0"

In [None]:
# Fetch the rendering pipeline sources and switch into the checkout: the
# relative paths used by the following cells (blender/scene.blend, src/main.py,
# config, renders, test_assets) and `from src import config` are resolved
# against this directory.
# NOTE(review): re-running this cell from inside the checkout would clone a
# nested copy — run it only once per session.
!git clone https://github.com/GbotHQ/Blender-3D-document-rendering-pipeline.git
%cd "Blender-3D-document-rendering-pipeline"

In [None]:
# Run the repository's helper script. Presumably it downloads and unpacks the
# Blender 3.4.0 Linux build under /content, which is where `blender_path` in
# the next cell expects the binary — TODO confirm against the script.
!./download_blender_binary.sh

In [None]:
from pathlib import Path as pth

# Locations of the Blender binary, the scene file, the driver script and the
# render output. Each one is made absolute right away so it stays valid even
# if the working directory changes later on.
blender_path = pth("/content/blender-3.4.0-linux-x64/blender").resolve()
blend_file_path = pth("blender", "scene.blend").resolve()
script_path = pth("src", "main.py").resolve()
output_path = pth("renders").resolve()

# Render settings handed to Blender on the command line.
render_engine = "CYCLES"
device = "CUDA"  # either CPU, CUDA or OPTIX

In [None]:
from shutil import rmtree
from src import config

# Seed the config generator so the sampled scene parameters are reproducible.
config.set_seed(42)
n_samples_to_generate = 2
config_dir_path = pth("config")

# Start from an empty config directory so stale samples from a previous run
# are never rendered again.
if config_dir_path.is_dir():
    rmtree(config_dir_path)
config_dir_path.mkdir(parents=True)

# Write one JSON config per sample; every sample renders into output_path.
for i in range(n_samples_to_generate):
    config_path = config_dir_path / f"sample_{i:08d}.json"
    sample = config.Config(device)
    sample.render.output_dir = str(output_path)
    config.write_config(config_path, sample)

In [None]:
# clear output directory
if output_path.is_dir():
    rmtree(output_path)
    output_path.mkdir()

!"$blender_path" "$blend_file_path" --background --factory-startup --threads 0 --engine "$render_engine" --enable-autoexec --python "$script_path" -- --cycles-device "$device"

In [None]:
import numpy as np
import cv2 as cv
from google.colab.patches import cv2_imshow


def to_uint(img, dtype=np.uint8):
    """Convert a float image in [0, 1] to an unsigned integer image.

    Values outside [0, 1] are clipped first; the result is scaled to the full
    range of ``dtype`` and cast (the cast truncates towards zero).

    Args:
        img: array-like of floats.
        dtype: target integer dtype, ``np.uint8`` by default.

    Returns:
        Array of ``dtype`` with the same shape as ``img``.
    """
    full_scale = np.iinfo(dtype).max
    unit_range = np.clip(img, 0, 1)
    return (unit_range * full_scale).astype(dtype)


def to_float(img, fdtype=np.float32):
    """Convert an integer image to floats normalised by its dtype's maximum.

    Args:
        img: numpy array with an integer dtype (``np.iinfo`` must accept it).
        fdtype: floating point dtype of the result, ``np.float32`` by default.

    Returns:
        Array of ``fdtype`` where the integer maximum of ``img.dtype`` maps to 1.
    """
    as_float = img.astype(fdtype)
    full_scale = np.iinfo(img.dtype).max
    return as_float / full_scale


def imshow(img, downscale_amount=1):
    """Display an image in the notebook, optionally shrunk by an integer factor.

    Args:
        img: image array; axis 0 is treated as height and axis 1 as width.
        downscale_amount: integer divisor applied to both dimensions.
    """
    target_width = img.shape[1] // downscale_amount
    target_height = img.shape[0] // downscale_amount
    # cv.resize expects the target size as (width, height)
    shrunk = cv.resize(img, (target_width, target_height))
    cv2_imshow(shrunk)


def imread_coords(path):
    """Load a rendered coordinate map and split it into coordinates and mask.

    The image is read unchanged (as OpenCV delivers it, i.e. channel 0 first).
    Channel 0 is used as the alpha/coverage mask, channel 1 as y and the
    remaining channel(s) as x — presumably a 3-channel 16-bit render; confirm
    against the pipeline's output format.

    Args:
        path: location of the coordinate image (``str`` or ``pathlib.Path``).

    Returns:
        ``(coords, alpha)`` where ``coords`` holds normalised ``(x, y)`` per
        pixel in OpenCV orientation (y pointing down), with ``-1`` wherever
        ``alpha < 1``, and ``alpha`` keeps a trailing singleton axis.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image.
    """
    # Older OpenCV releases only accept str filenames, not pathlib.Path.
    # unchanged to read as uint16
    raw = cv.imread(str(path), cv.IMREAD_UNCHANGED)
    if raw is None:
        # cv.imread signals failure by returning None instead of raising
        raise FileNotFoundError(f"could not read coordinate image: {path}")

    coords = to_float(raw)
    alpha = coords[..., 0, None]
    # flip y to match opencv coordinates
    coords[..., 1] = 1 - coords[..., 1]
    # mark pixels that are not fully covered as invalid and drop the mask channel
    coords = np.where(alpha < 1, -1, coords[..., 1:])
    # reverse the channel order so the last axis reads (x, y)
    coords = coords[..., ::-1]

    return coords, alpha


def bbox_to_corners(x0, y0, x1, y1):
    """Expand a box given by two opposite corners into its four corner points.

    Returns:
        Tuple of four ``(x, y)`` tuples in the order
        ``(x0, y0), (x1, y0), (x1, y1), (x0, y1)``.
    """
    first = (x0, y0)
    second = (x1, y0)
    third = (x1, y1)
    fourth = (x0, y1)
    return first, second, third, fourth


def remap_point(point, coords, src_img):
    """Find the pixel of a coordinate map that best matches a source point.

    Args:
        point: ``(x, y)`` position in pixels of ``src_img``.
        coords: array whose last axis holds normalised ``(x, y)`` values.
        src_img: source image; only its first two shape entries are used to
            normalise ``point``.

    Returns:
        Index tuple (numpy order, e.g. ``(row, col)`` for a 2-D map) of the
        entry with the smallest per-axis maximum deviation from the point.
    """
    # shape is (height, width, ...); reverse it to divide (x, y) by (width, height)
    size_xy = tuple(reversed(src_img.shape[:2]))
    normalized = np.asarray(point) / size_xy

    # largest absolute deviation over the coordinate axis, per pixel
    deviation = np.abs(normalized - coords).max(axis=-1)

    # the best match is the pixel with the smallest deviation
    best_flat = deviation.argmin()
    return np.unravel_index(best_flat, deviation.shape)


def remap_bbox(x0, y0, x1, y1, coords, src_img):
    """Remap the four corners of a source-image box into the coordinate map.

    Returns:
        Array with one row per corner (same order as ``bbox_to_corners``),
        each row being the index returned by ``remap_point``.
    """
    corners = bbox_to_corners(x0, y0, x1, y1)
    remapped = [remap_point(corner, coords, src_img) for corner in corners]
    return np.asarray(remapped)


def draw_rect(img, points):
    """Draw a closed outline through ``points`` and a filled dot on each one.

    Args:
        img: image to draw on (passed straight to the OpenCV drawing calls).
        points: array with one ``(x, y)`` row per vertex.

    Returns:
        The image returned by the last OpenCV drawing call.
    """
    color = (255, 0, 255)
    # closed polygon outline, 2 px thick
    img = cv.polylines(img, (points,), True, color, 2)
    # filled marker (thickness -1) with radius 6 on every vertex
    for vertex in points:
        img = cv.circle(img, vertex, 6, color, -1)
    return img


# Load the first rendered sample, the source document and the coordinate map.
# Older OpenCV releases only accept str filenames, so pathlib paths are
# converted explicitly, and a failed read (cv.imread returns None) is reported
# immediately instead of failing later inside a drawing call.
render_path = output_path / "sample_00000000/image0001.png"
img = cv.imread(str(render_path))
if img is None:
    raise FileNotFoundError(f"could not read rendered image: {render_path}")

document_path = "test_assets/lorem ipsum.png"
document_img = cv.imread(document_path)
if document_img is None:
    raise FileNotFoundError(f"could not read document image: {document_path}")

coords, alpha = imread_coords(str(output_path / "sample_00000000/coordinates0001.png"))

# example bounding box in document pixel coordinates: (x0, y0), (x1, y1)
bbox = ((180, 256), (370, 512))

# visualize
pts = np.array(bbox_to_corners(*bbox[0], *bbox[1]), np.int32)
pts_remapped = remap_bbox(*bbox[0], *bbox[1], coords, document_img)
# convert from numpy coordinates to opencv; copy into a contiguous int32 array
# so both point sets reach the drawing calls in the same form as `pts`
pts_remapped = np.array(pts_remapped[:, ::-1], np.int32)

document_img = draw_rect(document_img, pts)
img = draw_rect(img, pts_remapped)

imshow(document_img, 3)
imshow(img, 3)