In [1]:
import os
os.environ['PYOPENGL_PLATFORM'] = 'egl'

import numpy as np
from scipy.spatial.transform import Rotation as R
from scipy.linalg import block_diag
import trimesh
import pyrender
from pyrender import RenderFlags
from PIL import Image
import matplotlib.pyplot as plt
from pathlib import Path
import json
from tqdm import tqdm

In [2]:
# Root directory of the original ShapeNet models; get_mesh() resolves
# "<asset_id>/models/model_normalized.obj" relative to it.
shapenet_path = Path("/mnt/ML/Datasets/shapenet renders/shapenet-orig")

# Asset ids to render, read from a JSON file.
# NOTE(review): presumably a JSON list of asset-id strings -- confirm against the file.
asset_list_file = Path('/mnt/ML/Datasets/shapenet renders/pyrender_assets.json')
asset_ids = json.loads(asset_list_file.read_text())

In [3]:
def rotation_mat(g, a):
    """Return a 4x4 homogeneous rotation matrix.

    The 3x3 rotation is built from the Euler sequence ``'xy'`` (lowercase,
    i.e. extrinsic axes in SciPy) with ``g`` degrees about x and ``a``
    degrees about y. The translation part is zero.
    """
    homogeneous = np.identity(4)
    homogeneous[:3, :3] = R.from_euler('xy', [g, a], degrees=True).as_matrix()
    return homogeneous

def translation_mat(v):
    """Return the 4x4 homogeneous matrix that translates by ``v``.

    ``v`` must contain exactly three values; they become the last column of
    the top three rows. The rotation part is the identity.
    """
    offset_column = np.array(v).reshape((-1, 1))
    upper_rows = np.hstack([np.identity(3), offset_column])
    bottom_row = np.array([[0., 0., 0., 1.]])
    return np.vstack([upper_rows, bottom_row])

In [4]:
def get_lights():
    """Build the spot lights used for rendering.

    Returns a list of two ``(light, pose)`` pairs. Both pairs reference the
    same ``pyrender.SpotLight`` object; only the 4x4 pose differs.
    """
    spot = pyrender.SpotLight(
        color=np.ones(3),
        intensity=8.0,
        innerConeAngle=np.pi/16.0,
        outerConeAngle=np.pi/6.0,
    )
    # Each pose is rotation @ translation: offset 2 units along local z,
    # then rotated by -40 deg about x and `y_angle` deg about y.
    offset = translation_mat([0, 0, 2])
    lights = []
    for y_angle in (225, 90):
        lights.append((spot, rotation_mat(-40, y_angle) @ offset))
    return lights

def get_camera_node(yfov, resolution):
    """Create a pyrender node holding a perspective camera at the identity pose.

    ``yfov`` is forwarded to ``pyrender.PerspectiveCamera`` unchanged; the
    aspect ratio is ``resolution[0] / resolution[1]``.
    """
    aspect = resolution[0] / resolution[1]
    perspective = pyrender.PerspectiveCamera(yfov=yfov, aspectRatio=aspect)
    return pyrender.Node(camera=perspective, matrix=np.identity(4))

In [5]:
def get_mesh(asset_id):
    """Load one ShapeNet asset and re-position it for rendering.

    The model is read from
    ``shapenet_path / asset_id / "models/model_normalized.obj"``, translated
    so its centroid sits at the origin, and then shifted along y so that the
    lower bound of its bounding box is at ``y == 0``.

    Returns the loaded geometry as a ``trimesh.Scene``.
    """
    path = shapenet_path / asset_id / "models/model_normalized.obj"
    # force='scene' guarantees a trimesh.Scene even when the OBJ contains a
    # single geometry (which would otherwise load as a bare Trimesh); the
    # result is later passed to pyrender.Scene.from_trimesh_scene, which
    # needs a Scene.
    mesh = trimesh.load(path, force='scene')
    mesh.apply_transform(translation_mat(-mesh.centroid))
    # Bounds are re-read after centring: bounds[0, 1] is the minimum y.
    mesh.apply_transform(translation_mat([0, -mesh.bounds[0, 1], 0]))
    return mesh

In [6]:
def scene_from_mesh(mesh):
    """Convert a trimesh scene into a pyrender scene.

    The resulting scene uses an ambient light of 0.5 per channel and a
    background colour of all zeros (four components).
    """
    ambient = (0.5, 0.5, 0.5)
    background = (0, 0, 0, 0)
    return pyrender.Scene.from_trimesh_scene(mesh, ambient_light=ambient, bg_color=background)

In [7]:
def render_and_save(path, scene, renderer, metadata, max_depth=1.6):
    """Render one view and write its colour and depth images under ``path/images``.

    Parameters
    ----------
    path : pathlib.Path
        Output directory; an ``images`` sub-directory is created if missing.
    scene
        Scene handed to ``renderer.render``.
    renderer
        Object whose ``render(scene, flags=RenderFlags.RGBA)`` returns a
        ``(color, depth)`` pair of arrays.
    metadata : dict
        Must contain an integer ``'id'``. It is updated in place with
        ``'rgba_path'`` and ``'depth_path'`` (both relative to ``path``).
    max_depth : float, optional
        Depth value mapped to 255 in the 8-bit depth image. Defaults to the
        previously hard-coded 1.6.
    """
    color, depth = renderer.render(scene, flags=RenderFlags.RGBA)
    color_img = Image.fromarray(color)
    # Clip before the cast: depths beyond max_depth would otherwise exceed
    # 255 and overflow when converted to uint8 instead of saturating.
    depth_uint = np.clip(255 * depth / max_depth, 0, 255).astype(np.uint8)
    depth_img = Image.fromarray(depth_uint)

    (path / 'images').mkdir(parents=True, exist_ok=True)
    metadata['rgba_path'] = f"images/rgba_{metadata['id']:05d}.png"
    metadata['depth_path'] = f"images/depth_{metadata['id']:05d}.png"
    color_img.save(path / metadata['rgba_path'])
    depth_img.save(path / metadata['depth_path'])

In [8]:
def render_asset(renders_path, lights, camera_node, renderer, asset_id, resolution=(256, 256), camera_angles=[], num_random=0, angle_range=((0, -90), (360, 90))):
    """Render one asset from several camera angles and write its metadata.

    Parameters
    ----------
    renders_path : pathlib.Path
        Root output directory; results go to ``renders_path / asset_id``.
    lights : iterable of (light, pose)
        Lights added to the scene with the given 4x4 poses.
    camera_node
        Node with a ``camera`` exposing ``yfov`` and ``aspectRatio``; it is
        added to the scene and re-posed for every view.
    renderer
        Renderer forwarded to ``render_and_save``.
    asset_id : str
        Asset identifier, used to load the mesh and name the output folder.
    resolution : (int, int)
        Stored in the metadata and written as ``w``/``h`` in transforms.json.
    camera_angles : sequence of (angle_y, angle_x)
        Fixed view angles in degrees. Only read (copied), never mutated.
    num_random : int
        Number of additional angles drawn uniformly from ``angle_range``.
    angle_range : ((min_y, min_x), (max_y, max_x))
        Lower and upper bounds in degrees for the random angles.

    Returns
    -------
    list of dict
        One metadata dict per rendered view (also written to metadata.json).

    Side effects: writes images, ``transforms.json`` and ``metadata.json``
    under ``renders_path / asset_id`` and draws from NumPy's global RNG.
    """
    mesh = get_mesh(asset_id)
    scene = scene_from_mesh(mesh)
    for light, light_pose in lights:
        scene.add(light, pose=light_pose)
    scene.add_node(camera_node)

    low = np.asarray(angle_range[0], dtype=float)
    high = np.asarray(angle_range[1], dtype=float)
    random_angles = (low + np.random.rand(num_random, 2) * (high - low)).tolist()
    # list(...) lets callers pass a tuple (or any sequence) of fixed angles.
    all_angles = list(camera_angles) + random_angles

    yfov = camera_node.camera.yfov
    # Pinhole relation: tan(xfov / 2) = aspect * tan(yfov / 2). The previous
    # arcsin/sin form only agreed with this for a square aspect ratio.
    xfov = float(2 * np.arctan(camera_node.camera.aspectRatio * np.tan(yfov / 2)))
    metadatas = [
        {
            'id': i,
            'asset_id': asset_id,
            'resolution': resolution,
            'x_fov': xfov,
            'y_fov': yfov,
            'camera_angle': angle,
        }
        for i, angle in enumerate(all_angles)
    ]

    asset_render_path = renders_path / asset_id
    asset_render_path.mkdir(parents=True, exist_ok=True)

    for metadata in metadatas:
        angle = metadata['camera_angle']
        # Camera sits 1 unit from the pivot along local z, is rotated by the
        # view angles, and the pivot is raised 0.1 along y.
        camera_pose = translation_mat([0, 0.1, 0]) @ rotation_mat(-angle[1], angle[0]+180) @ translation_mat([0, 0, 1])
        metadata['camera_pose'] = camera_pose.tolist()
        scene.set_pose(camera_node, pose=camera_pose)
        render_and_save(asset_render_path, scene, renderer, metadata)

    # Axis swap applied to every exported pose: (x, y, z) -> (x, -z, y).
    # NOTE(review): looks like a y-up to z-up conversion for a NeRF-style
    # transforms.json consumer -- confirm against the downstream tool.
    p = np.array([
        [1,  0,  0,  0],
        [0,  0, -1,  0],
        [0,  1,  0,  0],
        [0,  0,  0,  1]
    ])
    transforms = {
        'camera_angle_x': xfov,
        'camera_angle_y': yfov,
        'w': resolution[0],
        'h': resolution[1],
        'aabb_scale': 0.5,
        'frames': [{
            'file_path': metadata['rgba_path'],
            'transform_matrix': np.matmul(p, metadata['camera_pose']).tolist(),
        } for metadata in metadatas],
    }

    asset_transforms_path = asset_render_path / 'transforms.json'
    with open(asset_transforms_path, 'w') as f:
        json.dump(transforms, f, indent=4)

    asset_meta_path = asset_render_path / 'metadata.json'
    with open(asset_meta_path, 'w') as f:
        json.dump(metadatas, f, indent=4)

    return metadatas

In [9]:
def render_batch(renders_path, lights, camera_node, renderer, asset_ids, resolution=(256, 256), camera_angles=[], num_random=0, angle_range=((0, -90), (360, 90))):
    """Render every asset in ``asset_ids`` and write a combined metadata file.

    Each asset is rendered with ``render_asset`` using the shared lights,
    camera node and renderer. An asset that raises is reported on stdout and
    skipped so that one bad asset does not abort the batch. The per-asset
    metadata lists of all successful assets are written to
    ``renders_path / 'metadatas.json'``.

    The remaining parameters are forwarded to ``render_asset`` unchanged.
    """
    metadatas = []
    for asset_id in tqdm(asset_ids, desc='Rendering...'):
        try:
            metadata = render_asset(renders_path, lights, camera_node, renderer, asset_id, resolution, camera_angles, num_random, angle_range)
        except Exception as e:
            # Deliberate best-effort: log and move on to the next asset.
            print(e)
            print(f'failed: asset id {asset_id}')
        else:
            metadatas.append(metadata)
    # The output directory only exists if at least one asset rendered;
    # create it so the summary can be written for an empty or all-failed batch.
    renders_path.mkdir(parents=True, exist_ok=True)
    with open(renders_path / 'metadatas.json', 'w') as f:
        json.dump(metadatas, f, indent=4)

In [10]:
# Render configuration.
resolution = (256, 256)
camera_angles = []  # optional fixed views, e.g. [(a, 20) for a in [30, 60, 150]]
num_random = 100
angle_range = ((0, 0), (360, 35))

# Seed NumPy's global RNG so the randomly drawn camera angles are reproducible.
seed = 0
np.random.seed(seed)

lights = get_lights()
yfov = np.pi / 3.0
camera_node = get_camera_node(yfov, resolution)
renderer = pyrender.OffscreenRenderer(viewport_width=resolution[0], viewport_height=resolution[1], point_size=1.)
batch_asset_ids = asset_ids
try:
    render_batch(Path('renders_100_views_res_256'), lights, camera_node, renderer, batch_asset_ids, resolution, camera_angles, num_random, angle_range)
finally:
    # Always release the offscreen renderer, even if the batch raises or is
    # interrupted part-way through the (multi-hour) run.
    renderer.delete()

Rendering...: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 3456/3456 [5:23:00<00:00,  5.61s/it]
