In [1]:
import os
import cv2
import pycolmap
from hloc.utils import viz_3d
import numpy as np
import pixtrack.utils.pose_utils as pose_utils
from pixloc.utils.colmap import Camera as ColCamera
from pixloc.pixlib.geometry import Camera as PixCamera, Pose
import matplotlib.pyplot as plt
from pixtrack.utils.ingp_utils import load_nerf2sfm, initialize_ingp, sfm_to_nerf_pose, nerf_to_sfm_pose
from pixtrack.visualization.run_vis_on_poses import get_nerf_image
from scipy.spatial.transform import Rotation as R

In [2]:
# Root directory of the pixtrack data. It defaults to the original location and can
# be overridden with the PIXTRACK_DATA_ROOT environment variable, so the notebook
# is not tied to a single machine's filesystem layout.
data_root = os.environ.get('PIXTRACK_DATA_ROOT', '/data/pixtrack')

# COLMAP reconstruction of the augmented gimble scene.
sfm_path = os.path.join(data_root, 'outputs/nerf_sfm/aug_gimble_04MAR2022/aug_sfm/')
model = pycolmap.Reconstruction(sfm_path)

# instant-ngp snapshot and the nerf2sfm file used to convert poses between the
# SfM and NeRF coordinate frames.
nerf_path = os.path.join(data_root, 'instant-ngp/snapshots/gimble_04MAR2022/weights.msgpack')
nerf2sfm_path = os.path.join(data_root, 'pixel-perfect-sfm/datasets/gimble_04MAR2022/nerf2sfm.pkl')
nerf2sfm = load_nerf2sfm(nerf2sfm_path)
testbed = initialize_ingp(nerf_path)

[0m14:23:43 [0;36mINFO     [0mLoading network config from: /data/pixtrack/instant-ngp/snapshots/gimble_04MAR2022/weights.msgpack[K[0m
14:23:44 [0;36mINFO     [0mGridEncoding:  Nmin=16 b=1.44727 F=2 T=2^19 L=16[K[0m
14:23:44 [0;36mINFO     [0mDensity model: 3--[HashGrid]-->32--[FullyFusedMLP(neurons=64,layers=3)]-->1[K[0m
14:23:44 [0;36mINFO     [0mColor model:   3--[Composite]-->16+16--[FullyFusedMLP(neurons=64,layers=4)]-->3[K[0m
14:23:44 [0;36mINFO     [0m  total_encoding_params=12599920 total_network_params=10240[K[0m


In [3]:
# Convert the reconstruction camera stored under key 1 into a pixloc camera:
# pycolmap camera -> pixloc COLMAP camera record -> pixloc geometry camera.
colmap_cam = model.cameras[1]
col_camera = ColCamera(
    None,
    colmap_cam.model_name,
    int(colmap_cam.width),
    int(colmap_cam.height),
    colmap_cam.params,
)
# All renders below use this camera scaled by a factor of 0.5.
camera = PixCamera.from_colmap(col_camera).scale(0.5)

In [6]:
# Names of the mapping images IMG_9533 ... IMG_9572 that serve as key views.
all_imgs_in_altitude = [f"mapping/IMG_{idx}.png" for idx in range(9533, 9573)]

# For each key view: look the image up in the reconstruction, take its
# camera-in-world pose and convert that pose from the SfM frame to the NeRF frame.
render_poses = [
    sfm_to_nerf_pose(
        nerf2sfm,
        pose_utils.get_camera_in_world_from_colmap_image(
            model.find_image_with_name(name)
        ),
    )
    for name in all_imgs_in_altitude
]

In [7]:
!pip3 install mediapy

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [8]:
from scipy.spatial.transform import Rotation as R
from scipy.spatial.transform import Slerp
from tqdm.notebook import tqdm

In [9]:
image_renders = []
intermediate_frames = 20  # frames rendered per segment, both key poses included

# Every segment uses the same Slerp parametrisation: the two key poses sit at the
# first and last frame index, and one sample is taken at every integer frame index.
key_times = [0, intermediate_frames - 1]
interpolation_times = np.arange(intermediate_frames)

# Iterate over consecutive key-pose pairs so that every iteration renders a
# segment and the progress-bar total equals the number of segments (the previous
# loop ran once more than needed and only hit `break` on its final iteration).
pose_pairs = list(zip(render_poses[:-1], render_poses[1:]))
for pose1, pose2 in tqdm(pose_pairs):
    # Rotations are interpolated with slerp, translations linearly.
    rotations = np.stack([pose1[:3, :3], pose2[:3, :3]]).astype(np.float64)
    slerp = Slerp(key_times, R.from_matrix(rotations))
    intermediate_rotations = slerp(interpolation_times).as_matrix()
    translations = np.linspace(pose1[:3, -1], pose2[:3, -1], intermediate_frames)

    # NOTE: the last frame of a segment uses the same pose as the first frame of
    # the next segment, so every interior key pose is rendered twice.
    for rotation, translation in zip(intermediate_rotations, translations):
        camera_pose = np.eye(4)
        camera_pose[:3, :3] = rotation
        camera_pose[:3, -1] = translation
        nerf_img = get_nerf_image(testbed, camera_pose, camera)
        image_renders.append(nerf_img)


  0%|          | 0/40 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [10]:
import mediapy

In [None]:
# Preview the frames collected in `image_renders` as an inline video
# (displayed at 320x240, 10 frames per second).
mediapy.show_video(image_renders, width=320, height=240, fps=10)

In [11]:
import numpy as np
def trans_t(t):
    """Return the 4x4 float32 homogeneous matrix that translates by `t` along z."""
    mat = np.eye(4, dtype=np.float32)
    mat[2, 3] = t
    return mat


def rot_phi(phi):
    """Return the 4x4 float32 homogeneous rotation about the x-axis.

    `phi` is the rotation angle in radians.
    """
    cos_phi, sin_phi = np.cos(phi), np.sin(phi)
    mat = np.eye(4, dtype=np.float32)
    # Only the y/z sub-block differs from the identity.
    mat[1, 1] = cos_phi
    mat[1, 2] = -sin_phi
    mat[2, 1] = sin_phi
    mat[2, 2] = cos_phi
    return mat


def rot_theta(th):
    """Return the 4x4 float32 homogeneous rotation about the y-axis.

    `th` is the angle in radians. Note the sign placement: `-sin(th)` sits in
    the top-right entry of the x/z sub-block and `+sin(th)` in the bottom-left.
    """
    cos_th, sin_th = np.cos(th), np.sin(th)
    mat = np.eye(4, dtype=np.float32)
    # Only the x/z sub-block differs from the identity.
    mat[0, 0] = cos_th
    mat[0, 2] = -sin_th
    mat[2, 0] = sin_th
    mat[2, 2] = cos_th
    return mat

def pose_spherical(theta, phi, radius, up_axis=0):
    """
    Spherical rendering poses, from NeRF.

    Builds a 4x4 float32 pose by translating `radius` along z, rotating by
    `phi` and then `theta` (both given in degrees), and finally applying a
    fixed axis permutation (x negated, y and z swapped).

    `up_axis` selects an optional extra re-orientation: 0 (the default)
    leaves the pose as is; any other value picks the up vector as
    1 -> -z, 2 -> +y, 3 -> -y, 4 -> +x, 5 -> -x.
    """
    pose = rot_phi(phi / 180.0 * np.pi) @ trans_t(radius)
    pose = rot_theta(theta / 180.0 * np.pi) @ pose

    axis_swap = np.array(
        [[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]],
        dtype=np.float32,
    )
    pose = axis_swap @ pose

    if up_axis == 0:
        return pose

    # Decode the up direction: the axis index comes from up_axis // 2, the sign
    # from its parity (odd -> negative).
    up_dim = 2 - up_axis // 2
    side_dim = 0 if up_dim != 0 else 1

    up_vec = np.zeros(3, np.float32)
    up_vec[up_dim] = 1 if up_axis % 2 == 0 else -1
    side_vec = np.zeros(3, np.float32)
    side_vec[side_dim] = 1
    third_vec = np.cross(up_vec, side_vec)

    # The columns of the re-orientation are (side, up x side, up).
    reorient = np.eye(4, dtype=np.float32)
    reorient[:3, :3] = np.stack([side_vec, third_vec, up_vec], axis=1)
    return reorient @ pose

In [12]:
# Orbit settings: `elevation` is passed as the phi angle (degrees) and `radius`
# as the z translation of pose_spherical; `up_axis - 1` is its up-axis selector.
elevation = -10
radius = 4.
up_axis = 1
num_views = 720

# One pose per theta angle, evenly spaced over [-180, 180) degrees.
azimuths = np.linspace(-180, 180, num_views, endpoint=False)
render_poses = np.stack(
    [pose_spherical(azimuth, elevation, radius, up_axis - 1) for azimuth in azimuths],
    axis=0,
)  # (NV, 4, 4)

# Shift the translation column of every pose by the same fixed offset.
render_poses[:, :3, 3] -= [0.5, 0., 2.]

# NOTE(review): presumably an opaque black RGBA background - confirm against instant-ngp.
testbed.background_color = [0., 0., 0., 1.]

image_renders = []
for orbit_pose in tqdm(render_poses):
    image_renders.append(get_nerf_image(testbed, orbit_pose, camera))

  0%|          | 0/720 [00:00<?, ?it/s]

In [13]:
# Preview the frames in `image_renders` inline at three times the 320x240 size
# used for the earlier preview (960x720), 30 frames per second.
mediapy.show_video(image_renders, width=320*3, height=240*3, fps=30)

0
This browser does not support the video tag.


In [14]:
# Save the frames in `image_renders` to 'gimble_360.mp4' (relative to the
# current working directory).
# NOTE(review): the file is written at 45 fps while the inline preview uses 30 fps -
# confirm the difference is intended.
mediapy.write_video('gimble_360.mp4', image_renders, fps=45)