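Tests how well the ResNet+MLP model performs when it is trained on NeRF-rendered images but evaluated on real camera data. This notebook renders the noisy training images along a reference trajectory and records the camera pose of each image.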

In [14]:
# Settings for interactive work: load IPython's autoreload extension and
# select mode 2, which reloads imported modules before each cell executes so
# that edits to local modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
from functions.nerf import *
from pathlib import Path
import os
import torchvision
import numpy as np
from scipy.spatial.transform import Rotation as R
import json

In [16]:
# Work from the nerfs directory: the config path below and the '../data'
# output paths used later in this notebook are all relative to it.
nerfs_dir = '/home/lowjunen/StanfordMSL/Image2Pose/nerfs'
os.chdir(nerfs_dir)

# Build the rendering pipeline wrapper from the splatfacto run's config file
config_path = Path('outputs/colmap_in_mocap/splatfacto/2024-04-15_192132/config.yml')
gs_pipeline = NeRF(config_path)

Output()

Output()

In [17]:
def drone2nerf_transform(pose):
    """Convert a drone pose into a camera-to-NeRF-world homogeneous transform.

    Args:
        pose: sliceable sequence whose first three entries are the position
            and whose remaining entries are a quaternion handed to scipy's
            ``Rotation.from_quat`` (scalar-last order by scipy's default).

    Returns:
        4x4 numpy array ``T_c2n = T_f2n @ T_b2f @ T_c2b``, i.e. the chain
        camera -> body -> flightroom -> NeRF world.
    """
    position, quaternion = pose[:3], pose[3:]

    # Body frame to flightroom frame: the drone pose as a homogeneous matrix
    body_to_flightroom = np.eye(4)
    body_to_flightroom[:3,:3] = R.from_quat(quaternion).as_matrix()
    body_to_flightroom[:3,3] = position

    # Flightroom frame to NeRF world frame: negate the y and z axes
    flightroom_to_nerf = np.diag([1.0,-1.0,-1.0,1.0])

    # Camera frame to body frame: fixed axis permutation, no translation
    camera_to_body = np.array([
        [ 0.0, 0.0,-1.0, 0.0],
        [ 1.0, 0.0, 0.0, 0.0],
        [ 0.0,-1.0, 0.0, 0.0],
        [ 0.0, 0.0, 0.0, 1.0]
    ])

    # Compose right-to-left: camera -> body -> flightroom -> NeRF world
    return flightroom_to_nerf@body_to_flightroom@camera_to_body

def waypoints_to_poses(tFOkf:np.ndarray,hz:int=10):
    """Linearly interpolate keyframes into a dense sequence of poses.

    Args:
        tFOkf: array with one column per keyframe. Row 0 is the keyframe
            time, rows 1-3 are the position and row 4 is the rotation angle
            about the z axis (radians, scipy's ``from_euler`` default).
        hz: number of samples generated per unit of keyframe time.

    Returns:
        (7, N) array. Each column holds the position (rows 0-2) followed by
        the quaternion returned by ``Rotation.as_quat`` (rows 3-6). The
        keyframe shared by two consecutive segments is emitted only once.

    Raises:
        ValueError: if ``tFOkf`` contains no keyframes.
    """
    Nkf = tFOkf.shape[1]
    if Nkf < 1:
        raise ValueError("tFOkf must contain at least one keyframe")

    if Nkf == 1:
        # Nothing to interpolate: the trajectory is the single keyframe
        FOs = np.asarray(tFOkf[1:5,:],dtype=float)
    else:
        segments = []
        for i in range(Nkf-1):
            duration = tFOkf[0,i+1]-tFOkf[0,i]

            # Number of sample intervals in this segment. The small tolerance
            # stops float round-off (e.g. (0.7-0.6)*10 = 0.9999999999999998)
            # from truncating away a whole interval.
            N = int(np.floor(duration*hz+1e-9))

            fo = np.linspace(tFOkf[1:5,i],tFOkf[1:5,i+1],N+1).T

            # Drop the end sample of every segment but the last; it is the
            # first sample of the following segment.
            if i < Nkf-2:
                fo = fo[:,0:-1]

            segments.append(fo)

        FOs = np.concatenate(segments,axis=1)

    poses = np.zeros((7,FOs.shape[1]))
    poses[0:3,:] = FOs[0:3,:]
    for i in range(FOs.shape[1]):
        # Orientation is a pure rotation about z by the interpolated angle
        poses[3:,i] = R.from_euler('z',FOs[3,i]).as_quat()

    return poses

In [18]:
# Trajectory keyframes, one per row here: time, x, y, z, rotation about z.
keyframes = [
    #  t,     x,     y,     z,   yaw
    [ 0.00, -2.00, -0.40, -1.00, 0.00],
    [ 1.00,  0.00, -0.40, -1.30, 0.70],
    [ 4.00,  3.00,  2.00, -1.30, 0.30],
    [ 5.00,  5.00,  2.00, -1.30, 0.00],
]

# waypoints_to_poses expects one keyframe per column, so transpose to 5 x Nkf
tFOkf = np.array(keyframes).T.copy()

# Sample the interpolated trajectory at 50 samples per unit of keyframe time
poses = waypoints_to_poses(tFOkf,50)

In [19]:
# Render a dataset of images from noise-perturbed camera poses along the
# trajectory and record each image's pose in transforms.json.
N_PASSES = 15        # passes over the trajectory, each with fresh noise
QUAT_NOISE = 0.1     # half-width of the uniform noise added to each quaternion component
POS_NOISE = 0.3      # half-width of the uniform noise added to each position component
SEED = 0             # fixed seed so re-running the cell reproduces the same dataset

# Output locations, relative to the current working directory
DATA_DIR = '../data/'
IMAGE_DIR = 'nerf_images/'

rng = np.random.default_rng(SEED)

# save_image does not create missing directories, so make sure the target exists
os.makedirs(DATA_DIR+IMAGE_DIR, exist_ok=True)

frames = []
idx = 0
for j in range(N_PASSES):
    for i in range(poses.shape[1]):
        # Nominal camera-to-NeRF-world transform for this trajectory sample
        T_c2n = drone2nerf_transform(poses[:,i])

        # Generate noise
        w_q = rng.uniform(-QUAT_NOISE,QUAT_NOISE,4)
        w_p = rng.uniform(-POS_NOISE,POS_NOISE,3)

        # Add noise; the perturbed quaternion is re-normalised so that it
        # still represents a rotation
        qi = R.from_matrix(T_c2n[0:3,0:3]).as_quat()+w_q
        qi = qi/np.linalg.norm(qi)
        ti = T_c2n[0:3,3]+w_p

        T_c2n[0:3,0:3] = R.from_quat(qi).as_matrix()
        T_c2n[0:3,3] = ti

        # Render Image from the top three rows (3x4) of the perturbed transform
        pose = torch.tensor(T_c2n[0:3,:]).float()
        outputs = gs_pipeline.render(pose)

        # Move axis 2 to the front before saving
        # (presumably HxWxC -> CxHxW for save_image -- TODO confirm)
        img = torch.movedim(outputs['rgb'],2,0)
        file_name = 'rgb_'+str(idx).zfill(5)+'.png'
        file_path = IMAGE_DIR+file_name
        torchvision.utils.save_image(img, DATA_DIR+file_path)

        # file_path is stored relative to the data directory
        frames.append({"file_path": file_path,"transform_matrix":pose.tolist()})

        # Update index
        idx += 1

with open(DATA_DIR+IMAGE_DIR+'transforms.json', 'w') as json_file:
    json.dump({"frames":frames}, json_file, indent=4)
