In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import skimage.io as skio
import torch.optim as optim
import skimage as sk
import random

In [2]:
# Load the 200x200 Lego archive: images plus camera-to-world (c2w) matrices
# for the train/val splits, c2w matrices only for the test split, and the
# focal length used to build the intrinsics.
# (The original used an f-string with no placeholders; a plain literal is
# equivalent and avoids the misleading prefix.)
data = np.load("lego_200x200.npz")
# Rescale intensities by 1/255 (presumably uint8 -> [0, 1]; confirm the stored dtype).
images_train = data["images_train"] / 255.0
c2ws_train = data["c2ws_train"]
images_val = data["images_val"] / 255.0
c2ws_val = data["c2ws_val"]
c2ws_test = data["c2ws_test"]
focal = data["focal"]

(100, 200, 200, 3)


In [4]:
# Image resolution in pixels (rows, columns) used to build the intrinsics.
height, width = 200, 200

In [5]:
# Pinhole intrinsics: focal length on the diagonal, principal point at the
# image centre (width / 2, height / 2).
K = np.array(
    [
        [focal, 0, width / 2],
        [0, focal, height / 2],
        [0, 0, 1],
    ]
)

In [7]:
# Camera-to-world matrix of the first training view; used by the scratch
# sanity checks in the following cells.
c2w = c2ws_train[0]

In [22]:
# Show the 4x4 matrix for inspection.
c2w

array([[-9.99902189e-01, -4.19224519e-03,  1.33457193e-02,
        -5.37983216e-02],
       [-1.39886811e-02,  2.99659073e-01, -9.53943670e-01,
         3.84547043e+00],
       [-4.65661287e-10, -9.54037189e-01, -2.99688309e-01,
         1.20808232e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.00000000e+00]])

In [8]:
def transform(c2w, x_c):
    """Apply a homogeneous transform to a batch of 3D points.

    Args:
        c2w: Matrix applied to the homogeneous points (a 4x4
            camera-to-world matrix everywhere it is used in this notebook).
        x_c: (N, 3) array-like of points, one per row.

    Returns:
        Array with one transformed point per row; the trailing homogeneous
        coordinate is dropped without dividing by it.
    """
    # Lift every point to homogeneous coordinates by appending a 1.
    homogeneous = np.hstack((x_c, np.ones((len(x_c), 1))))
    # Multiply with points as columns, then go back to one point per row.
    moved = np.transpose(c2w @ np.transpose(homogeneous))
    return moved[:, :-1]

In [39]:
# Sanity check: the camera-space origin should map to the last column of c2w
# (the printed result equals c2w[:3, 3]).
aaa = np.array([[0,0,0]])
print(transform(c2w, aaa))

[[-0.05379832  3.84547043  1.20808232]]


In [46]:
# World-to-camera matrix, obtained by inverting the camera-to-world matrix.
w2c = np.linalg.inv(c2w)

In [47]:
# Print the upper-left 3x3 block and the first three entries of the last
# column of w2c (its rotation and translation parts).
print(w2c[:3,:3])
print(w2c[:3,3])

[[-9.99902118e-01 -1.39886803e-02 -1.19264272e-10]
 [-4.19224401e-03  2.99659014e-01 -9.54037109e-01]
 [ 1.33457230e-02 -9.53943931e-01 -2.99688425e-01]]
[ 6.33068112e-10 -5.15363604e-08  4.03112944e+00]


In [48]:
# Recover the camera position from w2c as -R^{-1} t; the printed vector
# matches the last column of c2w.
print(-np.linalg.inv(w2c[:3,:3]) @ w2c[:3,3])

[-0.05379832  3.84547043  1.20808232]


In [9]:
def pixel_to_camera(K, uv, s):
    """Back-project pixel coordinates into camera space at depth ``s``.

    Inverts the pinhole projection ``s * [u, v, 1]^T = K @ x_c``, i.e.
    ``x_c = s * K^{-1} @ [u, v, 1]^T``. The previous implementation accepted
    ``s`` but never applied it, so every point came back at depth 1.

    Args:
        K: (3, 3) camera intrinsics matrix.
        uv: (N, 2) array-like of pixel coordinates, one ``(u, v)`` per row.
        s: Depth along the camera z-axis. Either a scalar applied to every
            pixel or N values (shape ``(N,)`` or ``(N, 1)``), one per pixel.

    Returns:
        (N, 3) array of camera-space points.
    """
    uv = np.asarray(uv, dtype=float)
    ones_column = np.ones((len(uv), 1))
    uv_homogeneous = np.concatenate((uv, ones_column), axis=1)
    points_at_unit_depth = (np.linalg.inv(K) @ uv_homogeneous.T).T
    # Column-shaped depth broadcasts over x, y, z for scalar and per-pixel input.
    depth = np.asarray(s, dtype=float).reshape(-1, 1)
    return points_at_unit_depth * depth

In [63]:
# Scratch check of row-wise normalisation: subtract the single row `b` from
# every row of `aaa` (broadcast), take each row's Euclidean norm as a column
# (keepdims=True), and divide to obtain unit-length rows.
aaa = np.array([[1,2,3],[10,11,12],[20,21,22],[21,22,23]])
b = np.array([[0.9,2,3]])
c = aaa - b
norms = np.linalg.norm(c, axis=1, keepdims=True)
print(norms)
print(c/norms)

[[ 0.1       ]
 [15.64640534]
 [32.96680148]
 [34.69884724]]
[[1.         0.         0.        ]
 [0.58160324 0.57521199 0.57521199]
 [0.57937073 0.57633738 0.57633738]
 [0.57926996 0.57638802 0.57638802]]


In [12]:
def pixel_to_ray(K, c2w, uv):
    """Turn pixel coordinates into world-space rays.

    Args:
        K: Camera intrinsics, forwarded to ``pixel_to_camera``.
        c2w: Camera-to-world matrix, forwarded to ``transform``.
        uv: (N, 2) pixel coordinates, one per row.

    Returns:
        Tuple ``(origin, directions)``: ``origin`` is the single-row array
        holding the world-space image of the camera-space origin, and
        ``directions`` has one unit-length direction per pixel.
    """
    # The camera centre in world space is where the camera-space origin lands.
    cam_center = transform(c2w, np.array([[0, 0, 0]]))
    # Back-project each pixel with depth argument 1, then move it to world space.
    world_points = transform(c2w, pixel_to_camera(K, uv, 1))
    offsets = world_points - cam_center
    lengths = np.linalg.norm(offsets, axis=1, keepdims=True)
    return cam_center, offsets / lengths
    

In [10]:
class RaysData(Dataset):
    """Dataset of camera rays: one ray per pixel of every image.

    Rays are addressed by a flat index in ``[0, num_images * height * width)``
    that enumerates images first, then rows, then columns.

    Args:
        img_train: Image array indexed as ``[image, row, column, channel]``.
        K: Camera intrinsics passed to ``pixel_to_ray``.
        c2ws_train: Camera-to-world matrices, one per image.
    """

    def __init__(self, img_train, K, c2ws_train):
        self.img = img_train
        self.c2ws = c2ws_train
        self.K = K
        # Read the resolution from the data instead of hard-coding 200x200.
        self.height, self.width = (int(n) for n in np.shape(img_train)[1:3])
        self.length = len(self.img) * self.height * self.width

    def __len__(self):
        """Return the total number of rays (pixels over all images)."""
        return self.length

    def _ray_at(self, flat_index):
        """Return ``(origin, direction, rgb)`` for one in-range flat index."""
        img_index, residual = divmod(flat_index, self.height * self.width)
        # Row-major layout inside an image: the row stride is the width.
        row, col = divmod(residual, self.width)
        # Pixel coordinates are (u, v) = (x, y) = (column, row); +0.5 targets
        # the pixel centre. This assumes K uses the (x, y) layout with the
        # principal point at (width / 2, height / 2), as built in this notebook.
        uv = np.array([[col + 0.5, row + 0.5]])
        ray_o, ray_d = pixel_to_ray(self.K, self.c2ws[img_index], uv)
        return ray_o[0], ray_d[0], self.img[img_index, row, col, :]

    def __getitem__(self, idx):
        """Return ``(ray_origin, ray_direction, rgb)`` for flat index ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        idx = int(idx)
        if idx < 0:
            idx += self.length
        if not 0 <= idx < self.length:
            raise IndexError("ray index out of range")
        return self._ray_at(idx)

    def sample_rays(self, num_samples):
        """Draw ``num_samples`` rays uniformly at random, with replacement.

        Returns:
            Three lists of length ``num_samples``: ray origins, unit ray
            directions, and the RGB value of the pixel each ray goes through.
        """
        rays_o = []
        rays_d = []
        pixels = []
        random_numbers = [random.randint(0, self.length - 1) for _ in range(num_samples)]
        for random_number in random_numbers:
            ray_o, ray_d, rgb = self._ray_at(random_number)
            rays_o.append(ray_o)
            rays_d.append(ray_d)
            pixels.append(rgb)
        return rays_o, rays_d, pixels
    

In [13]:
# Build the ray dataset from the training views and draw 5 random rays
# (origins, directions, pixel colours) as a smoke test.
dataset = RaysData(images_train, K, c2ws_train)
rays_o, rays_d, pixels = dataset.sample_rays(5)

In [14]:
def sample_along_rays(rays_o, rays_d, perturb=True, near=2, far=6, n_samples=32):
    """Sample 3D points along each ray between the near and far planes.

    Each ray gets ``n_samples`` distances evenly spaced over ``[near, far]``
    (both ends included). With ``perturb=True`` every distance is shifted
    forward by an independent uniform offset in ``[0, (far - near) / n_samples]``;
    with ``perturb=False`` the evenly spaced distances are used unchanged.
    The flag was previously ignored and samples were always jittered.

    Args:
        rays_o: Iterable of ray origins (3-vectors as NumPy arrays).
        rays_d: Iterable of ray directions, paired with ``rays_o``.
        perturb: Whether to jitter the sample distances.
        near: Distance of the first sample along the ray.
        far: Distance of the last unperturbed sample along the ray.
        n_samples: Number of samples per ray (must be at least 1).

    Returns:
        Array of shape ``(num_rays * n_samples, 3)`` with the samples of the
        first ray first; shape ``(0, 3)`` when no rays are given.

    Raises:
        ValueError: if ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")
    # Both values are the same for every ray, so compute them once.
    t_values = np.linspace(near, far, n_samples)
    jitter_width = (far - near) / n_samples
    points = []
    for ray_o, ray_d in zip(rays_o, rays_d):
        for t in t_values:
            if perturb:
                t = t + random.uniform(0, jitter_width)
            points.append(ray_o + ray_d * t)
    if not points:
        return np.empty((0, 3))
    return np.array(points)

In [15]:
# Sample points along the 5 rays drawn above; 5 rays x 32 samples = 160 points.
points = sample_along_rays(rays_o, rays_d)
print(len(points))

160


In [16]:


import viser, time  # pip install viser
import numpy as np

# --- You Need to Implement These ------
# Inputs for the visualisation cell below: a fresh dataset, 5 random rays,
# the points sampled along them, and the image resolution (H rows, W columns).
dataset = RaysData(images_train, K, c2ws_train)
rays_o, rays_d, pixels = dataset.sample_rays(5)
points = sample_along_rays(rays_o, rays_d, perturb=True)
H, W = images_train.shape[1:3]

In [17]:
# Visualise the training cameras, the sampled rays and the sample points in a
# viser scene.
# NOTE(review): share=True presumably requests a publicly reachable URL —
# confirm against the viser documentation before running on a shared machine.
server = viser.ViserServer(share=True)
# One frustum per training view, posed by the rotation block and the last
# column of its camera-to-world matrix and textured with the image itself.
for i, (image, c2w) in enumerate(zip(images_train, c2ws_train)):
    server.add_camera_frustum(
        f"/cameras/{i}",
        # Field of view derived from the image height and the focal length K[0, 0].
        fov=2 * np.arctan2(H / 2, K[0, 0]),
        aspect=W / H,
        scale=0.15,
        wxyz=viser.transforms.SO3.from_matrix(c2w[:3, :3]).wxyz,
        position=c2w[:3, 3],
        image=image
    )
# Each ray is drawn as a two-point spline from its origin to distance 6 along
# its direction.
for i, (o, d) in enumerate(zip(rays_o, rays_d)):
    server.add_spline_catmull_rom(
        f"/rays/{i}", positions=np.stack((o, o + d * 6.0)),
    )
# All sample points as one point cloud; the all-zero colour array presumably
# renders them black.
server.add_point_cloud(
    f"/samples",
    colors=np.zeros_like(points).reshape(-1, 3),
    points=points.reshape(-1, 3),
    point_size=0.02,
)
# Blocks the cell for up to 1000 s, presumably to leave time to inspect the
# viewer; the recorded KeyboardInterrupt shows it was stopped by hand.
time.sleep(1000)

KeyboardInterrupt: 

In [50]:
def volrend(sigmas, rgbs, step_size):
    """Composite per-sample colours along each ray (discrete volume rendering).

    For sample ``i`` on a ray the weight is ``T_i * (1 - exp(-sigma_i * step_size))``
    with transmittance ``T_i = exp(-step_size * sum_{j < i} sigma_j)``; the
    rendered colour is the weighted sum of the sample colours.

    Args:
        sigmas: Densities of shape ``(num_rays, num_samples, 1)``.
        rgbs: Colours of shape ``(num_rays, num_samples, 3)``.
        step_size: Distance between consecutive samples (scalar).

    Returns:
        Tensor of shape ``(num_rays, 3)`` with the rendered colour per ray.
    """
    # Allocate the padding with the dtype AND device of `sigmas`. A plain
    # torch.zeros(...) lands on the default device and makes torch.cat fail
    # when the inputs live on another device (e.g. CUDA).
    leading_zeros = sigmas.new_zeros((sigmas.size(0), 1, 1))
    padded_sigmas = torch.cat((leading_zeros, sigmas), dim=1)

    # Exclusive cumulative sum: the density accumulated *before* each sample.
    cum_sigmas = torch.cumsum(padded_sigmas, dim=1)[:, :-1]
    T = torch.exp(-cum_sigmas * step_size)
    # Opacity contributed by each sample's own interval.
    interval_sigmas = 1 - torch.exp(-sigmas * step_size)
    weights = T * interval_sigmas

    return torch.sum(rgbs * weights, dim=1)

In [52]:
# Known-answer check for volrend: seeded random densities and colours for
# 10 rays x 64 samples must reproduce the reference colours below.
torch.manual_seed(42)
sigmas = torch.rand((10, 64, 1)) * 1000
rgbs = torch.rand((10, 64, 3))
# Sample spacing for 64 samples between distances 2.0 and 6.0.
step_size = (6.0 - 2.0) / 64
rendered_colors = volrend(sigmas, rgbs, step_size)
# Reference output, one RGB row per ray.
correct = torch.tensor([
  [0.6020, 0.0316, 0.9366],
  [0.0620, 0.2249, 0.1381],
  [0.7785, 0.4253, 0.7124],
  [0.8748, 0.5055, 0.7411],
  [0.2240, 0.5240, 0.4298],
  [0.0531, 0.7500, 0.0501],
  [0.0458, 0.9415, 0.4620],
  [0.6692, 0.3450, 0.0991],
  [0.7392, 0.6365, 0.3080],
  [0.2425, 0.9346, 0.9305]]
)
assert torch.allclose(rendered_colors, correct, rtol=1e-4, atol=1e-4)