## Get all required processed data from the ply file and JSON file individually

In [1]:
import torch
from gs_lib import getPlyData, getJSONData, getWorld2View, getProjectionMatrix, eval_sh, \
    getFinalCovariance, projection, calc_radius, calc_rectangle


# Rendering configuration.
# The camera JSON and the trained point-cloud PLY are the only two files
# needed for rendering; both paths are derived from `dataset`/`iteration`.
dataset = "/home/suresh/2024/gs/datasets/models/truck"
iteration = "30000"
max_sh_degree = 3
json_file = f"{dataset}/cameras.json"
ply_file = f"{dataset}/point_cloud/iteration_{iteration}/point_cloud.ply"
view = 52
output_scale_down = 2  # How much to scale down from the original image
tile_size = 16  # Block size for tile-based splatting/rendering
bg = 0  # use 1 for white, 0 for black

# Print the resolved paths so a wrong dataset/iteration is spotted early
print(f"Files \t={ply_file}\n\t={json_file}")

# Check for the available parallel device.
# `torch.has_mps` is deprecated and only says whether PyTorch was *built*
# with MPS support, so ask the backend whether MPS can actually be used.
# The getattr guard keeps this working on PyTorch builds without the backend.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device running on = {device}")


# Load everything the renderer needs from the two input files:
#   * getPlyData  -> the per-gaussian values unpacked below (xyz, opacities, ...)
#   * getJSONData -> the camera/view values for the selected `view`, including
#                    world2view_matrix and projection_matrix
xyz, opacities, sh_all, E_raw, E = getPlyData(ply_file, max_sh_degree)
(
    camera_center,
    w,
    h,
    final_w,
    final_h,
    fov_x,
    fov_y,
    focal_x,
    focal_y,
    total_blocks,
    world2view_matrix,
    projection_matrix,
) = getJSONData(json_file, view, output_scale_down, tile_size)

# Dump the camera setup so it can be checked by eye
print(f"\nCam center : \n{camera_center}\n")
print(f"\nW2V matrix : \n{world2view_matrix}\n")
print(f"\nProjection matrix : \n{projection_matrix}\n")
print(f"Tile size \t\t\t= {tile_size}")
print(f"Total blocks \t\t\t= {total_blocks}")
print(f"Original Width x Height \t= {w} x {h}")
print(f"Block-adjusted Width x Height \t= {final_w} x {final_h}")
print(f"Focal_x \t\t\t= {focal_x}")
print(f"Focal_y \t\t\t= {focal_y}")
print(f"FOV_x \t\t\t\t= {fov_x}")
print(f"FOV_y \t\t\t\t= {fov_y}")

Files 	=/home/suresh/2024/gs/datasets/models/truck/point_cloud/iteration_30000/point_cloud.ply
	=/home/suresh/2024/gs/datasets/models/truck/cameras.json
Device running on = cuda

Cam center : 
tensor([ 0.4077,  0.3685, -2.2599])


W2V matrix : 
tensor([[ 0.8014, -0.2391,  0.5483,  0.0000],
        [ 0.1906,  0.9709,  0.1449,  0.0000],
        [-0.5670, -0.0116,  0.8236,  0.0000],
        [-1.6784, -0.2866,  1.5843,  1.0000]])


Projection matrix : 
tensor([[ 1.1888,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  2.1197,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  1.0010,  1.0000],
        [ 0.0000,  0.0000, -0.1001,  0.0000]])

Tile size 			= 16
Total blocks 			= 2170
Original Width x Height 	= 1957 x 1091
Block-adjusted Width x Height 	= 992 x 560
Focal_x 			= 581.3302001953125
Focal_y 			= 578.6701049804688
FOV_x 				= 1.3986958265304565
FOV_y 				= 0.8816215991973877


## Now calculate items which require data from both files

In [2]:
# Compute colors.
# The colors are view dependent, so the spherical harmonics are evaluated
# along the unit direction from the camera center to each gaussian.
# NOTE: despite its name, `distanceFromCam` holds the displacement *vector*
# (gaussian position minus camera center), not a scalar distance; the name is
# kept so that other cells referring to it keep working.
# Broadcasting subtracts camera_center from every row of xyz, giving the same
# result as tiling it to xyz's row count without allocating that extra copy.
distanceFromCam = xyz - camera_center
# Divide each row by its own length; keepdim keeps the norm as a column so it
# broadcasts row-wise. A gaussian exactly at the camera center would divide by
# zero here (unchanged from the original behavior).
distanceFromCam_normed = distanceFromCam / torch.linalg.norm(distanceFromCam, dim=1, keepdim=True)
sh2rgb = eval_sh(max_sh_degree, sh_all, distanceFromCam_normed)
# Shift by 0.5 and clamp negatives to 0; there is no upper clamp, so values
# above 1.0 can remain.
colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
print(f"\nColors_precomp : \n{colors_precomp}\n")
# Background color: all ones when `bg` is truthy, all zeros otherwise
bg_color = torch.ones([3]) if bg else torch.zeros([3])
print(f"\nBackground : \n{bg_color}\n")


Colors_precomp : 
tensor([[0.3391, 0.3510, 0.4118],
        [0.3817, 0.3509, 0.4344],
        [0.2098, 0.3660, 0.2177],
        ...,
        [1.0388, 1.0412, 1.0600],
        [0.3586, 0.3574, 0.2428],
        [0.8882, 0.8200, 0.9665]])


Background : 
tensor([0., 0., 0.])



In [3]:
# Now calculate the final 2D (screen-space) covariance of every gaussian.
# This is Equation 5 in the official paper.
# Inputs: gaussian centers (xyz), the raw 3D covariance data (E_raw), the
# world-to-view matrix, and the camera field of view / focal lengths.
# NOTE(review): the output recorded for this cell is torch.Size([N, 3, 3]),
# presumably printed inside getFinalCovariance — confirm which entries of each
# 3x3 matrix hold the 2D covariance.
final_covariance = getFinalCovariance(
    mean3d=xyz,
    cov3d=E_raw,
    viewmatrix=world2view_matrix,
    fov_x=fov_x,
    fov_y=fov_y,
    focal_x=focal_x,
    focal_y=focal_y
)

# print(f"\nFinal Covariance : \n{final_covariance}\n")

torch.Size([2541226, 3, 3])


In [4]:
# Now transform all the 3D gaussian centers to 2D screen space.
# Returned values:
#     projected_points     : 3D -> camera -> 2D (world space points taken to
#                            camera space and then to image space).
#     camera_space_points  : 3D -> camera (world space points in camera space).
#     points_before_camera : True/False array used for culling; points behind
#                            the camera are False.
projected_points, camera_space_points, points_before_camera = projection(xyz, world2view_matrix, projection_matrix)

In [5]:
# We can calculate the depth from the variable camera_space_points.
# The z-axis of the camera coordinate system points from the camera origin
# into the world, so the z component (column index 2) of each camera-space
# point is used as that point's depth.
depth = camera_space_points[:, 2]

In [6]:
# Convert the projected x/y coordinates to pixel coordinates.
# NDC = Normalized Device Coordinates, where on-screen points lie in [-1, 1].
# projected_points is presumably already in NDC (verify against projection());
# this step maps it to the final image grid:
#     pixel = ((ndc + 1) * size - 1) / 2
# so ndc = -1 maps to -0.5 and ndc = +1 maps to size - 0.5.
# The (width, height) tensor is created with the dtype and device of
# projected_points so the multiplication also works when the points live on
# an accelerator instead of the CPU.
image_size = torch.tensor(
    [final_w, final_h],
    dtype=projected_points.dtype,
    device=projected_points.device,
)
final_2d_gaussians = ((projected_points[:, :2] + 1.0) * image_size - 1.0) / 2

In [7]:
# Drop every gaussian that is behind the camera.
# points_before_camera is a boolean mask; applying it to each per-gaussian
# tensor keeps only the rows whose mask entry is True.
in_front = points_before_camera
(
    final_2d_gaussians_reduced,
    final_covariance_reduced,
    colors_precomp_reduced,
    depth_reduced,
) = (
    per_gaussian[in_front]
    for per_gaussian in (final_2d_gaussians, final_covariance, colors_precomp, depth)
)
# Opacities are masked the same way and then reshaped into a single column
opacities_reduced = opacities[in_front].view(-1, 1)

In [8]:
# To splat the gaussians on the screen, we need the screen area each one covers.
# First find the radius, i.e. the spread of each gaussian; it is derived from
# the (culled) covariance matrices.
# Then compute the bounding box of each gaussian (top-left and bottom-right
# coordinates) from its 2D coordinate, its radius, and the width and height
# of the final image.
# NOTE: `bounding_rectange` is misspelled ("rectangle"); the name is left
# unchanged because other cells may reference it.
radius = calc_radius(final_covariance_reduced)
bounding_rectange = calc_rectangle(final_2d_gaussians_reduced, radius, final_w, final_h)