In [1]:
import json
import os

import numpy as np
import tensorflow as tf
from tensorflow import reshape

In [2]:
def read_json(json_file_path):
    """Load a JSON file and return its parsed contents.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document (whatever `json.load` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [3]:
def get_image_data(data_dict, base_img_dir = 'Data/ship/'):
    '''
        Collect the image path and camera transformation matrix of every frame.

        Args:
            data_dict: Parsed dataset description with a "frames" list.
                Assumes each frame has a "file_path" and a "transform_matrix"
                entry (NeRF synthetic dataset layout) - TODO confirm against
                the actual JSON file.
            base_img_dir: Directory the frame paths are relative to.

        Returns:
            (img_paths, img_cam_transform_matrices): two lists of equal length.
            Paths are strings; matrices are float32 NumPy arrays.

        Raises:
            KeyError: If "frames" or one of the per-frame keys is missing.
    '''

    img_paths = []
    img_cam_transform_matrices = []

    for frame in data_dict["frames"]:
        img_path = os.path.normpath(os.path.join(base_img_dir, frame["file_path"]))

        # Assumption: paths stored without an extension refer to PNG files
        # (the images on disk are loaded as '.png') - TODO confirm.
        if not os.path.splitext(img_path)[1]:
            img_path += '.png'

        img_paths.append(img_path)
        # ndarray so the matrix can be sliced like camera2world_matrix[:3, :3].
        img_cam_transform_matrices.append(
            np.array(frame["transform_matrix"], dtype=np.float32))

    return img_paths, img_cam_transform_matrices

In [4]:
class getImage():
	"""Callable that loads an image file as a float32 tensor of a fixed size.

	Args:
		imageWidth: First dimension of the returned tensor.
		imageHeight: Second dimension of the returned tensor.
	"""

	def __init__(self, imageWidth, imageHeight):

		self.imageWidth = imageWidth
		self.imageHeight = imageHeight

	def __call__(self, imagePath):
		"""Read, decode, rescale and resize the image stored at `imagePath`.

		Returns:
			A float32 tensor of shape (imageWidth, imageHeight, 3).
		"""

		# Fully-qualified TensorFlow ops: the bare names (read_file,
		# decode_jpeg, convert_image_dtype, resize) are never imported in
		# this notebook and raised NameError.
		image = tf.io.read_file(imagePath)
		image = tf.image.decode_jpeg(image, 3)
		# Integer pixel values are rescaled to floats in [0, 1].
		image = tf.image.convert_image_dtype(image, dtype=tf.float32)
		# NOTE(review): tf.image.resize expects (new_height, new_width). The
		# (width, height) order is kept so the output shape is unchanged; it
		# only matters for non-square targets - confirm the intended layout.
		image = tf.image.resize(image, (self.imageWidth, self.imageHeight))
		image = tf.reshape(image, (self.imageWidth, self.imageHeight, 3))

		return image

In [5]:
def get_rays_for_image_matrix(width, height, camera2world_matrix, focal_length):
    """Build one ray (direction and origin) for every pixel of an image.

    Args:
        width: Number of pixel columns.
        height: Number of pixel rows.
        camera2world_matrix: Array whose upper-left 3x3 block is the rotation
            and whose last column (first three rows) is the camera position.
        focal_length: Focal length in pixels.

    Returns:
        (ray_dirs, origins), both of shape (height, width, 3). Directions are
        not normalised: the camera-space vector is
        (col - width/2, -(row - height/2), -focal_length) before rotation.
    """
    rotation = camera2world_matrix[:3, :3]
    translation = camera2world_matrix[:3, -1]

    rows = np.arange(height, dtype=np.float32)
    cols = np.arange(width, dtype=np.float32)
    # 'ij' indexing: both grids have shape (height, width).
    row_idx, col_idx = np.meshgrid(rows, cols, indexing='ij')

    # Camera-space direction components; the camera looks down -z and
    # image rows grow downwards, hence the two sign flips.
    x_cam = col_idx - width*0.5
    y_cam = -(row_idx - height*0.5)
    z_cam = -np.ones_like(x_cam)*focal_length

    cam_dirs = np.stack([x_cam, y_cam, z_cam], axis=-1)

    # Per-pixel matrix-vector product rotation @ direction, via broadcasting.
    ray_dirs = (cam_dirs[:, :, None, :] * rotation).sum(axis=3)

    # Every ray starts at the camera position.
    origins = np.ones_like(ray_dirs) * translation

    return ray_dirs, origins

In [6]:
def get_sampled_ray_points(ray_dirs, near=1, far=10, num_samples=20):
    """Draw stratified random sample distances along every ray.

    The interval [near, far) is split into `num_samples` equal bins and one
    distance is drawn uniformly inside each bin, so the samples of a ray are
    increasing and never exceed `far`.

    Args:
        ray_dirs: Array of ray directions; only its shape is used. The last
            axis is dropped and replaced by the sample axis.
        near: Lower bound of the sampled distances.
        far: Upper bound of the sampled distances.
        num_samples: Number of samples per ray.

    Returns:
        Array of shape ray_dirs.shape[:-1] + (num_samples,).
    """
    sample_shape = list(ray_dirs.shape[:-1]) + [num_samples]
    bin_width = (far - near) / num_samples

    # endpoint=False yields the lower edge of each bin. Including `far` as a
    # bin start (the default) pushed the last sample beyond `far` and left
    # gaps between strata, because the spacing became (far-near)/(n-1).
    bin_starts = np.linspace(near, far, num_samples, endpoint=False)

    # Independent offset inside the bin for every ray and every sample.
    jitter = np.random.uniform(size=sample_shape) * bin_width

    # bin_starts (num_samples,) broadcasts over the leading ray dimensions.
    ray_samples = bin_starts + jitter

    return ray_samples

In [7]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, concatenate


class NeRFModel(Model):
    """MLP that maps encoded sample points and view directions to colour and density.

    A stack of eight ReLU dense layers processes the encoded points, with the
    input re-injected (concatenated) at the skip positions. Density is
    predicted from that trunk; colour is predicted from the trunk features
    concatenated with the encoded view direction.

    Args:
        positional_encode_size: Size of the last axis of the encoded points.
        direction_encode_size: Size of the last axis of the encoded directions.
        dense_layer_size: Number of units in each trunk layer.
        batch_size: Kept for interface compatibility; not used by the layers.
        skip_layer_idx: The input is concatenated back after every trunk
            layer whose index is a positive multiple of this value. A falsy
            value (0 / None) disables the skip connections.
        name: Optional model name.
    """

    def __init__(self, positional_encode_size, direction_encode_size, dense_layer_size, batch_size, skip_layer_idx, name=None):
        # The name must go by keyword: Keras does not take it positionally
        # (depending on the version it is silently dropped or rejected).
        super(NeRFModel, self).__init__(name=name)

        self._skip_idx_val = skip_layer_idx

        # A subclassed model needs no keras.Input placeholders (they are
        # symbolic tensors, not callable layers); Dense infers its input size
        # on first call. The sizes are stored for reference only.
        # Expected input layout is roughly
        # (img_height, img_width, num_of_sample_ray_points, encoding_size)
        # per frame - TODO confirm before using the model.
        self._positional_encode_size = positional_encode_size
        self._direction_encode_size = direction_encode_size
        self._batch_size = batch_size

        self.__dense_layers = []

        for _ in range(8):
            self.__dense_layers.append(Dense(units=dense_layer_size, activation="relu"))

        self.__density_output = Dense(units=1, activation="relu")

        self.__inner_feature = Dense(units=dense_layer_size)
        self.__inner_dense1 = Dense(units=dense_layer_size//2, activation="relu")
        self.__output_dense = Dense(units=3, activation="sigmoid")


    def call(self, rays, dirs):
        """Run the network.

        Args:
            rays: Encoded sample points, features on the last axis.
            dirs: Encoded view directions with the same leading dimensions
                as `rays`, features on the last axis.

        Returns:
            (color_vals, density_vals) with last-axis sizes 3 and 1.
        """

        x = rays

        # enumerate instead of range(list), which raised TypeError.
        for i, dense_layer in enumerate(self.__dense_layers):

            x = dense_layer(x)

            # if residual connection point (guarded so a skip index of 0
            # disables the skips instead of dividing by zero)
            if self._skip_idx_val and i > 0 and i % self._skip_idx_val == 0:
                # inject the residual connection
                x = concatenate([x, rays], axis=-1)

        # Density value
        density_vals = self.__density_output(x)

        # Color value
        color_features = self.__inner_feature(x)
        x = concatenate([color_features, dirs], axis=-1)
        x = self.__inner_dense1(x)
        color_vals = self.__output_dense(x)

        return color_vals, density_vals

Using the implementation provided in the original source code repo, since it is a bit different from what I understood from the paper.

In [8]:
def render_image_depth(colors, volume_density, sample_values):
	"""Composite per-sample colours and densities along each ray.

	Args:
		colors: Per-sample colours; the sample axis is second to last and the
			channel axis is last.
		volume_density: Per-sample densities; must broadcast against
			`sample_values` (assumes the sample axis is last - TODO confirm
			that callers squeeze a trailing size-1 axis if there is one).
		sample_values: Sample distances along each ray, sample axis last.

	Returns:
		(image, depth, weights): the weighted colour sum over the samples,
		the weighted sum of sample distances, and the per-sample weights.
	"""

	# calculate the delta between adjacent samples
	delta = sample_values[..., 1:] - sample_values[..., :-1]

	# The last sample has no successor, so give it a very large interval.
	# The pad is derived from sample_values so it has the same leading
	# dimensions and dtype as delta: tf.concat needs matching ranks and
	# dtypes, which a bare [1e10] only satisfies for a single 1-D float32 ray.
	last_delta = tf.ones_like(sample_values[..., :1]) * 1e10
	delta = tf.concat([delta, last_delta], axis=-1)

	# calculate alpha from sigma and delta values
	expTerm = tf.exp(-volume_density * delta)
	alpha = 1.0 - expTerm

	# calculate the transmittance and weights of the ray points; epsilon
	# keeps the cumulative product from collapsing to exactly zero
	epsilon = 1e-10
	transmittance = tf.math.cumprod(expTerm + epsilon, axis=-1, exclusive=True)
	weights = alpha * transmittance

	# build the image and depth map from the points of the rays
	image = tf.reduce_sum(weights[..., None] * colors, axis=-2)
	depth = tf.reduce_sum(weights * sample_values, axis=-1)

	# return rgb, depth map and weights
	return (image, depth, weights)

In [9]:

def get_fine_samples(weights, bins, size):
    """Draw samples from the piecewise-constant distribution given by `weights`.

    The weights are normalised into a pdf, accumulated into a cdf, and uniform
    random numbers are mapped through the inverse cdf with linear
    interpolation between the bounding `bins` values.

    Args:
        weights: Non-negative weights. They are summed and cumulated as one
            flat sequence, i.e. treated as belonging to a single ray.
        bins: Values indexed in parallel with the cdf; indices run up to
            len(weights), so it needs at least len(weights) + 1 entries.
        size: Number (or shape) of samples to draw.

    Returns:
        Array of samples with the shape of the drawn uniform numbers.

    Raises:
        IndexError: If `bins` is too short for the looked-up indices.
    """
    weights = np.asarray(weights)

    cum_weights = np.sum(weights)
    if cum_weights > 0:
        pdf = weights/cum_weights
    else:
        # All-zero weights would divide by zero and turn every sample into
        # NaN; fall back to sampling the bins uniformly.
        pdf = np.full(weights.shape, 1.0 / max(weights.size, 1))
    cdf = np.concatenate(([0], np.cumsum(pdf)))

    u = np.random.uniform(size=(size))
    indices = np.searchsorted(cdf, u)

    # define the boundaries (clamped to the valid cdf index range)
    below = np.maximum(0, indices-1)
    above = np.minimum(cdf.shape[-1]-1, indices)
    indicesG = np.stack([below, above], axis=-1)  # lower/upper index pair per sample

    # gather the cdf according to the indices
    cdfG = np.take(cdf, indicesG, axis=-1)

    # gather the bin values according to the indices (lower and upper bound)
    tValsMidG = np.take(bins, indicesG, axis=-1)

    # cdf range covered by each selected bin; near-empty ranges are replaced
    # by 1 to avoid dividing by ~0
    denom = cdfG[..., 1] - cdfG[..., 0]
    denom = np.where(denom < 1e-5, np.ones_like(denom), denom)

    # relative position of u inside its cdf interval (0 = lower, 1 = upper)
    t = (u - cdfG[..., 0]) / denom

    # map the relative position onto the bin range
    samples = (tValsMidG[..., 0] + t *
        (tValsMidG[..., 1] - tValsMidG[..., 0]))

    return samples

To actually train the NeRF model, the following steps need to be followed:

1. Read the image dataset and generate ray direction and ray origins
2. Encode the ray directions and ray origins
3. Pass the encoded values to Coarse NeRF
4. From the returned weights, get fine samples
5. Pass the fine samples to NeRF model
6. Calculate loss (both coarse and fine)
7. Back propagation

In [23]:
def load_image(filename):
    """Read the PNG file at `filename` and decode it into a 3-channel image tensor."""
    return tf.image.decode_png(tf.io.read_file(filename), channels=3)

In [26]:
# Sanity check: load one training image and display its shape.
img = load_image('./Data/ship/train/r_0.png')
img.shape

TensorShape([800, 800, 3])