In [2]:
import numpy as np
import pandas as pd
import math as m
from einops import rearrange, repeat
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from tqdm import tqdm


### Preperation (NOTE: Using meter as unit)

In [3]:
# Utility functions
# convert pointcloud from cartisean coordinate to spherical coordinate
def cart2sph(xyz):
    x = xyz[:,0]
    y = xyz[:,1]
    z = xyz[:,2]
    XsqPlusYsq = x**2 + y**2
    r = np.sqrt(list(XsqPlusYsq + z**2))
    elev = np.arctan2(list(z), np.sqrt(list(XsqPlusYsq)))
    pan = np.arctan2(list(x), list(y))

    output = np.array([r, elev, pan])
    return rearrange(output, 'a b -> b a') #take transpose


def sph2cart(ang):
    ele = ang[:,0]
    pan = ang[:,1]
    x = np.cos(ele)*np.cos(pan)
    y = np.cos(ele)*np.sin(pan)
    z = np.sin(ele)
    output = np.array([x,y,z])
    return rearrange(output, 'a b -> b a') #take transpose

In [4]:
def loadData():
    # Specify the directory path
    dataset_path = 'datasets/testing1'

    # List all files in the specified path, ignoring directories
    files = [f for f in os.listdir(dataset_path) if os.path.isfile(os.path.join(dataset_path, f))]
    files.sort()

    # read the files
    points_xyz = []
    for s in files:
        path = 'datasets/testing1/' + s
        df = pd.read_csv(path)
        a = df.to_numpy()
        points_xyz.append(a[:,8:11])
    return points_xyz

def prepareData(points_xyz):
    # Find the fiew direction of each points:
    # NOTE: points in spherical coordinate are arranged: [r, elev, pan]
    points_sphere = []
    for points in points_xyz:
        points_sphere.append(cart2sph(points))

    ### Process the data
    # Translation vectors for points in each view, we are using camera centre at first frame as origin of world coordinate
    # NOTE: translation vectors below are found by assuming transformation between frames are translations, and obatined by manually finding corrspondance
    # They are translation of the same corrspondance across different frames
    # HARD CODED HERE
    t0 = np.array([0,0,0])
    t1 = np.array([-0.671,-0.016,0.215])
    t2 = np.array([-1.825,-0.091,0.147])
    t3 = np.array([-2.661,-0.263,0.166])
    t4 = np.array([-3.607,-0.156,0.039])
    translations = [t0, t1, t2, t3, t4]

    # camera centre locations
    centres = [-t for t in translations]
    centres_data = []
    for i,c in enumerate(centres):
        l = len(points_sphere[i])
        temp = np.tile(c, (l, 1))
        centres_data.append(temp)

    # stack the points into one big matrix
    stacked = []
    for i in range(len(points_sphere)):
        temp = np.hstack((points_sphere[i], centres_data[i]))
        stacked.append(temp)

    dataset = np.array([])
    for i in range(len(stacked)):
        if i == 0:
            dataset = stacked[i]
        else:
            dataset = np.vstack((dataset, stacked[i]))
    np.random.shuffle(dataset)

    # Mid pass filter, for distance value between 2 and 50 meter
    mask1 = dataset[:,0] > 2
    dataset = dataset[mask1]
    mask2 = dataset[:,0] < 50
    dataset = dataset[mask2]

    return dataset

In [5]:
class LiDAR_NeRF(nn.Module):
    def __init__(self, embedding_dim_pos = 10, embedding_dim_dir = 4, hidden_dim = 256, device = 'cuda'):
        super(LiDAR_NeRF, self).__init__()
        self.device = device
        self.embedding_dim_dir = embedding_dim_dir
        self.embedding_dim_pos = embedding_dim_pos
        self.block1 = nn.Sequential(
            nn.Linear(embedding_dim_pos * 6 + 3 + embedding_dim_dir * 4 + 2, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
        )
        
        self.block2 = nn.Sequential(
            nn.Linear(embedding_dim_pos * 6 + 3 + embedding_dim_dir * 4 + 2 + hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim,1)
        )
        
    @staticmethod
    def positional_encoding(x, L):
        out = [x]
        for j in range(L):
            out.append(torch.sin(2 ** j * x))
            out.append(torch.cos(2 ** j * x))
        return torch.cat(out, dim=1)
    

    def forward(self, o, d):
        emb_x = self.positional_encoding(o, self.embedding_dim_pos)
        emb_d = self.positional_encoding(d, self.embedding_dim_dir)
        input = torch.hstack((emb_x,emb_d)).to(dtype=torch.float32)
        temp = self.block1(input)
        input2 = torch.hstack((temp, input)).to(dtype=torch.float32) # add skip input
        output = self.block2(input2)
        return output


In [6]:
def get_sample_positions(origins, angles, ground_truth_distance, num_bins = 100, device = 'cpu'):
    elev = angles[:,0]
    pan = angles[:,1]
    dir_x = torch.tensor(np.cos(elev)*np.cos(pan))      # [batch_size]
    dir_y = torch.tensor(np.cos(elev)*np.sin(pan))      # [batch_size]
    dir_z = torch.tensor(np.sin(elev))
    gt_tensor = torch.tensor(ground_truth_distance)
    # create a list of magnitudes with even spacing from 0 to 1
    t = torch.linspace(0,1, num_bins, device=device).expand(dir_x.shape[0], num_bins)  # [batch_size, num_bins]
    
    # preterb the spacing
    mid = (t[:, :-1] + t[:, 1:]) / 2.
    lower = torch.cat((t[:, :1], mid), -1)
    upper = torch.cat((mid, t[:, -1:]), -1)
    u = torch.rand(t.shape, device = device)
    t = lower + (upper - lower) * u  # [batch_size, nb_bins]
    t = rearrange(t, 'a b -> b a')  # [num_bins, batch_size]  take transpose so that multiplication can broadcast

    # multiply the magnitude to ground truth distance and add 3 meter
    t = torch.sqrt(t)
    t = gt_tensor*t
    t += 10

    # convert magnitudes into positions by multiplying it to the unit vector
    pos_x = dir_x*t     # [num_bins, batch_size]
    pos_y = dir_y*t
    pos_z = dir_z*t
    # concat them for output
    multiplied = rearrange([pos_x,pos_y,pos_z], 'c b n  -> (n b) c')   # [num_bin*batchsize, 3]
    # tile the origin values
    origins_tiled = repeat(origins, 'n c -> (n b) c', b = num_bins) # [num_bin*batch_size, 3]
    pos = torch.tensor(origins_tiled) + multiplied
    # tile the angle too
    angles_tiled = torch.tensor(repeat(angles, 'n c -> (n b) c', b = num_bins))
    return pos, angles_tiled, origins_tiled



In [7]:
# returns pytorch tensor of sigmoid of projected SDF
def get_actual_value(sample_positions, gt_distance, origins, num_bins=100):
    # calculate distance from sample_position
    temp = torch.tensor((sample_positions)**2)
    pos_distance = torch.sqrt(torch.sum(temp, dim=1, keepdim=True))
    # tile distances
    gt_distance_tiled = repeat(gt_distance, 'b -> (b n) 1', n=num_bins)
    # find the "projected" value
    sigmoid = nn.Sigmoid()
    values = sigmoid(-(pos_distance - gt_distance_tiled))
    return values

In [8]:
# sample data for testing
points = loadData()
dataset = prepareData(points)
test_batch = dataset[128:256,:]
ground_truth_distance = test_batch[:,0]
angles = test_batch[:,1:3]
origin = test_batch[:,3:6]
pos, ang, origins = get_sample_positions(origin, angles, ground_truth_distance ,num_bins=100)
val = (get_actual_value(pos, ground_truth_distance, origins)).to(dtype = torch.float32)

model = LiDAR_NeRF(hidden_dim=256)
rendered = model(pos, ang)
sigmoid = nn.Sigmoid()
rendered_sigmoid = sigmoid(rendered)
temp = torch.zeros_like(pos)

# for x in val:
#     print(x)
# print(min(rendered))
# loss_bce = nn.BCELoss()
# loss = loss_bce(rendered_sigmoid, val)
# print(loss)

  temp = torch.tensor((sample_positions)**2)


In [9]:
def train(model, optimizer, scheduler, dataloader, device = 'cpu', epoch = int(1e5), num_bins = 100):
    training_losses = []
    for _ in tqdm(range(epoch)):
        for batch in dataloader:
            # parse the batch
            ground_truth_distance = batch[:,0]
            angles = batch[:,1:3]
            origin = batch[:,3:6]
            
            sample_positions, sample_angles, sample_origins = get_sample_positions(origin, angles, ground_truth_distance, num_bins=num_bins)
            rendered_value = model(sample_positions.to(device), sample_angles.to(device))
            
            sigmoid = nn.Sigmoid()
            rendered_value_sigmoid = sigmoid(rendered_value)
            actual_value_sigmoided = (get_actual_value(sample_positions.to(device), ground_truth_distance.to(device), sample_origins.to(device))).to(dtype = torch.float32)
            # print(rendered_value_sigmoid[0:10]) 
            # loss = lossBCE(rendered_value, actual_value_sigmoided)  # + lossEikonal(model)
            loss_bce = nn.BCELoss()
            loss = loss_bce(rendered_value_sigmoid, actual_value_sigmoided)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            training_losses.append(loss.item())
        scheduler.step()
        print(loss.item())
    return training_losses
    

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")
points = loadData()
print("loaded data")
data_matrix = prepareData(points)
print("prepared data")
training_dataset = torch.from_numpy(data_matrix)
data_loader = DataLoader(training_dataset, batch_size=1024, shuffle = True)
model = LiDAR_NeRF(hidden_dim=512, embedding_dim_dir=10, device = device).to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=5e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2, 4, 8, 16], gamma=0.5)
losses = train(model, optimizer, scheduler, data_loader, epoch = 8, device=device)

Using cuda device
loaded data
prepared data


  dir_x = torch.tensor(np.cos(elev)*np.cos(pan))      # [batch_size]
  dir_y = torch.tensor(np.cos(elev)*np.sin(pan))      # [batch_size]
  dir_z = torch.tensor(np.sin(elev))
  gt_tensor = torch.tensor(ground_truth_distance)
  pos = torch.tensor(origins_tiled) + multiplied
  angles_tiled = torch.tensor(repeat(angles, 'n c -> (n b) c', b = num_bins))
 12%|█▎        | 1/8 [03:09<22:03, 189.05s/it]

0.14784710109233856


 25%|██▌       | 2/8 [06:20<19:02, 190.47s/it]

0.13408119976520538


 38%|███▊      | 3/8 [09:33<15:58, 191.71s/it]

0.13046765327453613


 50%|█████     | 4/8 [12:48<12:52, 193.05s/it]

0.13021467626094818


 62%|██████▎   | 5/8 [16:03<09:40, 193.58s/it]

0.12028110027313232


 75%|███████▌  | 6/8 [19:18<06:28, 194.13s/it]

0.11808963119983673


 88%|████████▊ | 7/8 [22:32<03:14, 194.17s/it]

0.12474925816059113


100%|██████████| 8/8 [25:46<00:00, 193.28s/it]

0.1224590465426445





In [11]:
### Save the model
torch.save(model.state_dict(), 'local/models/version1_trial8.pth')

In [9]:
#### Load the model and try to "visualize" the model's datapoints
model_evel = LiDAR_NeRF(hidden_dim=512, embedding_dim_dir=10, device = 'cpu')
model_evel.load_state_dict(torch.load('local/models/version1_trial4.pth'))
model_evel.eval(); # Set the model to inference mode

In [12]:
# # sample data for testing
# points = loadData()
# dataset = prepareData(points)
# test_batch = dataset[0:512,:]
# ground_truth_distance = test_batch[:,0]
# angles = test_batch[:,1:3]
# origin = test_batch[:,3:]
# pos, ang = get_sample_positions(origin, angles, ground_truth_distance, num_bins=100)
# # pos = torch.zeros_like(pos)
# val = (get_actual_value(pos, ground_truth_distance)).to(dtype = torch.float32)

# with torch.no_grad():
#     pos_tensor = torch.tensor(pos)
#     ang_tensor = torch.tensor(ang)
#     output = model_evel(pos_tensor, ang_tensor)

# sig = nn.Sigmoid()
# output_sigmoided = sig(output)
# lossBCE = nn.BCELoss()
# loss = lossBCE(val, output_sigmoided)

In [13]:
### Render some structured pointcloud for evaluation
with torch.no_grad():
    dist = 0.1 # initial distanc forvisualization
    pos = torch.zeros((100000,3))
    ele = torch.linspace(-0.34, 0.3, 100)
    pan = torch.linspace(-3.14, 3.14, 1000)
    ele_tiled = repeat(ele, 'n -> (r n) 1', r = 1000)
    pan_tiled = repeat(pan, 'n -> (n r) 1', r = 100)
    ang = torch.cat((ele_tiled, pan_tiled), dim=1)

    # direction for each "point" from camera centre
    directions = torch.tensor(sph2cart(np.array(ang)))

    for i in range(500):
        output2 = model_evel(pos, ang)
        temp = torch.sign(output2)
        pos += directions * dist * temp
        # dist /= 2


In [10]:
### Render some structured pointcloud for evaluation
with torch.no_grad():
    dist = 32 # initial distanc forvisualization
    pos = torch.zeros((100000,3))
    ele = torch.linspace(-0.34, 0.3, 100)
    pan = torch.linspace(-3.14, 3.14, 1000)
    ele_tiled = repeat(ele, 'n -> (r n) 1', r = 1000)
    pan_tiled = repeat(pan, 'n -> (n r) 1', r = 100)
    ang = torch.cat((ele_tiled, pan_tiled), dim=1)

    # direction for each "point" from camera centre
    directions = torch.tensor(sph2cart(np.array(ang)))

    for i in range(10):
        output2 = model_evel(pos, ang)
        temp = torch.sign(output2)
        pos += directions * dist * temp
        dist /= 2


In [16]:
points = loadData()
data = prepareData(points)
ang = data[:,1:3]
dir = sph2cart(ang)
r = rearrange(data[:,0], 'a -> a 1')
pos_np = dir*r

In [17]:
### Save to csv for visualization
df_temp = pd.read_csv('local/visualize/dummy.csv')
df_temp = df_temp.head(554952)
# pos_np = pos.numpy()

df_temp['X'] = pos_np[:,0]
df_temp['Y'] = pos_np[:,1]
df_temp['Z'] = pos_np[:,2]
df_temp.to_csv('local/visualize/register_check2.csv', index=False)