In [1]:
import numpy as np
import pandas as pd
import math as m
from einops import rearrange, repeat
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from tqdm import tqdm


### Preperation (NOTE: Using meter as unit)

In [2]:
# Utility functions
# convert pointcloud from cartisean coordinate to spherical coordinate
def cart2sph(xyz):
    x = xyz[:,0]
    y = xyz[:,1]
    z = xyz[:,2]
    XsqPlusYsq = x**2 + y**2
    r = np.sqrt(list(XsqPlusYsq + z**2))
    elev = np.arctan2(list(z), np.sqrt(list(XsqPlusYsq)))
    pan = np.arctan2(list(x), list(y))

    output = np.array([r, elev, pan])
    return rearrange(output, 'a b -> b a') #take transpose


def sph2cart(ang):
    ele = ang[:,0]
    pan = ang[:,1]
    x = np.cos(ele)*np.cos(pan)
    y = np.cos(ele)*np.sin(pan)
    z = np.sin(ele)
    output = np.array([x,y,z])
    return rearrange(output, 'a b -> b a') #take transpose

In [3]:
def loadData():
    # Specify the directory path
    dataset_path = 'datasets/testing1'

    # List all files in the specified path, ignoring directories
    files = [f for f in os.listdir(dataset_path) if os.path.isfile(os.path.join(dataset_path, f))]
    files.sort()

    # read the files
    points_xyz = []
    for s in files:
        path = 'datasets/testing1/' + s
        df = pd.read_csv(path)
        a = df.to_numpy()
        points_xyz.append(a[:,8:11])
    return points_xyz

def prepareData(points_xyz):
    # Find the fiew direction of each points:
    # NOTE: points in spherical coordinate are arranged: [r, elev, pan]
    points_sphere = []
    for points in points_xyz:
        points_sphere.append(cart2sph(points))

    ### Process the data
    # Translation vectors for points in each view, we are using camera centre at first frame as origin of world coordinate
    # NOTE: translation vectors below are found by assuming transformation between frames are translations, and obatined by manually finding corrspondance
    # They are translation of the same corrspondance across different frames
    # HARD CODED HERE
    t0 = np.array([0,0,0])
    t1 = np.array([-0.671,-0.016,0.215])
    t2 = np.array([-1.825,-0.091,0.147])
    t3 = np.array([-2.661,-0.263,0.166])
    t4 = np.array([-3.607,-0.156,0.039])
    translations = [t0, t1, t2, t3, t4]

    # camera centre locations
    centres = [-t for t in translations]
    centres_data = []
    for i,c in enumerate(centres):
        l = len(points_sphere[i])
        temp = np.tile(c, (l, 1))
        centres_data.append(temp)

    # stack the points into one big matrix
    stacked = []
    for i in range(len(points_sphere)):
        temp = np.hstack((points_sphere[i], centres_data[i]))
        stacked.append(temp)

    dataset = np.array([])
    for i in range(len(stacked)):
        if i == 0:
            dataset = stacked[i]
        else:
            dataset = np.vstack((dataset, stacked[i]))
    np.random.shuffle(dataset)

    # Mid pass filter, for distance value between 2 and 50 meter
    mask1 = dataset[:,0] > 2
    dataset = dataset[mask1]
    mask2 = dataset[:,0] < 50
    dataset = dataset[mask2]

    return dataset

In [4]:
class LiDAR_NeRF(nn.Module):
    def __init__(self, embedding_dim_pos = 10, embedding_dim_dir = 4, hidden_dim = 256, device = 'cuda'):
        super(LiDAR_NeRF, self).__init__()
        self.device = device
        self.embedding_dim_dir = embedding_dim_dir
        self.embedding_dim_pos = embedding_dim_pos
        self.block1 = nn.Sequential(
            nn.Linear(embedding_dim_pos * 6 + 3 + embedding_dim_dir * 4 + 2, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
        )
        
        self.block2 = nn.Sequential(
            nn.Linear(embedding_dim_pos * 6 + 3 + embedding_dim_dir * 4 + 2 + hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),               
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim,1)
        )
        
    @staticmethod
    def positional_encoding(x, L):
        out = [x]
        for j in range(L):
            out.append(torch.sin(2 ** j * x))
            out.append(torch.cos(2 ** j * x))
        return torch.cat(out, dim=1)

    def forward(self, o, d):
        emb_x = self.positional_encoding(o, self.embedding_dim_pos)
        emb_d = self.positional_encoding(d, self.embedding_dim_dir)
        input = torch.hstack((emb_x,emb_d)).to(dtype=torch.float32)
        temp = self.block1(input)
        input2 = torch.hstack((temp, input)).to(dtype=torch.float32) # add skip input
        output = self.block2(input2)
        return output

In [5]:
def get_sample_positions(origins, angles, ground_truth_distance, num_bins = 100, device = 'cpu'):
    elev = angles[:,0]
    pan = angles[:,1]
    dir_x = torch.tensor(np.cos(elev)*np.cos(pan))      # [batch_size]
    dir_y = torch.tensor(np.cos(elev)*np.sin(pan))      # [batch_size]
    dir_z = torch.tensor(np.sin(elev))
    gt_tensor = torch.tensor(ground_truth_distance)
    # create a list of magnitudes with even spacing from 0 to 1
    t = torch.linspace(0,1, num_bins, device=device).expand(dir_x.shape[0], num_bins)  # [batch_size, num_bins]
    
    # preterb the spacing
    mid = (t[:, :-1] + t[:, 1:]) / 2.
    lower = torch.cat((t[:, :1], mid), -1)
    upper = torch.cat((mid, t[:, -1:]), -1)
    u = torch.rand(t.shape, device = device)
    t = lower + (upper - lower) * u  # [batch_size, nb_bins]
    t = rearrange(t, 'a b -> b a')  # [num_bins, batch_size]  take transpose so that multiplication can broadcast

    # multiply the magnitude to ground truth distance and add 3 meter
    t = gt_tensor*t
    t += 3

    # convert magnitudes into positions by multiplying it to the unit vector
    pos_x = dir_x*t     # [num_bins, batch_size]
    pos_y = dir_y*t
    pos_z = dir_z*t
    # concat them for output
    multiplied = rearrange([pos_x,pos_y,pos_z], 'c b n  -> (n b) c')   # [num_bin*batchsize, 3]
    # tile the origin values
    origins_tiled = repeat(origins, 'n c -> (n b) c', b = num_bins) # [num_bin*batch_size, 3]
    pos = torch.tensor(origins_tiled) + multiplied
    # tile the angle too
    angles_tiled = torch.tensor(repeat(angles, 'n c -> (n b) c', b = num_bins))
    return pos, angles_tiled



In [6]:
# returns pytorch tensor of sigmoid of projected SDF
def get_actual_value(sample_positions, gt_distance, num_bins=100):
    # tile distances
    gt_distance_tiled = repeat(gt_distance, 'b -> (b n) 1', n=num_bins)
    # calculate distance from sample_position
    temp = torch.tensor(sample_positions**2)
    pos_distance = torch.sqrt(torch.sum(temp, dim=1, keepdim=True))
    # find the "projected" value
    sigmoid = nn.Sigmoid()
    values = sigmoid(-(pos_distance - gt_distance_tiled))
    return values

In [7]:
# sample data for testing
points = loadData()
dataset = prepareData(points)
test_batch = dataset[128:256,:]
ground_truth_distance = test_batch[:,0]
angles = test_batch[:,1:3]
origin = test_batch[:,3:7]
pos, ang = get_sample_positions(origin, angles, ground_truth_distance ,num_bins=100)
val = (get_actual_value(pos, ground_truth_distance)).to(dtype = torch.float32)

model = LiDAR_NeRF(hidden_dim=256)
rendered = model(pos, ang)
sigmoid = nn.Sigmoid()
rendered_sigmoid = sigmoid(rendered)
temp = torch.zeros_like(pos)

# for x in val:
#     print(x)
# print(min(rendered))
# loss_bce = nn.BCELoss()
# loss = loss_bce(rendered_sigmoid, val)
# print(loss)

  temp = torch.tensor(sample_positions**2)


In [8]:
def train(model, optimizer, scheduler, dataloader, device = 'cpu', epoch = int(1e5), num_bins = 100):
    training_losses = []
    for _ in tqdm(range(epoch)):
        for batch in dataloader:
            # parse the batch
            ground_truth_distance = batch[:,0]
            angles = batch[:,1:3]
            origin = batch[:,3:7]
            
            sample_positions, sample_angles = get_sample_positions(origin, angles, ground_truth_distance, num_bins=num_bins)
            print(sample_positions[0:10])
            
            rendered_value = model(sample_positions.to(device), sample_angles.to(device))
            
            sigmoid = nn.Sigmoid()
            rendered_value_sigmoid = sigmoid(rendered_value)
            actual_value_sigmoided = (get_actual_value(sample_positions.to(device), ground_truth_distance.to(device))).to(dtype = torch.float32)
            # print(rendered_value_sigmoid[0:10]) 
            # loss = lossBCE(rendered_value, actual_value_sigmoided)  # + lossEikonal(model)
            loss_bce = nn.BCELoss()
            loss = loss_bce(rendered_value_sigmoid, actual_value_sigmoided)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            training_losses.append(loss.item())
        scheduler.step()
        print(loss.item())
    return training_losses
    

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")
points = loadData()
print("loaded data")
data_matrix = prepareData(points)
print("prepared data")
training_dataset = torch.from_numpy(data_matrix)
data_loader = DataLoader(training_dataset, batch_size=1024, shuffle = True)
model = LiDAR_NeRF(hidden_dim=512, embedding_dim_dir=10, device = device).to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=5e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2, 4, 8, 16], gamma=0.5)
losses = train(model, optimizer, scheduler, data_loader, epoch = 4, device=device)


tensor([[-1.2792, -0.4808, -0.0902],
        [-1.8001, -0.5768, -0.0807],
        [-2.0652, -0.6256, -0.0758],
        [-2.5410, -0.7132, -0.0671],
        [-3.0265, -0.8027, -0.0583],
        [-3.2955, -0.8522, -0.0533],
        [-4.1795, -1.0150, -0.0372],
        [-4.3110, -1.0393, -0.0348],
        [-4.8093, -1.1311, -0.0257],
        [-5.2568, -1.2135, -0.0175]], dtype=torch.float64)
tensor([[-0.4046, -1.9273, -0.3664],
        [-0.4720, -1.9883, -0.3730],
        [-0.5996, -2.1038, -0.3855],
        [-0.6906, -2.1862, -0.3945],
        [-0.8156, -2.2993, -0.4068],
        [-0.9497, -2.4208, -0.4200],
        [-1.0933, -2.5507, -0.4341],
        [-1.1557, -2.6072, -0.4402],
        [-1.2635, -2.7048, -0.4509],
        [-1.4008, -2.8290, -0.4644]], dtype=torch.float64)
tensor([[ 2.7216, -2.7609,  0.2752],
        [ 2.7559, -2.8701,  0.2914],
        [ 2.8187, -3.0699,  0.3210],
        [ 2.8575, -3.1931,  0.3392],
        [ 2.8863, -3.2847,  0.3527],
        [ 2.9773, -3.5744,  0.3

  0%|          | 0/4 [00:18<?, ?it/s]


KeyboardInterrupt: 

In [12]:
### Save the model
torch.save(model.state_dict(), 'local/models/version1_trial3.pth')

In [14]:
#### Load the model and try to "visualize" the model's datapoints
model_evel = LiDAR_NeRF(hidden_dim=512, embedding_dim_dir=10, device = 'cpu')
model_evel.load_state_dict(torch.load('local/models/version1_trial3.pth'))
model_evel.eval(); # Set the model to inference mode

In [15]:
# sample data for testing
points = loadData()
dataset = prepareData(points)
test_batch = dataset[0:512,:]
ground_truth_distance = test_batch[:,0]
angles = test_batch[:,1:3]
origin = test_batch[:,3:]
pos, ang = get_sample_positions(origin, angles, ground_truth_distance, num_bins=100)
# pos = torch.zeros_like(pos)
val = (get_actual_value(pos, ground_truth_distance)).to(dtype = torch.float32)

with torch.no_grad():
    pos_tensor = torch.tensor(pos)
    ang_tensor = torch.tensor(ang)
    output = model_evel(pos_tensor, ang_tensor)

sig = nn.Sigmoid()
output_sigmoided = sig(output)
lossBCE = nn.BCELoss()
loss = lossBCE(val, output_sigmoided)

  temp = torch.tensor(sample_positions**2)
  pos_tensor = torch.tensor(pos)
  ang_tensor = torch.tensor(ang)


In [16]:
output_sigmoided[0:100]

tensor([[0.0745],
        [0.0711],
        [0.0677],
        [0.0668],
        [0.0698],
        [0.0666],
        [0.0620],
        [0.0588],
        [0.0581],
        [0.0562],
        [0.0542],
        [0.0527],
        [0.0524],
        [0.0468],
        [0.0471],
        [0.0464],
        [0.0428],
        [0.0416],
        [0.0389],
        [0.0401],
        [0.0376],
        [0.0341],
        [0.0328],
        [0.0316],
        [0.0331],
        [0.0330],
        [0.0320],
        [0.0291],
        [0.0288],
        [0.0242],
        [0.0245],
        [0.0242],
        [0.0219],
        [0.0214],
        [0.0207],
        [0.0201],
        [0.0196],
        [0.0195],
        [0.0177],
        [0.0174],
        [0.0163],
        [0.0153],
        [0.0146],
        [0.0136],
        [0.0138],
        [0.0147],
        [0.0137],
        [0.0129],
        [0.0117],
        [0.0108],
        [0.0119],
        [0.0104],
        [0.0104],
        [0.0103],
        [0.0094],
        [0

In [106]:
sig = nn.Sigmoid()
a = torch.tensor([-2.5], dtype = torch.float)
sig(a)

tensor([0.0759])

In [15]:
### Render some structured pointcloud for evaluation
with torch.no_grad():
    dist = 1 # initial distanc forvisualization
    pos = torch.zeros((100000,3))
    ele = torch.linspace(-0.34, 0.3, 100)
    pan = torch.linspace(-3.14, 3.14, 1000)
    ele_tiled = repeat(ele, 'n -> (r n) 1', r = 1000)
    pan_tiled = repeat(pan, 'n -> (n r) 1', r = 100)
    ang = torch.cat((ele_tiled, pan_tiled), dim=1)

    # direction for each "point" from camera centre
    directions = torch.tensor(sph2cart(np.array(ang)))

    for i in range(50):
        output2 = model_evel(pos, ang)
        temp = torch.sign(output2)
        pos += directions * dist * temp
        # dist /= 2


In [17]:
### Save to csv for visualization
df_temp = pd.read_csv('local/visualize/dummy.csv')
df_temp = df_temp.head(100000)
pos_np = pos.numpy()
df_temp['X'] = pos_np[:,0]
df_temp['Y'] = pos_np[:,1]
df_temp['Z'] = pos_np[:,2]
print(df_temp.head())

   Version  Slot ID  LiDAR Index  Rsvd  Error Code  Timestamp Type  Data Type  \
0        5        7            1     0  0x00000200               0          0   
1        5        7            1     0  0x00000200               0          0   
2        5        7            1     0  0x00000200               0          0   
3        5        7            1     0  0x00000200               0          0   
4        5        7            1     0  0x00000200               0          0   

      Timestamp          X         Y         Z  Reflectivity  Tag  Ori_x  \
0  339330000000 -11.313043 -0.018017 -4.001845            24    0   8128   
1  339330000000 -11.338676 -0.018057 -3.928627            24    0   8132   
2  339330000000 -11.363835 -0.018097 -3.855245            24    0   8137   
3  339330000000 -11.388520 -0.018137 -3.781701            28    0   8132   
4  339330000000 -11.412731 -0.018175 -3.708000            28    0   8132   

   Ori_y  Ori_z  Ori_radius  Ori_theta  Ori_phi  
0  236

In [18]:
df_temp.to_csv('local/visualize/visualize.csv', index=False)