In [137]:
import numpy as np
import pandas as pd
import math as m
from einops import rearrange
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import math

### Preperation (NOTE: Using meter as unit)

In [138]:
# convert pointcloud from cartisean coordinate to spherical coordinate
def cart2sph(xyz):
    x = xyz[:,0]
    y = xyz[:,1]
    z = xyz[:,2]
    XsqPlusYsq = x**2 + y**2
    r = np.sqrt(list(XsqPlusYsq + z**2))
    elev = np.arctan2(list(z), np.sqrt(list(XsqPlusYsq)))
    pan = np.arctan2(list(x), list(y))
    output = np.array([r, elev, pan])
    return rearrange(output, 'a b -> b a') 

In [139]:
# Specify the directory path
dataset_path = 'datasets/testing1'

# List all files in the specified path, ignoring directories
files = [f for f in os.listdir(dataset_path) if os.path.isfile(os.path.join(dataset_path, f))]
files.sort()

# read the files
points_xyz = []
for s in files:
    path = 'datasets/testing1/' + s
    df = pd.read_csv(path)
    a = df.to_numpy()
    b = a[:,8:11]
    points_xyz.append(b)

# Now we can find the fiew direction of each points:
# NOTE: points in spherical coordinate are arranged: [r, elev, pan]
points_sphere = []
for points in points_xyz:
    points_sphere.append(cart2sph(points))

In [140]:
### we now process the data
# Translation vectors for points in each view, we are using camera centre at first frame as origin of world coordinate
# NOTE: translation vectors below are found by assuming transformation between frames are translations, and obatined by manually finding corrspondance
# They are translation of the same corrspondance across different frames
# HARD CODED HERE
t0 = np.array([0,0,0])
t1 = np.array([-0.671,-0.016,0.215])
t2 = np.array([-1.825,-0.091,0.147])
t3 = np.array([-2.661,-0.263,0.166])
t4 = np.array([-3.607,-0.156,0.039])
translations = [t0, t1, t2, t3, t4]

# camera centre locations
centres = [-t for t in translations]
centres_data = []
for i,c in enumerate(centres):
    l = len(points_sphere[i])
    temp = np.tile(c, (l, 1))
    centres_data.append(temp)

In [141]:
# stack the points into one big matrix
stacked = []
for i in range(len(points_sphere)):
    temp = np.hstack((points_sphere[i], centres_data[i]))
    stacked.append(temp)

dataset = np.array([])
for i in range(len(stacked)):
    if i == 0:
        dataset = stacked[i]
    else:
        dataset = np.vstack((dataset, stacked[i]))
np.random.shuffle(dataset)

# Filter out points where the distance value is = 0
mask = dataset[:, 0] != 0
dataset = dataset[mask]

### Now prepare to train the model

In [142]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")

Using cuda device


In [143]:
# Convert dataset to pytorch tensor
X = np.array(dataset[:,1:])
y = np.array(dataset[:,0])

# Convert to tensor:
X_tensor = torch.from_numpy(X).double()
y_tensor = torch.from_numpy(y).double()

In [145]:
dataset = TensorDataset(X_tensor, y_tensor)
batch_size = 256
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
class SphericalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

In [146]:
# Now we prepare to train the model
features = 5

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(5, 512),  # Input layer with 5 inputs and 10 outputs
            nn.ReLU(),                # Activation function
            nn.Linear(512, 512),        # Hidden layer with 512 neurons
            nn.ReLU(),                # Activation function
            nn.Linear(512, 512),        # Hidden layer with 512 neurons
            nn.ReLU(),                # Activation function
            nn.Linear(512, 512),        # Hidden layer with 512 neurons
            nn.ReLU(),                # Activation function
            nn.Linear(512, 512),        # Hidden layer with 512 neurons
            nn.ReLU(),                # Activation function
            nn.Linear(512, 1)          # Output layer with 1 output
        )
        
    def forward(self, x):
        return self.layers(x)

# Initialize the model
model = MLP().to(device)

# Loss and Optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [147]:
# Training Loop
num_epochs = 50
for epoch in range(num_epochs):
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device).float(), targets.to(device).float()
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(loss)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


tensor(764.6520, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(913.3753, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1009.3695, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1009.9080, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(2277.4998, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(408.1846, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(495.0412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(954.9431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(455.7444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(785.1834, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(521.4275, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(689.8781, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(447.1741, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(748.9323, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.1920, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(786.8657, device='cuda:0', grad_fn=<MseLossBackward0>)
tenso