 **Point Cloud Encoder**

 *Importing Libraries*

In [1]:
import torch
import open3d as o3d
import pandas as pd
import numpy as np
from itertools import product
import os
import torchsparse
from code_snippets.readers import read_points_file
from torch import nn
import torchsparse.nn as spnn
import torchsparse.nn.functional as F
import torchsparse.utils as sp_utils
from torchsparse import SparseTensor
from torchsparse.utils.quantize import sparse_quantize

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


 *Initializing the Point Clouds Data*

In [2]:
def read_points_file(filepath):
    """Load point positions and their distance uncertainties from a gzipped CSV.

    Args:
        filepath: Path to a gzip-compressed CSV file that contains the columns
            ``px_world``, ``py_world``, ``pz_world`` and ``dist_std``.

    Returns:
        A tuple ``(points, dist_std)`` of NumPy arrays: ``points`` holds the
        three ``p*_world`` columns (one row per CSV row) and ``dist_std`` holds
        the ``dist_std`` column.

    Raises:
        AssertionError: If ``filepath`` does not exist.
    """
    file_found = os.path.exists(filepath)
    assert file_found, f"Could not find point cloud file: {filepath}"

    frame = pd.read_csv(filepath, compression="gzip")
    xyz = frame[["px_world", "py_world", "pz_world"]].to_numpy()
    sigma = frame["dist_std"].to_numpy()
    return xyz, sigma

# Gzipped CSV with the semidense points (machine-specific absolute path).
semidense_points_path = "/home/mseleem/Desktop/3d_model_pt/0/semidense_points.csv.gz"
points, dist_std = read_points_file(semidense_points_path)


 *Voxelize Point Clouds*

In [3]:
# Edge length of one voxel, in the same units as the point coordinates.
voxel_size = 0.015  # 1.5cm

# Wrap the raw (N, 3) coordinate array in an Open3D point cloud.
point_vector = o3d.utility.Vector3dVector(points)
pcd = o3d.geometry.PointCloud()
pcd.points = point_vector

# Build the voxel grid from the point cloud at the chosen resolution.
voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd, voxel_size)


In [4]:
# Compare how many points went in with how many voxels came out.
num_points = len(np.asarray(pcd.points))
num_voxels = len(voxel_grid.get_voxels())
print(f"Number of points: {num_points}")
print("Number of voxels:", num_voxels)

Number of points: 433426
Number of voxels: 331598


*Visualize*

In [5]:
# Render the voxel grid with Open3D's draw_geometries visualizer.
o3d.visualization.draw_geometries([voxel_grid])




Map each point to the voxel that contains it; the cells that follow are debugging checks on that mapping.


In [6]:
# Group point row indices by the grid index of the voxel that contains them.
# Keys are (i, j, k) tuples returned by the voxel grid; values are lists of
# row indices into `points`, in the order the points were visited.
voxel_map = {}

for point_id, xyz in enumerate(points):
    voxel_key = tuple(voxel_grid.get_voxel(xyz))
    voxel_map.setdefault(voxel_key, []).append(point_id)


In [7]:
# Check for empty voxels: every key in the mapping must own at least one point.
empty_voxels = [voxel_key for voxel_key, member_ids in voxel_map.items() if not member_ids]
assert not empty_voxels, "There are empty voxels in the mapping!"

In [8]:
# Check total points: the per-voxel lists together must account for every input point.
total_points_in_voxels = sum(map(len, voxel_map.values()))
total_original_points = len(points)
print(f"Total points in voxels: {total_points_in_voxels}")
print(f"Total original points: {total_original_points}")
assert total_points_in_voxels == total_original_points, f"Mismatch in point counts: {total_points_in_voxels} != {total_original_points}"


Total points in voxels: 433426
Total original points: 433426


In [9]:
# Print a few voxel mappings for verification (entries 20..29 in insertion order)
sample_entries = list(voxel_map.items())[20:30]
for voxel_key, member_ids in sample_entries:
    print(f"Voxel index: {voxel_key}, Point indices: {member_ids}")


Voxel index: (1014, 611, 579), Point indices: [20]
Voxel index: (1017, 610, 580), Point indices: [21]
Voxel index: (1042, 594, 579), Point indices: [22, 1482]
Voxel index: (984, 621, 427), Point indices: [23, 259852]
Voxel index: (984, 622, 426), Point indices: [24]
Voxel index: (1007, 620, 588), Point indices: [25]
Voxel index: (982, 625, 419), Point indices: [26, 1489, 80391, 254305]
Voxel index: (993, 621, 420), Point indices: [27]
Voxel index: (974, 630, 418), Point indices: [28]
Voxel index: (995, 621, 418), Point indices: [29]


In [10]:
# Aggregate one feature per voxel and print the values for a few sample voxels.
print("\nAggregated features for sample voxels (indices 20 to 30):")

# One entry per voxel, in voxel_map insertion order: the voxel's grid index and
# the mean dist_std of the points assigned to it (example aggregation: mean).
filtered_voxel_indices = list(voxel_map.keys())
aggregated_features = [np.mean(dist_std[member_ids]) for member_ids in voxel_map.values()]

# Show the entries at positions 20..29.
for voxel_index, aggregated_feature in zip(filtered_voxel_indices[20:30], aggregated_features[20:30]):
    print(f"Voxel index: {voxel_index}, Aggregated feature: {aggregated_feature:.4f}")



Aggregated features for sample voxels (indices 20 to 30):
Voxel index: (1014, 611, 579), Aggregated feature: 0.0207
Voxel index: (1017, 610, 580), Aggregated feature: 0.0214
Voxel index: (1042, 594, 579), Aggregated feature: 0.0264
Voxel index: (984, 621, 427), Aggregated feature: 0.0036
Voxel index: (984, 622, 426), Aggregated feature: 0.0054
Voxel index: (1007, 620, 588), Aggregated feature: 0.0248
Voxel index: (982, 625, 419), Aggregated feature: 0.0058
Voxel index: (993, 621, 420), Aggregated feature: 0.0099
Voxel index: (974, 630, 418), Aggregated feature: 0.0052
Voxel index: (995, 621, 418), Aggregated feature: 0.0106


In [11]:
# Voxel grid indices as an int32 tensor on the GPU, one row per voxel.
voxel_indices_tensor = torch.tensor(filtered_voxel_indices, dtype=torch.int32).to("cuda")

# Aggregated per-voxel features as a float32 column vector on the GPU.
features_tensor = torch.tensor(aggregated_features, dtype=torch.float32).reshape(-1, 1).to("cuda")

print(f"Voxel indices tensor dimensions: {voxel_indices_tensor.shape}")
print(f"Features tensor dimensions: {features_tensor.shape}")

Voxel indices tensor dimensions: torch.Size([331598, 3])
Features tensor dimensions: torch.Size([331598, 1])


In [12]:
# Add batch dimension to voxel indices: prepend a column of zeros so every
# voxel is assigned to batch 0.
voxel_count = voxel_indices_tensor.shape[0]
batch_indices = torch.zeros((voxel_count, 1), dtype=torch.int32, device="cuda")
voxel_indices_tensor_with_batch = torch.cat((batch_indices, voxel_indices_tensor), dim=1)

In [13]:
# Create sparse tensor from the per-voxel features and the batch-prefixed coordinates
sparse_tensor = SparseTensor(features_tensor, voxel_indices_tensor_with_batch)

feature_shape = sparse_tensor.F.shape
coordinate_shape = sparse_tensor.C.shape
print(f"Sparse tensor feature dimensions: {feature_shape}")
print(f"Sparse tensor coordinate dimensions: {coordinate_shape}")

Sparse tensor feature dimensions: torch.Size([331598, 1])
Sparse tensor coordinate dimensions: torch.Size([331598, 4])


In [14]:
class SparseResNetEncoder(nn.Module):
    """Stack of five stride-2 sparse 3D convolutions (1 -> 16 -> 32 -> 64 -> 128 -> 512 channels).

    The layers are applied back to back; this module adds no normalization,
    activation or skip connection between them. ``forward`` prints the feature
    and coordinate shapes before the first layer and after every layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = spnn.Conv3d(1, 16, kernel_size=3, stride=2)
        self.conv2 = spnn.Conv3d(16, 32, kernel_size=3, stride=2)
        self.conv3 = spnn.Conv3d(32, 64, kernel_size=3, stride=2)
        self.conv4 = spnn.Conv3d(64, 128, kernel_size=3, stride=2)
        self.conv5 = spnn.Conv3d(128, 512, kernel_size=3, stride=2)

    def forward(self, x):
        """Run ``x`` through conv1..conv5 in order and return the final sparse tensor.

        Args:
            x: Sparse tensor exposing ``.F`` (features) and ``.C`` (coordinates).

        Returns:
            The output of ``conv5``.
        """
        print(f"Input to conv1: {x.F.shape}, {x.C.shape}")
        for stage in ("conv1", "conv2", "conv3", "conv4", "conv5"):
            x = getattr(self, stage)(x)
            print(f"Output of {stage}: {x.F.shape}, {x.C.shape}")
        return x



In [15]:
def _announce(title, *values):
    """Print ``title`` and then each of ``values`` on its own line."""
    print(title)
    for value in values:
        print(value)


# Run the sparse tensor through a freshly constructed encoder on the GPU.
encoder = SparseResNetEncoder().cuda()
encoded_features = encoder(sparse_tensor.cuda())

# Coordinates of the encoder output, cast to float so they can later be
# concatenated with the float feature matrix.
encoded_coordinates = encoded_features.C.float().cuda()
_announce("Encoded coordinates before removing batch dimension:", encoded_coordinates[:10])

# Remove the batch dimension (drop column 0, keep the three spatial columns)
encoded_coordinates = encoded_coordinates[:, 1:]
_announce("Encoded coordinates after removing batch dimension:", encoded_coordinates[:10])

# Concatenate encoded features with positional encoding: the three coordinate
# columns are appended to the right of every feature row.
encoded_features_with_pos = torch.cat((encoded_features.F, encoded_coordinates), dim=1)
_announce("Encoded features with positional encoding:", encoded_features_with_pos[:10])

# Convert to numpy array for sorting
coordinates_np = encoded_coordinates.cpu().numpy()
_announce("Coordinates as numpy array:", coordinates_np[:10])

# Lexicographical sort by first, then second, then third coordinate column.
# np.lexsort treats the LAST key as the primary one, hence the reversed order.
first_col, second_col, third_col = coordinates_np.T
sorted_indices_np = np.lexsort((third_col, second_col, first_col))
_announce("Sorted indices (numpy):", sorted_indices_np[:10])

# Convert sorted indices back to a PyTorch int64 tensor on the GPU
sorted_indices = torch.from_numpy(sorted_indices_np).to(device="cuda", dtype=torch.long)
_announce("Sorted indices (PyTorch):", sorted_indices[:10])

# Reorder the feature rows (with their appended coordinates) into sorted order
sorted_features_with_pos = encoded_features_with_pos[sorted_indices]
_announce(
    "Encoded and sorted features with positional encoding:",
    sorted_features_with_pos.shape,
    sorted_features_with_pos[:10],
)


Input to conv1: torch.Size([331598, 1]), torch.Size([331598, 4])
Output of conv1: torch.Size([473947, 16]), torch.Size([473947, 4])
Output of conv2: torch.Size([224334, 32]), torch.Size([224334, 4])
Output of conv3: torch.Size([78220, 64]), torch.Size([78220, 4])
Output of conv4: torch.Size([21792, 128]), torch.Size([21792, 4])
Output of conv5: torch.Size([5168, 512]), torch.Size([5168, 4])
Encoded coordinates before removing batch dimension:
tensor([[ 0.,  0., 11., 15.],
        [ 0.,  0., 11., 16.],
        [ 0.,  0., 12., 15.],
        [ 0.,  0., 12., 16.],
        [ 0.,  8.,  4., 15.],
        [ 0.,  8.,  4., 16.],
        [ 0.,  8.,  4., 17.],
        [ 0.,  8.,  4., 18.],
        [ 0.,  8.,  5., 15.],
        [ 0.,  8.,  5., 16.]], device='cuda:0')
Encoded coordinates after removing batch dimension:
tensor([[ 0., 11., 15.],
        [ 0., 11., 16.],
        [ 0., 12., 15.],
        [ 0., 12., 16.],
        [ 8.,  4., 15.],
        [ 8.,  4., 16.],
        [ 8.,  4., 17.],
        

In [16]:
# # Initialize the model
# encoder = SparseResNetEncoder().cuda()

# # Forward pass
# encoded_features = encoder(sparse_tensor.cuda())

In [17]:
# # Append coordinates to feature vectors for positional encoding
# encoded_coordinates = encoded_features.C.float().cuda()

# # Check the shapes of encoded_features.F and coordinates
# print(f"Shape of encoded_features.F: {encoded_features.F.shape}")
# print(f"Shape of coordinates.C: {encoded_coordinates.shape}")

In [18]:
# encoded_coordinates = encoded_coordinates[:, 1:] # Remove batch dimension
# # print(encoded_coordinates.shape)
# print(encoded_coordinates[1:10])


In [19]:
# # Concatenate the features with the coordinates along the feature dimension
# encoded_features_with_pos = torch.cat([encoded_features.F, encoded_coordinates], dim=1)

# print(encoded_features_with_pos.shape)

In [20]:
# # Sort the feature vectors lexicographically by coordinates using numpy
# coordinates_np = encoded_coordinates.cpu().numpy()
# sorted_indices_np = np.lexsort((coordinates_np[:, 2], coordinates_np[:, 1], coordinates_np[:, 0]))
# sorted_indices = torch.from_numpy(sorted_indices_np).long().cuda()

# # Apply sorting to the concatenated features
# sorted_features_with_pos = encoded_features_with_pos[sorted_indices]

# # Print the encoded and sorted features with positional encoding
# print("Encoded and sorted features with positional encoding:")
# print(sorted_features_with_pos.shape)