This notebook is used to develop functions for creating pointcloud labels from the cylinder models. Both the offset-vectors as well as the noise-labels are generated

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from fastprogress import progress_bar, master_bar
import os 
import sys
import torch

# Get access to all the files in the repository
cwd = os.getcwd()
parentDir = os.path.dirname( cwd )
sys.path.append(parentDir)

from Modules.Utils import get_device

First load a single cylinder model and test the function for finding the closest cylinder

In [9]:
cylinders = pd.read_csv( os.path.join(parentDir, 'data', 'raw', 'QSM', 'detailed', '33_1_000000.csv'), header=0)
cylinders.columns = cylinders.columns.str.strip() # Clean whitespaces
cylinders.head()

Unnamed: 0,type,ID,parentID,startX,startY,startZ,endX,endY,endZ,radius,...,treeSpecies,translateX,translateY,translatez,gvA,gvB,gvC,glA,glB,glC
0,cylinder,0,-1,27.268867,23.687374,-1.57451,27.310114,23.677517,-1.34237,0.147356,...,unknownSpecies,0.0,0.0,0.0,0.01,0.401606,0.0,0.01,0.401606,0.0
1,cylinder,1,0,27.310114,23.677517,-1.34237,27.323258,23.685699,-1.127219,0.1473,...,unknownSpecies,0.0,0.0,0.0,0.01,0.401606,0.0,0.01,0.401606,0.0
2,cylinder,2,1,27.323258,23.685699,-1.127219,27.372589,23.700213,-0.947553,0.147255,...,unknownSpecies,0.0,0.0,0.0,0.01,0.401606,0.0,0.01,0.401606,0.0
3,cylinder,3,2,27.372589,23.700213,-0.947553,27.363788,23.667894,-0.756015,0.147214,...,unknownSpecies,0.0,0.0,0.0,0.01,0.401606,0.0,0.01,0.401606,0.0
4,cylinder,4,3,27.363788,23.667894,-0.756015,27.380559,23.656645,-0.605488,0.147182,...,unknownSpecies,0.0,0.0,0.0,0.01,0.401606,0.0,0.01,0.401606,0.0


In [10]:
def closest_cylinder(point, cylinders):
    """
    Find the closest cylinder to a given point in 3D space.

    Parameters:
        point: A 3D point as a numpy array [x, y, z].
        cylinders: A list of dictionaries, where each dictionary represents a cylinder with:
                   - 'start': The start point of the cylinder axis as a numpy array [x, y, z].
                   - 'end': The end point of the cylinder axis as a numpy array [x, y, z].
                   - 'radius': The radius of the cylinder.

    Returns:
        The index of the closest cylinder and the corresponding distance.
    """
    closest_distance = float('inf')
    closest_index = -1
    closet_offset_vector = np.zeros(3)

    for _, cylinder in cylinders.iterrows():
        start = np.array([cylinder['startX'], cylinder['startY'], cylinder['startZ']])
        end = np.array([cylinder['endX'], cylinder['endY'], cylinder['endZ']])
        radius = cylinder['radius']
        id = cylinder['ID']

        # Cylinder axis vector
        axis = end - start
        axis_length = np.linalg.norm(axis)
        axis_unit = axis / axis_length

        # Vector from start point to the given point
        point_vector = point - start

        # Projection of point_vector onto the cylinder axis
        projection_length = np.dot(point_vector, axis_unit)
        projection_point = start + projection_length * axis_unit

        # Clamp the projection point to the cylinder segment
        projection_length_clamped = np.clip(projection_length, 0, axis_length)
        projection_point_clamped = start + projection_length_clamped * axis_unit

        # Compute the distance from the point to the clamped projection point
        distance_to_axis = np.linalg.norm(point - projection_point_clamped)

        # Subtract the cylinder radius to get the distance to the cylinder surface
        distance_to_cylinder_surface = abs(distance_to_axis - radius)

        # Update the closest cylinder
        if distance_to_cylinder_surface < closest_distance:
            closest_distance = distance_to_cylinder_surface
            closest_index = id
            closet_offset_vector = projection_point_clamped - point

    return closest_index, closest_distance, closet_offset_vector

def closest_cylinder_vectorized(point, cylinders):
    # Extract numpy arrays from the DataFrame
    start = cylinders[['startX', 'startY', 'startZ']].to_numpy()
    end = cylinders[['endX', 'endY', 'endZ']].to_numpy()
    radius = cylinders['radius'].to_numpy()
    IDs = cylinders['ID'].to_numpy()

    # Compute axis vectors and lengths
    axis = end - start
    axis_length = np.linalg.norm(axis, axis=1)
    axis_unit = axis / axis_length[:, np.newaxis]

    # Compute vectors from start to the point
    point_vector = point - start

    # Project point_vector onto the cylinder axis
    projection_length = np.sum(point_vector * axis_unit, axis=1)
    projection_point = start + projection_length[:, np.newaxis] * axis_unit

    # Clamp projection to the cylinder segment
    projection_length_clamped = np.clip(projection_length, 0, axis_length)
    projection_point_clamped = start + projection_length_clamped[:, np.newaxis] * axis_unit

    # Compute distances to the cylinder surface
    distances_to_axis = np.linalg.norm(point - projection_point_clamped, axis=1)
    distances_to_surface = np.abs(distances_to_axis - radius)

    # Find the closest cylinder
    closest_idx = np.argmin(distances_to_surface)
    return IDs[closest_idx], distances_to_surface[closest_idx], projection_point_clamped[closest_idx] - point

def closest_cylinder_cuda(point, start, radius, axis_length, axis_unit, IDs, device):
    """
    Find the closest cylinder to a given point using GPU acceleration with PyTorch.
    
    Parameters:
        point: A 3D point as a numpy array [x, y, z].
        cylinders: A pandas DataFrame with cylinder attributes:
                   - 'startX', 'startY', 'startZ'
                   - 'endX', 'endY', 'endZ'
                   - 'radius', 'ID'
    
    Returns:
        The ID of the closest cylinder, the corresponding distance, and the offset vector.
    """
    # Convert point and cylinder data to PyTorch tensors on GPU
    point = torch.tensor(point, dtype=torch.float32, device=device)

    # Vector from start to the point
    point_vector = point - start

    # Projection of point_vector onto the cylinder axis
    projection_length = torch.sum(point_vector * axis_unit, dim=1, keepdim=True)
    # projection_point = start + projection_length * axis_unit

    # Clamp the projection to the cylinder segment
    zero_tensor = torch.zeros_like(projection_length)  # Tensor with the same shape as projection_length, filled with zeros
    projection_length_clamped = torch.clamp(projection_length, zero_tensor, axis_length)
    projection_point_clamped = start + projection_length_clamped * axis_unit

    # Compute distances to the cylinder surface
    distance_to_axis = torch.norm(point - projection_point_clamped, dim=1)
    distance_to_surface = torch.abs(distance_to_axis - radius)

    # Find the closest cylinder
    closest_idx = torch.argmin(distance_to_surface).item()
    closest_distance = distance_to_surface[closest_idx].item()
    closest_offset = (projection_point_clamped[closest_idx] - point).cpu().numpy()

    # Get the ID of the closest cylinder
    closest_id = IDs[closest_idx].item()

    return closest_id, closest_distance, closest_offset

Now load an example pointcloud and perform the cylinder finding for every point

In [11]:
def generate_offset_cloud_vectorized( cloud, cylinders, masterBar=None ):

    output_data = np.zeros((len(cloud), 7)) # point coordinates, offset vector, cylinder ID

    for i, point in progress_bar(enumerate(cloud), total=len(cloud), master=masterBar):
    # for i, point in enumerate(cloud):
        index, distance, offset_vector = closest_cylinder_vectorized( point, cylinders )

        output_data[i] = np.concatenate( (point, offset_vector, [index]) )

    return output_data

def generate_offset_cloud_cuda( cloud, cylinders, masterBar=None ):

    output_data = np.zeros((len(cloud), 7)) # point coordinates, offset vector, cylinder ID

    if torch.cuda.is_available():
        device = torch.device('cuda')  
        print("Using cuda device")
        # Get the current CUDA device
        device_id = torch.cuda.current_device()
        # Print device properties
        device_name = torch.cuda.get_device_name(device_id)
        print(f"Using CUDA Device: {device_name}")
    else:
        device = torch.device('cpu')
        print("Using cpu")

    start = torch.tensor(cylinders[['startX', 'startY', 'startZ']].values, dtype=torch.float32, device=device)
    end = torch.tensor(cylinders[['endX', 'endY', 'endZ']].values, dtype=torch.float32, device=device)
    radius = torch.tensor(cylinders['radius'].values, dtype=torch.float32, device=device)
    IDs = torch.tensor(cylinders['ID'].values, dtype=torch.int32, device=device)

    # Compute axis vectors and lengths
    axis = end - start
    axis_length = torch.norm(axis, dim=1, keepdim=True)
    axis_unit = axis / axis_length

    for i, point in progress_bar(enumerate(cloud), total=len(cloud), master=masterBar):
    # for i, point in enumerate(cloud):
        index, distance, offset_vector = closest_cylinder_cuda( point, start, radius, axis_length, axis_unit, IDs, device )

        output_data[i] = np.concatenate( (point, offset_vector, [index]) )

    return output_data


In [12]:
cloudPath = os.path.join( parentDir, "data", "raw", "cloud", "33_1.npy" )
cloud = np.load(cloudPath)

labeled_cloud = generate_offset_cloud_cuda( cloud, cylinders )
print(labeled_cloud[:10,:])

Using cuda device
Using CUDA Device: NVIDIA GeForce RTX 3090


KeyboardInterrupt: 

In [13]:
def closest_cylinder_cuda_batch(points, start, radius, axis_length, axis_unit, IDs, device):
    """
    Find the closest cylinder to a batch of points using GPU acceleration with PyTorch.
    
    Parameters:
        points: A batch of 3D points as a torch tensor of shape (N, 3).
        start, end, radius, axis_length, axis_unit, IDs: Cylinder data as PyTorch tensors.
        device: CUDA device.
    
    Returns:
        IDs, distances, and offsets for the closest cylinders for each point.
    """
    # Convert points to PyTorch tensors
    points = torch.tensor(points, dtype=torch.float32, device=device)

    # Compute vector from start to points (broadcasting)
    point_vectors = points[:, None, :] - start[None, :, :]  # Shape: (N, M, 3)

    # Projection of point_vector onto the cylinder axis
    projection_lengths = torch.sum(point_vectors * axis_unit[None, :, :], dim=2, keepdim=True)  # Shape: (N, M, 1)

    # Clamp the projection to the cylinder segment
    zero_tensor = torch.zeros_like(projection_lengths)
    projection_lengths_clamped = torch.clamp(projection_lengths, zero_tensor, axis_length[None, :, :])
    projection_points_clamped = start[None, :, :] + projection_lengths_clamped * axis_unit[None, :, :]

    # Compute distances to the cylinder surface
    distances_to_axis = torch.norm(points[:, None, :] - projection_points_clamped, dim=2)  # Shape: (N, M)
    distances_to_surface = torch.abs(distances_to_axis - radius[None, :])  # Shape: (N, M)

    # Find the closest cylinder for each point
    closest_indices = torch.argmin(distances_to_surface, dim=1)  # Shape: (N,)
    closest_distances = distances_to_surface[range(len(points)), closest_indices]  # Shape: (N,)
    closest_offsets = projection_points_clamped[range(len(points)), closest_indices] - points  # Shape: (N, 3)

    # Get the IDs of the closest cylinders
    closest_ids = IDs[closest_indices]

    return closest_ids.cpu().numpy(), closest_distances.cpu().numpy(), closest_offsets.cpu().numpy()

def generate_offset_cloud_cuda_batched(cloud, cylinders, device, masterBar=None, batch_size=1024):
    output_data = np.zeros((len(cloud), 7))  # point coordinates, offset vector, cylinder ID

    # Prepare cylinder data on the GPU
    start = torch.tensor(cylinders[['startX', 'startY', 'startZ']].values, dtype=torch.float32, device=device)
    end = torch.tensor(cylinders[['endX', 'endY', 'endZ']].values, dtype=torch.float32, device=device)
    radius = torch.tensor(cylinders['radius'].values, dtype=torch.float32, device=device)
    IDs = torch.tensor(cylinders['ID'].values, dtype=torch.int32, device=device)
    axis = end - start
    axis_length = torch.norm(axis, dim=1, keepdim=True)
    axis_unit = axis / axis_length

    # Process the cloud in batches
    for i in progress_bar(range(0, len(cloud), batch_size), parent=masterBar):
        batch = cloud[i:i + batch_size]
        ids, distances, offsets = closest_cylinder_cuda_batch(batch, start, radius, axis_length, axis_unit, IDs, device)

        # Store results
        output_data[i:i + batch_size, :3] = batch
        output_data[i:i + batch_size, 3:6] = offsets
        output_data[i:i + batch_size, 6] = ids

    return output_data


In [14]:
cloudPath = os.path.join( parentDir, "data", "raw", "cloud", "33_4.npy" )
cloud = np.load(cloudPath)

device = get_device()

labeled_cloud = generate_offset_cloud_cuda_batched( cloud, cylinders, device, batch_size=1024 )
print(labeled_cloud[:10,:])

Using cuda device
Using CUDA Device: NVIDIA GeForce RTX 3090


[[ 2.72333002e+00  2.63748398e+01  1.45375900e+01  2.17761230e+01
  -5.36017227e+00  4.33740616e-01  3.00700000e+03]
 [ 2.61204004e+00  2.76592808e+01  1.46173897e+01  2.18874130e+01
  -6.64461327e+00  3.53940964e-01  3.00700000e+03]
 [ 2.60033011e+00  2.76658993e+01  1.46201897e+01  2.18991222e+01
  -6.65123177e+00  3.51140976e-01  3.00700000e+03]
 [ 2.57928991e+00  2.76683998e+01  1.46388903e+01  2.19201622e+01
  -6.65373230e+00  3.32440376e-01  3.00700000e+03]
 [ 2.65076995e+00  2.77662296e+01  1.46318903e+01  2.18486824e+01
  -6.75156212e+00  3.39440346e-01  3.00700000e+03]
 [ 2.51680994e+00  2.76176395e+01  1.46705904e+01  2.19826431e+01
  -6.60297203e+00  3.00740242e-01  3.00700000e+03]
 [ 2.41343999e+00  2.77647095e+01  1.48219900e+01  2.20860119e+01
  -6.75004196e+00  1.49340630e-01  3.00700000e+03]
 [ 2.40812993e+00  2.77932396e+01  1.48212900e+01  2.20913219e+01
  -6.77857208e+00  1.50040627e-01  3.00700000e+03]
 [ 2.55064988e+00  2.76313000e+01  1.46526899e+01  2.19488029e+0

Now formulate a function that loads all clouds, labels them and stores them

In [17]:
def label_clouds( cloudDir, cylinderDir, labelDir, batch_size=1024 ):

    device = get_device()
    
    # Create list of paths going to the clouds and cylinders
    cloudList = [os.path.join( cloudDir, file ) for file in os.listdir( cloudDir ) if file.endswith(".npy") ]
    cylinderList = [os.path.join( cylinderDir, file ) for file in os.listdir( cylinderDir ) if 
    file.endswith(".csv")]

    def get_prefix(path):
        parts = os.path.basename(path).split('.')[0].split('_')
        return int(parts[0]), int(parts[1])  # Convert to integers for proper numerical sorting

    cloudList.sort(key=get_prefix)
    cylinderList.sort(key=get_prefix)

    print("\nLabeling clouds...")
    mb = master_bar( range(len(cloudList)) )
    for i in mb:
        # load the data
        cloud = np.load( cloudList[i] )
        cylinders = pd.read_csv( cylinderList[i], header=0 )
        cylinders.columns = cylinders.columns.str.strip() # Clean whitespaces in column names 

        # Get the labeled data
        output_data = generate_offset_cloud_cuda_batched(cloud, cylinders, device, masterBar=mb, batch_size=batch_size)

        # Save the output
        fileName = os.path.basename( cloudList[i] ).split('.')[0]
        savePath = os.path.join( labelDir, fileName+'_labeled.npy')
        np.save( savePath, output_data )

    print("Finished labeling and saving!")

        

In [18]:
cylinderDir = os.path.join(parentDir, 'data', 'raw', 'QSM', 'detailed')
cloudDir = os.path.join( parentDir, 'data', 'raw', 'cloud')
labelDir = os.path.join( parentDir, 'data', 'labeled', 'cloud')

label_clouds( cloudDir, cylinderDir, labelDir )

Using cuda device
Using CUDA Device: NVIDIA GeForce RTX 3090

Labeling clouds...


Finished labeling and saving!


In [21]:
labeled_cloud = np.load( os.path.join(labelDir, '33_1_labeled.npy') )
print(labeled_cloud[5000:5020])

[[ 2.75153408e+01  2.36245003e+01 -8.80909980e-01 -1.45999908e-01
   6.37836456e-02  4.05400991e-03  3.00000000e+00]
 [ 2.74959202e+01  2.36058502e+01 -8.62710000e-01 -1.27571106e-01
   7.87925720e-02  7.43317604e-03  3.00000000e+00]
 [ 2.75147991e+01  2.36131096e+01 -8.65109980e-01 -1.46249771e-01
   7.22694397e-02  5.47420979e-03  3.00000000e+00]
 [ 2.74173908e+01  2.35351296e+01 -9.93709980e-01 -5.67779541e-02
   1.61560059e-01  2.53933668e-03  2.00000000e+00]
 [ 2.74936409e+01  2.36299191e+01 -8.63910020e-01 -1.25061035e-01
   5.55686951e-02  3.62980366e-03  3.00000000e+00]
 [ 2.74391899e+01  2.35614796e+01 -1.01250994e+00 -8.12892914e-02
   1.34410858e-01  1.14624500e-02  2.00000000e+00]
 [ 2.73954792e+01  2.35138702e+01 -9.88910020e-01 -3.56101990e-02
   1.82600021e-01 -4.97215986e-03  2.00000000e+00]
 [ 2.74642506e+01  2.35732002e+01 -8.55409980e-01 -9.65366364e-02
   1.09107971e-01  1.39740705e-02  3.00000000e+00]
 [ 2.74034004e+01  2.35289402e+01 -9.91710010e-01 -4.33826447e-0