In [3]:
import jax 
import jax.numpy as jnp
import numpy as np
from jax import jit, vmap
import torch
from scipy.spatial import cKDTree
from scipy.spatial import KDTree
import os
import random
import numpy.linalg as LA

#### Processing function

In [4]:
@jit
def preprocess(points):
    """Center a point set on its mean and scale it by half its bounding-box diagonal.

    Args:
        points: array with one point per row.

    Returns:
        The points shifted by their per-column mean and divided by half the
        Euclidean length of the axis-aligned bounding-box diagonal.
    """
    lower_corner = jnp.min(points, axis=0)
    upper_corner = jnp.max(points, axis=0)
    # Euclidean (L2) length of the bounding-box diagonal
    diagonal = jnp.linalg.norm(upper_corner - lower_corner, ord=2)
    centroid = points.mean(axis=0)
    return (points - centroid) / (0.5 * diagonal)


In [5]:
def pca_points(patch_points):
    '''
    Rotate a patch into the axes given by the SVD of its centered points.

    Args:
        patch_points: xyz points (2-D torch tensor, one point per row)

    Returns:
        patch_points: xyz points after aligning using pca
        trans: the matrix of left singular vectors used for the rotation
    '''
    centroid = patch_points.mean(0)
    centered = patch_points - centroid

    # Left singular vectors of the transposed, centered patch.
    trans = torch.svd(centered.t())[0]
    rotated = torch.mm(centered, trans)

    # The patch was originally centered on (0,0,0); after subtracting the
    # mean that center sits at -centroid, so rotate it the same way ...
    rotated_center = torch.matmul(-centroid, trans)

    # ... and shift the patch back so it is centered on that point again.
    return rotated - rotated_center, trans


In [6]:

def save_neighborhood_to_txt(patch_points, filename="neighborhood.txt"):
    """Write patch_points to a space-delimited text file and report the path.

    Args:
        patch_points: array-like accepted by np.savetxt.
        filename: destination path; values are written with six decimals.
    """
    np.savetxt(fname=filename, X=patch_points, delimiter=" ", fmt="%.6f")
    print(f"Saved neighborhood to {filename}")
    
    
## Modified leihui code to save the files 
    
def processPartL(kdtree, index, points, searchK):
    """Extract and PCA-align the searchK nearest neighbours of one point.

    Args:
        kdtree: tree supporting ``query(point, k=...)`` built over ``points``.
        index: row of ``points`` used as the query point.
        points: NumPy array with one point per row.
        searchK: number of neighbours to request from the tree.

    Returns:
        (patch_points, patch_point_inds, trans, rad): the aligned patch as a
        torch tensor, the neighbour indices, the rotation from pca_points and
        the largest neighbour distance.
    """
    query_point = points[index, :]
    point_distances, patch_point_inds = kdtree.query(query_point, k=searchK)
    rad = max(point_distances)

    # Express the neighbours relative to the query point. Scaling the patch
    # by rad (to a unit sphere) is intentionally left disabled.
    neighbours = torch.from_numpy(points[patch_point_inds, :])
    centered = neighbours - torch.from_numpy(query_point)

    aligned, trans = pca_points(centered)
    return aligned, patch_point_inds, trans, rad

##### PC-Diff imports

In [7]:
from pcdiff import knn_graph, estimate_basis, build_grad_div




def calculate_gradients(pointcloud, k_neighbors):
    """Run the pcdiff pipeline on a point cloud and return build_grad_ours' output.

    Builds a k-nearest-neighbour graph, estimates a local basis from it and
    hands both to build_grad_ours.

    Args:
        pointcloud: point positions passed straight to pcdiff.
        k_neighbors: neighbour count for the knn graph.
    """
    # TODO: Find out if this is only grad_x or just the gradient found here, maybe need more info for complete gradient
    graph_edges = knn_graph(pointcloud, k_neighbors)
    normal, x_basis, y_basis = estimate_basis(pointcloud, graph_edges)
    return build_grad_ours(pointcloud, normal, x_basis, y_basis, graph_edges)


In [8]:
from pcdiff import coords_projected, gaussian_weights, weighted_least_squares

def build_grad_ours(pos, normal, x_basis, y_basis, edge_index, kernel_width=1, regularizer=1e-8, shape_regularizer=None):
    """Return columns 1 and 2 of the weighted-least-squares fit, one row per point.

    Args:
        pos: point positions.
        normal, x_basis, y_basis: local basis as returned by estimate_basis.
        edge_index: (row, col) pair of edge endpoints from knn_graph.
        kernel_width: width forwarded to gaussian_weights.
        regularizer: regulariser forwarded to weighted_least_squares.
        shape_regularizer: optional extra regulariser; when given,
            weighted_least_squares returns a second value that is ignored here.

    Returns:
        ``wls[::k, 1:3]`` where ``k`` is the number of edges leaving point 0.
        NOTE(review): this assumes wls holds k consecutive rows per point and
        that every point has the same number of edges — confirm against pcdiff.
    """
    row, col = edge_index
    # Neighbours per point, read off as the number of edges that start at point 0.
    k = (row == 0).sum()

    coords = coords_projected(pos, normal, x_basis, y_basis, edge_index, k)

    dist = LA.norm(pos[col] - pos[row], axis=1)
    weights = gaussian_weights(dist, k, kernel_width)

    if shape_regularizer is None:
        wls = weighted_least_squares(coords, weights, k, regularizer)
    else:
        wls, _ = weighted_least_squares(coords, weights, k, regularizer, shape_regularizer)

    # Step by the actual neighbour count. The previous hard-coded stride of 20
    # only produced one row per point when the graph was built with k == 20.
    gradients = wls[::int(k), 1:3]

    return gradients

# Creating function to generate datasets from pointclouds
* All point clouds in the dataset folder are used; the requested number of neighborhoods is divided equally among them
* Can be used to save multiple txt files containing a point cloud neighborhood each
* Can be used to create one long txt file where the neighborhoods are appended to the file
* Each saved row has the structure (x, y, z, gradient_x, gradient_y, radius); the number of points is not written as a column in this file

In [9]:
def create_training_data(num_training_sets, num_neighbors, grad_neighbors, method = "PCA", datasets = "./Data/Full_point_clouds", save_path = "./Data/Training_data", save_to_file = False,
                         single_file_name="CombinedDataset.txt", save_mode="single"):
    """Sample PCA-aligned neighborhoods from every point cloud in a folder.

    Every ``.txt``/``.xyz`` file in ``datasets`` is loaded (first three
    columns), normalised with ``preprocess`` and sampled at
    ``int(num_training_sets / number_of_files)`` random query points. For each
    query point the ``num_neighbors`` nearest points are aligned with
    ``processPartL`` and stacked column-wise with their rows of
    ``calculate_gradients`` output and the neighborhood radius.

    Args:
        num_training_sets: total number of neighborhoods requested.
        num_neighbors: points per neighborhood.
        grad_neighbors: neighbour count used for the gradient estimate.
        method: only "PCA" produces output; any other value yields nothing.
        datasets: folder containing the point-cloud files.
        save_path: folder the results are written to.
        save_to_file: write the neighborhoods to disk when True.
        single_file_name: file name used when ``save_mode == "single"``.
        save_mode: "single" appends every neighborhood to one file (which is
            cleared first); "multiple" writes one file per neighborhood.

    Returns:
        The last aligned neighborhood produced by processPartL, or None when
        no neighborhood was created.
    """
    # check if the save folder is available or create it if not
    if save_to_file and not os.path.exists(save_path):
        os.makedirs(save_path)

    all_files = [os.path.join(datasets, f) for f in os.listdir(datasets) if f.endswith(('.txt', '.xyz'))]
    if not all_files:
        # Nothing to sample from; avoids dividing by zero below.
        print(f"No .txt or .xyz point clouds found in {datasets}")
        return None

    sets_per_file = int(num_training_sets / len(all_files))

    single_file_path = os.path.join(save_path, single_file_name)
    # Start from an empty file, but only when this call is going to write one.
    if save_to_file and save_mode == "single" and os.path.exists(single_file_path):
        os.remove(single_file_path)

    neighborhood = None
    total_sets_created = 0  # Counter to see how many training points have been created

    for file_path in all_files:
        if total_sets_created >= num_training_sets:
            break  # Requested number of neighborhoods reached

        rawpoints = np.loadtxt(file_path, usecols=(0, 1, 2))  # Load only the first three columns (x, y, z)
        print(f"Loaded {rawpoints.shape[0]} points from {file_path}")

        points_np = np.array(preprocess(rawpoints).block_until_ready())
        kdtree = KDTree(points_np)

        gradients = calculate_gradients(points_np, grad_neighbors)

        # random.sample raises if asked for more items than exist.
        sample_count = min(sets_per_file, len(points_np))
        selected_indices = random.sample(range(len(points_np)), sample_count)

        for i in selected_indices:
            if method != "PCA":
                continue

            neighborhood, indices, _, radius = processPartL(kdtree, i, points_np, num_neighbors)
            neighborhood_gradients = gradients[indices]

            # Same radius repeated for every point, as a column vector.
            radius_column = np.full((num_neighbors, 1), radius)
            neighborhood_with_gradients = np.hstack(
                [neighborhood.numpy(), neighborhood_gradients, radius_column]
            )

            if save_to_file:
                if save_mode == "multiple":
                    # Save each neighborhood in a separate file
                    filename = os.path.join(save_path, f"neighborhood_{total_sets_created}.txt")
                    save_neighborhood_to_txt(neighborhood_with_gradients, filename)
                elif save_mode == "single":
                    # Append the neighborhood to a single file
                    with open(single_file_path, "a") as f:
                        np.savetxt(f, neighborhood_with_gradients, fmt="%.6f", delimiter=" ")

            total_sets_created += 1

            if total_sets_created >= num_training_sets:
                break  # Exits the inner loop; the check above ends the outer one

    return neighborhood

In [10]:

def L2_norm(nbh, origin_index=0):
    """Euclidean distance from every row of nbh to the row at origin_index.

    Args:
        nbh: 2-D array-like or torch tensor with one point per row.
        origin_index: row used as the reference point.

    Returns:
        JAX array with one distance per row.
    """
    if isinstance(nbh, torch.Tensor):
        # torch tensor -> NumPy -> JAX device array
        as_numpy = nbh.detach().cpu().numpy()
        nbh = jax.device_put(as_numpy)

    rows = jnp.array(nbh)
    offsets = rows - rows[origin_index]
    return jnp.linalg.norm(offsets, axis=1)

In [11]:
# points = create_training_data(200, 20, 20, save_to_file=True, save_mode="single")

## Data generation as above, but producing one feature row per neighborhood

In [12]:
import numpy as np
import os

def neighborhood_feature_generation(num_runs=None, mixer=False, outputfile="Combined_neighborhood_data.txt", save_path="./Data/Training_data"):
    """
    Extracts statistical features from neighborhoods stored in CombinedDataset.txt.

    Neighborhoods are the row blocks lying between rows whose first three
    columns are all 0. For each block one line is written:
    mean and variance of column 4, mean and variance of column 5, the last
    column of the block's first row (radius), and the neighborhood size.

    Parameters:
        num_runs (int): Number of times to run create_training_data() with different neighborhood sizes
            (required when mixer is True).
        mixer (bool): If True, generate fresh data with a random neighborhood size per run;
            if False, process the existing CombinedDataset.txt and assume 20 neighbors.
        outputfile (str): Name of the output feature file.
        save_path (str): Directory where datasets are stored.

    Returns:
        None. Problems with the input are reported with a message and an early return.
    """
    default_neighbors = 20  # Assumed neighborhood size when not using mixer

    def _split_on_zero_rows(data):
        """Return the row blocks of data between rows whose first three columns are all 0."""
        blocks = []
        start = 0
        for sep in np.flatnonzero(np.all(data[:, :3] == 0, axis=1)):
            if sep != start:
                blocks.append(data[start:sep])
            start = sep + 1
        if start < len(data):
            blocks.append(data[start:])
        return blocks

    # Ensure save directory exists
    os.makedirs(save_path, exist_ok=True)

    output_path = os.path.join(save_path, outputfile)
    combined_data_path = os.path.join(save_path, "CombinedDataset.txt")

    # (neighborhood rows, neighborhood size) pairs
    labelled_neighborhoods = []

    if mixer:
        print("Generating mixed neighborhood sizes...")

        if num_runs is None:
            print("num_runs must be given when mixer is True. Exiting.")
            return

        # Clear the output file at the start to avoid keeping old data
        if os.path.exists(output_path):
            os.remove(output_path)

        for _ in range(num_runs):
            num_neighbors = np.random.randint(5, 35)  # Random neighborhood size per run

            # Create new training data with 50 samples per iteration
            create_training_data(50, num_neighbors, 20, save_path=save_path, save_to_file=True, save_mode="single")

            # create_training_data clears CombinedDataset.txt on every call, so
            # each run has to be read back before the next one replaces it.
            if not os.path.exists(combined_data_path):
                print("No combined dataset found. Exiting.")
                return

            try:
                run_data = np.loadtxt(combined_data_path, ndmin=2)
            except Exception as e:
                print(f"Error loading data: {e}")
                return

            if run_data.size == 0:
                print("Generated dataset is empty. Exiting.")
                return

            labelled_neighborhoods.extend(
                (block, num_neighbors) for block in _split_on_zero_rows(run_data)
            )

    else:
        # If mixer is False, process an existing dataset
        if not os.path.exists(combined_data_path):
            print(f"File {combined_data_path} not found. Exiting.")
            return

        data = np.loadtxt(combined_data_path, ndmin=2)
        if data.size == 0:
            print(f"File {combined_data_path} is empty. Exiting.")
            return

        labelled_neighborhoods = [
            (block, default_neighbors) for block in _split_on_zero_rows(data)
        ]

    with open(output_path, "w") as f:
        for block, block_neighbors in labelled_neighborhoods:
            mean_col4 = np.mean(block[:, 3])
            var_col4 = np.var(block[:, 3])

            mean_col5 = np.mean(block[:, 4])
            var_col5 = np.var(block[:, 4])

            radius = block[0, -1]

            f.write(f"{mean_col4:.6f} {var_col4:.6f} {mean_col5:.6f} {var_col5:.6f} {radius:.6f} {block_neighbors}\n")

    print(f"Processed {len(labelled_neighborhoods)} neighborhoods. Output saved to {output_path}")


In [13]:
# neighborhood_feature_generation(10, True)

In [14]:
import numpy as np
import os

def data_to_neighborhood(num_neighbors=10, save_path="./Data/Training_data"):
    """Generate training data repeatedly and collect per-neighborhood features.

    Each round calls create_training_data (50 sets, ``num_neighbors`` points
    each, 20 gradient neighbours), reads back ``CombinedDataset.txt`` from
    ``save_path`` and splits it into row blocks at rows whose first three
    columns are all 0.

    Args:
        num_neighbors: neighborhood size; also the number of generation rounds.
        save_path: directory the dataset is written to and read from.

    Returns:
        NumPy array with one row per neighborhood:
        [mean col 4, var col 4, mean col 5, var col 5, radius],
        or None if the dataset file is missing after a round.
    """
    data_path = os.path.join(save_path, "CombinedDataset.txt")
    feature_rows = []  # Use a list to store rows first

    # NOTE(review): the number of rounds equals num_neighbors, as before — confirm this is intended.
    for _ in range(num_neighbors):
        # Forward save_path so the file is written where it is read below.
        create_training_data(50, num_neighbors, 20, save_path=save_path, save_to_file=True, save_mode="single")
        if not os.path.exists(data_path):
            print(f"File {data_path} not found. Exiting.")
            return

        # ndmin=2 keeps a single-row file two-dimensional for the column indexing below.
        data = np.loadtxt(data_path, ndmin=2)
        if data.size == 0:
            continue

        # Identify separator rows (where the first three columns are all 0)
        separators = np.flatnonzero(np.all(data[:, :3] == 0, axis=1))

        # Store each neighborhood separately
        neighborhoods = []
        start_idx = 0
        for sep in separators:
            if start_idx != sep:
                neighborhoods.append(data[start_idx:sep])
            start_idx = sep + 1
        if start_idx < len(data):
            neighborhoods.append(data[start_idx:])

        for block in neighborhoods:
            feature_rows.append([
                np.mean(block[:, 3]),
                np.var(block[:, 3]),
                np.mean(block[:, 4]),
                np.var(block[:, 4]),
                block[0, -1],  # radius
            ])

    # Convert to NumPy array at the end
    return np.array(feature_rows)


## New training-data creation on better data


In [2]:
# Runs the grad_curvature notebook from the sibling pcdiff folder.
# NOTE(review): presumably this is what provides get_nn_data used by
# create_training_data_better below — confirm.
%run "../pcdiff/grad_curvature.ipynb"

In [None]:
def create_training_data_better(num_training_sets, num_neighbors, comparison_size=6, datasets = "./Data/Full_point_clouds", save_path = "./Data/Training_data", save_to_file = True,
                                 single_file_name="THEdataset.txt", save_mode="single"):
    """Build one feature table per point cloud with get_nn_data and save them to one file.

    For each ``.txt``/``.xyz`` file in ``datasets`` (until ``num_training_sets``
    files have been processed) the first three columns are loaded, passed to
    ``get_nn_data`` and the three results are stacked column-wise.

    Args:
        num_training_sets: number of point-cloud files to process.
        num_neighbors: forwarded to get_nn_data.
        comparison_size: forwarded to get_nn_data.
        datasets: folder containing the point-cloud files.
        save_path: folder the dataset file is written to.
        save_to_file: write the stacked tables to ``single_file_name`` when True.
        single_file_name: name of the output file.
        save_mode: only "single" is implemented for writing.

    Returns:
        The stacked table of the last processed file, or None if no file was processed.

    Raises:
        ValueError: if ``save_to_file`` is True and ``save_mode`` is not "single".
    """
    if save_to_file and save_mode != "single":
        # Previously this case failed later with an undefined output path.
        raise ValueError(f"Unsupported save_mode {save_mode!r}: only 'single' is implemented")

    # check if the save folder is available or create it if not
    if save_to_file and not os.path.exists(save_path):
        os.makedirs(save_path)

    all_files = [os.path.join(datasets, f) for f in os.listdir(datasets) if f.endswith(('.txt', '.xyz'))]

    single_file_path = os.path.join(save_path, single_file_name)
    # Clear the file if it exists, but only when this call will rewrite it.
    if save_to_file and os.path.exists(single_file_path):
        os.remove(single_file_path)

    def _column_names(prefix, block, number_single=True):
        """Name the columns a block contributes to np.column_stack."""
        width = 1 if np.ndim(block) < 2 else np.shape(block)[1]
        if width == 1 and not number_single:
            return [prefix]
        return [f"{prefix}{idx}" for idx in range(1, width + 1)]

    neighborhood_to_save = None
    total_sets_created = 0  # Counts processed files

    for file_path in all_files:
        points = np.loadtxt(file_path, usecols=(0, 1, 2))  # Load only the first three columns (x, y, z)
        print(f"Loaded {points.shape[0]} points from {file_path}")

        pos_dist, grad_dist, curv = get_nn_data(points, num_neighbors, comparison_size)

        neighborhood_to_save = np.column_stack((pos_dist, grad_dist, curv))

        if save_to_file:
            is_first = total_sets_created == 0
            # Header follows the actual column counts instead of assuming six of each.
            headers = " ".join(
                _column_names("Distance", pos_dist)
                + _column_names("Grad", grad_dist)
                + _column_names("Curvature", curv, number_single=False)
            )

            # Save data with headers only at the beginning
            with open(single_file_path, "w" if is_first else "a") as f:
                np.savetxt(f, neighborhood_to_save, fmt="%.6f", delimiter=" ", header=headers if is_first else "", comments='')

        total_sets_created += 1

        if total_sets_created >= num_training_sets:
            break

    return neighborhood_to_save

In [21]:
# Process at most 2 point-cloud files with num_neighbors=20 and comparison_size=6.
create_training_data_better(2, 20, 6)

Loaded 35947 points from ./Data/Full_point_clouds\bunny_order3_normal_beta.txt
(35947, 6)
(35947, 6)
(35947,)
Loaded 120982 points from ./Data/Full_point_clouds\cube-isometric.xyz
(120982, 6)
(120982, 6)
(120982,)


array([[  0.        ,   0.07042217,   0.07042254, ...,   0.45012096,
          0.68027246, -11.26065299],
       [  0.        ,   0.07042254,   0.07042254, ...,   0.04994819,
          0.55386084,  -6.69441754],
       [  0.        ,   0.0704225 ,   0.07042256, ...,   0.        ,
          0.        ,   0.        ],
       ...,
       [  0.        ,   0.07042217,   0.07042254, ...,   0.88795829,
          0.54334801,   5.24184792],
       [  0.        ,   0.06208038,   0.0708435 , ...,   0.49645737,
          0.40042239, -10.13771037],
       [  0.        ,   0.07042217,   0.07042254, ...,   0.88795829,
          0.54334801, -26.80719155]])

In [None]:
import numpy as np
import os

def data_to_neighborhood_better(num_neighbors=10, num_sets=10, save_path="./Data/Training_data"):
    """Generate training data ``num_sets`` times, collect neighborhood features and save them.

    Each round calls create_training_data (50 sets, ``num_neighbors`` points
    each, 20 gradient neighbours), reads back the file it wrote and splits it
    into row blocks at rows whose first three columns are all 0.

    Args:
        num_neighbors: neighborhood size passed to create_training_data.
        num_sets: number of generation rounds.
        save_path: directory the dataset is written to and read from.

    Returns:
        NumPy array with one row per neighborhood:
        [mean col 4, var col 4, mean col 5, var col 5, radius],
        also written to "DEN_HER.txt"; None if the dataset file is missing.
    """
    # NOTE(review): the original read "THEdataset.txt", which the
    # create_training_data call below never writes (its default output is
    # "CombinedDataset.txt"). Reading the file that is actually produced —
    # confirm this is the intended source.
    dataset_name = "CombinedDataset.txt"
    data_path = os.path.join(save_path, dataset_name)
    feature_rows = []  # Use a list to store rows first

    for _ in range(num_sets):
        create_training_data(50, num_neighbors, 20, save_path=save_path, save_to_file=True,
                             single_file_name=dataset_name, save_mode="single")
        if not os.path.exists(data_path):
            print(f"File {data_path} not found. Exiting.")
            return

        # ndmin=2 keeps a single-row file two-dimensional for the column indexing below.
        data = np.loadtxt(data_path, ndmin=2)
        if data.size == 0:
            continue

        # Identify separator rows (where the first three columns are all 0)
        separators = np.flatnonzero(np.all(data[:, :3] == 0, axis=1))

        # Store each neighborhood separately
        neighborhoods = []
        start_idx = 0
        for sep in separators:
            if start_idx != sep:
                neighborhoods.append(data[start_idx:sep])
            start_idx = sep + 1
        if start_idx < len(data):
            neighborhoods.append(data[start_idx:])

        for block in neighborhoods:
            # These statistics were commented out while still being referenced
            # by the append below, which raised NameError.
            mean_col4 = np.mean(block[:, 3])
            var_col4 = np.var(block[:, 3])

            mean_col5 = np.mean(block[:, 4])
            var_col5 = np.var(block[:, 4])

            radius = block[0, -1]

            feature_rows.append([mean_col4, var_col4, mean_col5, var_col5, radius])

    # Convert to NumPy array at the end
    neighborhood_training_data = np.array(feature_rows)
    save_neighborhood_to_txt(neighborhood_training_data, filename="DEN_HER.txt")

    return neighborhood_training_data