In [14]:
import jax 
import jax.numpy as jnp
import numpy as np
from jax import jit, vmap
import torch
from scipy.spatial import cKDTree
from scipy.spatial import KDTree
import os
import random

#### Processing function

In [15]:
@jit
def preprocess(points):
    """Center a point cloud on its mean and scale it by half its bounding-box diagonal.

    Args:
        points: array of points, one point per row.

    Returns:
        The points shifted by their per-axis mean and divided by half the
        Euclidean length of the axis-aligned bounding-box diagonal.
    """
    lower_corner = jnp.min(points, axis=0)
    upper_corner = jnp.max(points, axis=0)
    centroid = points.mean(axis=0)
    # L2 norm of the vector spanning the axis-aligned bounding box.
    diagonal = jnp.linalg.norm(upper_corner - lower_corner, ord=2)
    centered = points - centroid
    return centered / (0.5 * diagonal)


In [16]:
def pca_points(patch_points):
    '''
    Rotate a patch of points into the basis given by its principal axes.

    Args:
        patch_points: torch tensor of xyz points, one point per row
            (assumed shape (N, 3) -- TODO confirm against callers)

    Returns:
        patch_points: the points expressed in the PCA basis; the original
            origin (0, 0, 0) is kept at the origin after the rotation
        trans: matrix whose columns are the left singular vectors of the
            centered, transposed patch (the rotation that was applied)
    '''
    # Center the patch on its mean so the SVD describes the spread of the points.
    pts_mean = patch_points.mean(0)
    centered = patch_points - pts_mean
    # torch.svd is deprecated; torch.linalg.svd(..., full_matrices=False) is the
    # documented replacement for torch.svd's default some=True behaviour.
    trans, _, _ = torch.linalg.svd(torch.t(centered), full_matrices=False)
    rotated = torch.mm(centered, trans)
    # After centering, the original origin sits at -pts_mean; rotate it as well ...
    cp_new = torch.matmul(-pts_mean, trans)
    # ... and shift the patch so that the original origin maps back to (0, 0, 0).
    patch_points = rotated - cp_new
    return patch_points, trans


In [46]:

def save_neighborhood_to_txt(patch_points, filename="neighborhood.txt"):
    """Write one neighborhood to a space-delimited text file and report it.

    Args:
        patch_points: array-like accepted by ``np.savetxt``; written one row per line.
        filename: destination path; any existing file is overwritten.
    """
    # Six decimals per value, single-space separated.
    np.savetxt(filename, patch_points, delimiter=" ", fmt="%.6f")
    confirmation = f"Saved neighborhood to {filename}"
    print(confirmation)
    
    
## Modified leihui code to save the files 
    
def processPartL(kdtree, index, points, searchK):
    """Extract the PCA-aligned neighborhood around one query point.

    Args:
        kdtree: tree exposing ``query(point, k=...)`` that returns
            (distances, indices) for the nearest neighbors.
        index: row of ``points`` used as the query point.
        points: numpy array of points, one point per row.
        searchK: number of nearest neighbors to retrieve.

    Returns:
        (patch_points, patch_point_inds, trans, rad): the neighborhood centered
        on the query point and aligned by ``pca_points``, the neighbor indices
        into ``points``, the alignment matrix, and the largest neighbor distance.
    """
    query_point = points[index, :]
    neighbor_dists, neighbor_ids = kdtree.query(query_point, k=searchK)
    # Patch radius: distance to the farthest of the retrieved neighbors.
    rad = max(neighbor_dists)

    neighbors = torch.from_numpy(points[neighbor_ids, :])
    # Center the patch on the query point.
    centered = neighbors - torch.from_numpy(query_point)
    # Scaling the patch to the unit sphere is currently disabled:
    # centered = centered / rad

    aligned, trans = pca_points(centered)
    return aligned, neighbor_ids, trans, rad

##### PC-Diff imports

In [41]:
from pcdiff import knn_graph, estimate_basis, build_grad_div




def calculate_gradients(pointcloud, k_neighbors, scalar_field=None):
    """Apply pcdiff's gradient operator to a scalar function on the point cloud.

    Args:
        pointcloud: array of points, one point per row.
        k_neighbors: number of neighbors used to build the k-NN graph.
        scalar_field: optional values of the function to differentiate, one
            row per point (shape (N,) or (N, C)). When omitted, a uniform
            random field is drawn from NumPy's global RNG, as before.
            NOTE(review): gradients of random noise differ on every run and are
            unlikely to be meaningful training targets -- pass a real
            function, or seed ``np.random``, once the intended signal is decided.

    Returns:
        ``grad @ scalar_field`` where ``grad`` is the operator returned by
        ``build_grad_div``.
        NOTE(review): the operator may stack several tangent-plane components
        per point, so the result may have more rows than ``pointcloud`` --
        confirm the layout before indexing it by point index.

    Raises:
        ValueError: if ``scalar_field`` does not have one row per point.
    """
    #TODO: Find out if this is only grad_x or just the gradient found here, maybe need more info for complete gradient
    edge_index = knn_graph(pointcloud, k_neighbors)
    normal, x_basis, y_basis = estimate_basis(pointcloud, edge_index)
    grad, _ = build_grad_div(pointcloud, normal, x_basis, y_basis, edge_index)

    if scalar_field is None:
        # Backward-compatible default: a random scalar function on the points.
        scalar_field = np.random.rand(len(pointcloud), 1)
    else:
        scalar_field = np.asarray(scalar_field, dtype=float)
        if scalar_field.ndim == 1:
            # Treat a flat vector as a single-column function.
            scalar_field = scalar_field[:, None]
        if scalar_field.shape[0] != len(pointcloud):
            raise ValueError(
                f"scalar_field has {scalar_field.shape[0]} rows but the point cloud has {len(pointcloud)} points"
            )

    return grad @ scalar_field


# Creating function to generate datasets from pointclouds
* All point clouds in the dataset folder are used; the requested number of neighborhoods is divided equally among them
* Can be used to save multiple txt files, each containing one point cloud neighborhood
* Can be used to create one long txt file to which every neighborhood is appended
* Each row of the data has the structure (x, y, z, gradient)

In [48]:
def create_training_data(num_training_sets, num_neighbors, method = "PCA", datasets = "./Data/Full_point_clouds", save_path = "./Data/Training_data", save_to_file = False,
                         single_file_name="CombinedDataset.txt", save_mode="single"):
    """Sample PCA-aligned neighborhoods (with gradients) from every point cloud in a folder.

    The requested number of neighborhoods is split as evenly as possible over
    the ``.txt`` / ``.xyz`` files found in ``datasets``; any remainder goes to
    the first files in sorted order so the total matches ``num_training_sets``
    whenever the clouds contain enough points.

    Args:
        num_training_sets: total number of neighborhoods to generate.
        num_neighbors: number of nearest neighbors per neighborhood (also used
            as the k of the gradient computation).
        method: alignment method; only "PCA" is implemented.
        datasets: folder containing the point cloud files (first three
            columns are read as x, y, z).
        save_path: output folder, created when ``save_to_file`` is true.
        save_to_file: whether to write the neighborhoods to disk.
        single_file_name: file name used when ``save_mode`` is "single".
        save_mode: "single" appends every neighborhood to one file (an existing
            file is replaced); "multiple" writes one file per neighborhood.

    Returns:
        The last neighborhood generated (a torch tensor without the gradient
        columns), or None when no neighborhood was generated.

    Raises:
        ValueError: for an unsupported ``method`` or, when saving, ``save_mode``.
        FileNotFoundError: if ``datasets`` contains no ``.txt`` / ``.xyz`` file.
    """
    # Fail early instead of doing all the work and producing nothing.
    if method != "PCA":
        raise ValueError(f"Unsupported method {method!r}; only 'PCA' is implemented")
    if save_to_file and save_mode not in ("single", "multiple"):
        raise ValueError(f"Unsupported save_mode {save_mode!r}; expected 'single' or 'multiple'")

    # Sorted so the quota assignment does not depend on os.listdir ordering.
    all_files = sorted(
        os.path.join(datasets, f) for f in os.listdir(datasets) if f.endswith(('.txt', '.xyz'))
    )
    if not all_files:
        raise FileNotFoundError(f"No .txt or .xyz point clouds found in {datasets!r}")

    # Spread the request over the files; the first `remainder` files take one extra.
    base_quota, remainder = divmod(int(num_training_sets), len(all_files))
    quotas = [base_quota + (1 if position < remainder else 0) for position in range(len(all_files))]

    single_file_path = os.path.join(save_path, single_file_name)
    if save_to_file:
        os.makedirs(save_path, exist_ok=True)
        # Only touch the previous combined file when we are actually going to rewrite it.
        if save_mode == "single" and os.path.exists(single_file_path):
            os.remove(single_file_path)

    total_sets_created = 0  # Counter used to number the per-neighborhood files
    neighborhood = None

    for file_path, quota in zip(all_files, quotas):
        if quota <= 0:
            continue  # Nothing requested from this file; skip the expensive load

        # ndmin=2 keeps a single-row file two-dimensional.
        rawpoints = np.loadtxt(file_path, usecols=(0, 1, 2), ndmin=2)  # Load only the first three columns (x, y, z)
        print(f"Loaded {rawpoints.shape[0]} points from {file_path}")

        if rawpoints.shape[0] < num_neighbors:
            # A k-NN query cannot return more neighbors than there are points.
            print(f"Skipping {file_path}: fewer than {num_neighbors} points")
            continue

        points = preprocess(rawpoints)
        points_np = np.array(points.block_until_ready())
        kdtree = KDTree(points_np)

        # NOTE(review): gradients are indexed by point index below; confirm that
        # calculate_gradients returns exactly one row per point.
        gradients = calculate_gradients(points_np, num_neighbors)

        # Never ask for more distinct query points than the cloud contains.
        sample_size = min(quota, len(points_np))
        selected_indices = random.sample(range(len(points_np)), sample_size)

        for i in selected_indices:
            neighborhood, indices, _, _ = processPartL(kdtree, i, points_np, num_neighbors)
            neighborhood_gradients = gradients[indices]
            neighborhood_with_gradients = np.hstack([neighborhood.numpy(), neighborhood_gradients])

            if save_to_file:
                if save_mode == "multiple":
                    # Save each neighborhood in a separate file
                    filename = os.path.join(save_path, f"neighborhood_{total_sets_created}.txt")
                    save_neighborhood_to_txt(neighborhood_with_gradients, filename)
                else:
                    # Append the neighborhood to a single file
                    with open(single_file_path, "a") as f:
                        np.savetxt(f, neighborhood_with_gradients, fmt="%.6f", delimiter=" ")

            total_sets_created += 1

    return neighborhood

In [50]:
# Request 100 neighborhoods of 20 neighbors each, appended to one combined file.
# NOTE: create_training_data returns only the last neighborhood it generated,
# so `points` holds a single patch here, not the whole training set.
points = create_training_data(100, 20, save_to_file=True, save_mode="single")

Loaded 35947 points from ./Data/Full_point_clouds\bunny_order3_normal_beta.txt
Loaded 120982 points from ./Data/Full_point_clouds\cube-isometric.xyz
