In [1]:
import os
import laspy as lp
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.neighbors import KDTree
from sklearn.mixture import GaussianMixture

def read_las_file(las_path):
    try:
        return lp.read(las_path)
    except Exception as e:
        print(f"Error reading {las_path}: {e}")
        return None

def normalize_colors(colors):
    rgba = colors - colors.min(axis=0)
    rgba = rgba / rgba.max(axis=0)
    return (rgba * 255).astype(np.uint8)

def normalize_points(points, eps= 1e-8):
    if not isinstance(points, np.ndarray) or points.shape[1] != 3:
        raise ValueError("points must be a numpy array with shape (n, 3).")
    points_ = points.copy()

    x_min, x_max = np.min(points_[:,0]), np.max(points_[:,0])
    y_min, y_max = np.min(points_[:,1]), np.max(points_[:,1])
    z_min, z_max = np.min(points_[:,2]), np.max(points_[:,2])

    points_[:,0] = (points_[:,0] - x_min) / (x_max - x_min + eps)
    points_[:,1] = (points_[:,1] - y_min) / (y_max - y_min + eps)
    points_[:,2] = (points_[:,2] - z_min) / (z_max - z_min + eps)
    
    return points_, x_min, x_max, y_min, y_max, z_min, z_max  

def get_points_and_colors(las_paths: list, limit: int = -1):
    if not isinstance(las_paths, list) or not all(isinstance(path, str) for path in las_paths):
        raise ValueError("las_paths must be a list of strings.")

    if not isinstance(limit, int) or limit < -1:
        raise ValueError("limit must be an integer greater than or equal to -1.")

    points = []
    colors = []
    classes = []

    for las_path in las_paths:
        las = read_las_file(las_path)
        if las is not None:
            points.append(np.vstack((las.x, las.y, las.z)).transpose())
            colors.append(np.vstack((las.red, las.green, las.blue)).transpose())
            classes.append(las.classification)

    if not points:
        raise ValueError("No valid LAS files found.")

    points = np.vstack(points)
    colors = np.vstack(colors)
    classes = np.hstack(classes)

    if limit > 0:
        idxs = np.random.choice(len(points), limit, replace=False)
        points = points[idxs]
        colors = colors[idxs]
        classes = classes[idxs]

    rgba_colors = normalize_colors(colors)

    return points, colors, rgba_colors, classes


def get_neighborhood(kdtree, point_idx, radius):
    indices = kdtree.query_radius([kdtree.data[point_idx]], r=radius)
    return indices[0]

def automatic_gmm_components(data, max_components=2, criterion='aic', T=0.1, **kwargs):
    n_components_range = range(1, max_components + 1)
    criterions, means = [], []
    if criterion not in ['aic', 'bic']:
        raise ValueError("Criterio no válido. Usa 'bic' o 'aic'.")
    
    for n in n_components_range:
        try:
            gmm = GaussianMixture(n_components=n, **kwargs)
            gmm.fit(data)
            criterions.append(gmm.bic(data)) if criterion == 'bic' else criterions.append(gmm.aic(data))
            means.append(gmm.means_)
            
        except ValueError as e:
            print(f"Error fitting GMM with {n} components: {e}")
            break

    optimal_idx = np.argmin(criterions)
    optimal_means = means[optimal_idx]
    optimal_components = n_components_range[optimal_idx]
    
    if optimal_components == 2 and abs(optimal_means[0] - optimal_means[1]) <= T:
        return 1, [np.mean(optimal_means, dtype=np.float32)]
    
    if not criterions:
        return 1, [np.mean(data, dtype=np.float32)]  # Si no se pudo ajustar ningún modelo, devolver 1 componente
    
    return optimal_components, means[optimal_idx]

In [2]:
name_project = 'poza1b'
path_project = f'../data/{name_project}' 

las_paths = [f'{path_project}/{filename}' for filename in os.listdir(path_project) if filename.endswith('.las')]
points, colors, rgba_colors, classes = get_points_and_colors(las_paths, limit=10_000)

points.shape, colors.shape, rgba_colors.shape, classes.shape

((10000, 3), (10000, 3), (10000, 3), (10000,))

In [3]:
# # Definir el tamaño de la vecindad
# result = []
# neighborhood_size = 10 # transforma a metro 
# x,y,z = points[:,0], points[:,1], points[:,2]

# points_norm, x_min, x_max, y_min,y_max, z_min, z_max = normalize_points(points)
# x_norm,y_norm,z_norm = points_norm[:,0], points_norm[:,1], points_norm[:,2]

# neighborhood_size_x = neighborhood_size / (x_max - x_min + 1e-8)
# neighborhood_size_y = neighborhood_size / (y_max - y_min + 1e-8)

# # Iterar sobre cada punto (x, y)
# for i in tqdm(range(len(x))):
#     # Filtrar los puntos en la vecindad
#     px, py, pz = x_norm[i], y_norm[i], z_norm[i] 
#     ## obtener todo los puntos de la venciddad de px py utlizar kdtree para la vencidad
#     mask = (np.abs(x_norm - px) < neighborhood_size_x) & (np.abs(y_norm - py) < neighborhood_size_y)
#     z_neighborhood = z_norm[mask]

#     if len(z_neighborhood) < 2:
#         continue

#     z_neighborhood = z_neighborhood.reshape(-1, 1)

#     # Encontrar y ajustar el modelo óptimo
#     optimal_n_components, means = automatic_gmm_components(z_neighborhood, max_components=2)
#     result.append([x_norm[i], y_norm[i], z_norm[i], optimal_n_components])

#     # if optimal_n_components >= 2:

#     #     gmm_optimal = GaussianMixture(n_components=optimal_n_components, random_state=42)
#     #     gmm_optimal.fit(z_neighborhood)

#     #     # Preparar datos para la gráfica
#     #     x_min_, x_max_ = z_neighborhood.min() - 1, z_neighborhood.max() + 1
#     #     x_axis = np.linspace(x_min_, x_max_, 1000).reshape(-1, 1)
        
#     #     # Calcular componentes
#     #     logprob = gmm_optimal.score_samples(x_axis)
#     #     individual_pdfs = np.exp(gmm_optimal.score_samples(x_axis))  # PDF de la mezcla completa

#     #     # Configurar la gráfica
#     #     plt.figure(figsize=(10, 6))
#     #     plt.hist(z_neighborhood, bins=30, density=True, alpha=0.6, color='g', label='Datos')
#     #     plt.plot(x_axis, np.exp(logprob), 'k-', lw=2, label='Mezcla GMM')

#     #     # Graficar componentes individuales
#     #     for j in range(gmm_optimal.n_components):
#     #         weight = gmm_optimal.weights_[j]
#     #         mean = gmm_optimal.means_[j, 0]
#     #         std = np.sqrt(gmm_optimal.covariances_[j, 0])
            
#     #         component_pdf = weight * (1/(std * np.sqrt(2 * np.pi))) * \
#     #                     np.exp(-0.5 * ((x_axis - mean)/std)**2)
            
#     #         plt.plot(x_axis, component_pdf, '--', lw=2, 
#     #                 label='ac') #f'Componente {j+1} ($\mu$={mean:.2f}, $\sigma$={std:.2f})')
#     #     plt.title(f'Distribución de z en ({x_norm[i]:.2f}, {y_norm[i]:.2f}) - {optimal_n_components} componentes')
#     #     plt.xlabel('Valor de z')
#     #     plt.ylabel('Frecuencia')
#     #     plt.legend()
#     #     plt.show()

# np.array(result).shape         

In [4]:
neighborhood_size = 1
x, y, z = points[:, 0], points[:, 1], points[:, 2]

points_norm, x_min, x_max, y_min, y_max, z_min, z_max = normalize_points(points)
x_norm, y_norm, z_norm = points_norm[:, 0], points_norm[:, 1], points_norm[:, 2]

neighborhood_size_x = neighborhood_size / (x_max - x_min + 1e-8)
neighborhood_size_y = neighborhood_size / (y_max - y_min + 1e-8)

kdtree = KDTree(points_norm[:, :2])  # Solo usamos x y para calcular la vecindad
result = []

for i in tqdm(range(len(x_norm))):
    px, py = x_norm[i], y_norm[i]
    
    # Obtener los índices de los puntos en la vecindad usando KDTree
    indices_neighborhood = get_neighborhood(kdtree, i, neighborhood_size_x)
    
    # Filtrar los puntos en la vecindad por las coordenadas Y
    #mask_y = np.abs(y_norm[indices_neighborhood] - py) < neighborhood_size_y
    #indices_neighborhood = indices_neighborhood[mask_y]
    
    z_neighborhood = z_norm[indices_neighborhood]
    
    if len(z_neighborhood) < 2:
        continue
    
    z_neighborhood = z_neighborhood.reshape(-1, 1)
    optimal_n_components, means = automatic_gmm_components(z_neighborhood, max_components=2)
    result.append([x_norm[i], y_norm[i], z_norm[i], optimal_n_components])

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
100%|██████████| 10000/10000 [00:05<00:00, 1693.63it/s]


In [6]:
np.array(result).shape

(1103, 4)