In [None]:
import matplotlib.pyplot as plt
from plyfile import PlyData, PlyElement
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

In [None]:
# TO DO: set your file path 
file_path = "path_to_your_LIDAR_generated_POINTCLOUD"

# Read the PLY file and grab its 'vertex' element once; both names are kept
# because they are notebook-level variables.
ply_data = PlyData.read(file_path)
vertices = ply_data['vertex']
vertex_data = vertices
headers = vertex_data.data.dtype.names

# One whole-column read per property instead of a Python-level loop over
# every vertex: the per-point loop is very slow on large point clouds.
variables_per_header = {
    header: np.asarray(vertex_data[header]) for header in headers
}

original_df = pd.DataFrame.from_dict(variables_per_header)
original_df.columns

## Plot the values per feature in a histogram

In [None]:
# One histogram per vertex property; NaN and +/-inf entries are left out.
for header in headers:
    data = np.asarray(variables_per_header[header])
    finite_data = data[np.isfinite(data)]

    # Nothing finite to draw for this property: report it and move on.
    if finite_data.size == 0:
        print(f"No valid data for {header} to plot.")
        continue

    plt.hist(finite_data, bins=30, color='skyblue', edgecolor='black')
    plt.xlabel(header)
    plt.ylabel('Frequency')
    plt.title(f'Histogram for {header}')
    plt.show()

### Remove features that have the same value for every point

In [None]:
def remove_features_func(dictionairy, headers_to_remove):
    """Build a DataFrame from ``dictionairy`` without the listed keys.

    Parameters
    ----------
    dictionairy : dict
        Mapping from column name to column values. It is not modified;
        a shallow copy is edited instead.
    headers_to_remove : iterable
        Keys to leave out. Keys that are not present are ignored.

    Returns
    -------
    pandas.DataFrame
        Frame built from the remaining key/value pairs.
    """
    remaining = dictionairy.copy()
    for unwanted in headers_to_remove:
        # pop with a default silently skips keys that are absent
        remaining.pop(unwanted, None)
    return pd.DataFrame.from_dict(remaining)

In [None]:
# Property names to drop before clustering (adjust to your own point cloud).
features_zeroValues = [
    'scalar_Return_Number',
    'scalar_Number_Of_Returns',
]
data_without_zero_features = remove_features_func(
    dictionairy=variables_per_header,
    headers_to_remove=features_zeroValues,
)

### Fill NaN values and infinity values

In [None]:
# Turn +/-inf into NaN first so the column means are computed from finite
# values only, then impute every missing entry with its column mean in one
# pass. `replace` returns a new frame, so `data_without_zero_features` is
# left untouched and re-running this cell gives the same result.
data_filtered = data_without_zero_features.replace([np.inf, -np.inf], np.nan)
data_filtered = data_filtered.fillna(data_filtered.mean())

### Apply a clustering model and export the segmented point cloud as a .ply file

In [None]:
def apply_model(dataframe, model, labels=True):
    """Standardise the features, fit ``model`` on them and attach cluster ids.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Feature table; every column is passed to the scaler and the model.
        It is not modified.
    model : estimator
        Object with a ``fit`` method and, after fitting, either a
        ``labels_`` attribute or a ``predict`` method.
    labels : bool, default True
        If True, take the assignments from ``model.labels_`` when the fitted
        model has that attribute. If False, or if the model has no
        ``labels_``, call ``model.predict`` on the scaled data instead.

    Returns
    -------
    pandas.DataFrame
        Copy of ``dataframe`` (unscaled values) with an added ``'cluster'``
        column.
    """
    data_copy = dataframe.copy()
    scaled_data = StandardScaler().fit_transform(data_copy)
    model.fit(scaled_data)
    # Some estimators expose no `labels_` after fitting; fall back to
    # `predict` rather than failing once the fit has already been paid for.
    if labels and hasattr(model, 'labels_'):
        data_copy['cluster'] = model.labels_
    else:
        data_copy['cluster'] = model.predict(scaled_data)
    return data_copy

In [None]:
def export_ply_file_withClusters(original_dataframe, clustered_dataframe, name_ply_file):
    """Write the x/y/z coordinates plus a cluster id per point to a binary PLY file.

    Parameters
    ----------
    original_dataframe : pandas.DataFrame
        Must contain the columns ``'x'``, ``'y'`` and ``'z'``.
    clustered_dataframe : pandas.DataFrame
        Must contain a ``'cluster'`` column with one entry per row of
        ``original_dataframe``; values are matched by position, not by index.
    name_ply_file
        Destination handed to ``PlyData.write``.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    columns_xyz = ['x', 'y', 'z']
    cluster_ids = clustered_dataframe['cluster'].to_numpy()
    n_points = len(original_dataframe)
    if len(cluster_ids) != n_points:
        raise ValueError(
            f"Length of cluster values ({len(cluster_ids)}) does not match "
            f"number of points ({n_points})"
        )

    # Coordinates are stored as 32-bit floats, cluster ids as 32-bit ints.
    dtypes = [(column, 'f4') for column in columns_xyz]
    dtypes.append(('cluster', 'i4'))

    # Fill a preallocated structured array column by column; this avoids
    # creating one Python tuple per point, which is costly for large clouds.
    vertex_data = np.empty(n_points, dtype=dtypes)
    for column in columns_xyz:
        vertex_data[column] = original_dataframe[column].to_numpy()
    vertex_data['cluster'] = cluster_ids

    vertex_element = PlyElement.describe(vertex_data, 'vertex')
    ply_data = PlyData([vertex_element], text=False)
    ply_data.write(name_ply_file)

### Trying different cluster models

In [None]:
# TO DO: change the model if you want to or change the hyperparameters
model = KMeans(n_clusters=14, random_state=42)
df_cluster_kMeans14 = apply_model(dataframe=data_filtered, model=model)
export_ply_file_withClusters(
    original_dataframe=original_df,
    clustered_dataframe=df_cluster_kMeans14,
    name_ply_file="../../7. Results/Segmented Point Cloud/cluster_kMeans14.ply",
)

In [None]:
# Gaussian mixture variant; labels=False makes apply_model use model.predict.
model = GaussianMixture(n_components=14, random_state=0)
df_clusterGM14 = apply_model(dataframe=data_filtered, model=model, labels=False)
export_ply_file_withClusters(
    original_dataframe=original_df,
    clustered_dataframe=df_clusterGM14,
    name_ply_file="../../7. Results/Segmented Point Cloud/cluster_GM14.ply",
)