In [None]:
import matplotlib.pyplot as plt
from plyfile import PlyData, PlyElement
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
import seaborn as sns

In [None]:
# TO DO: set your file path 
file_path = "path_to_your_gaussian splatted_ply_file"

# Read the PLY file and grab its 'vertex' element (one record per vertex).
ply_data = PlyData.read(file_path)
vertex_data = ply_data['vertex']
vertices = vertex_data  # alias kept so cells that refer to `vertices` still work
headers = vertex_data.data.dtype.names

# Pull every property out as one whole column instead of appending value by
# value in a Python loop over all vertices; the per-vertex loop is the
# bottleneck on large files. `np.array` copies, so the columns do not keep
# referencing the buffer the PLY reader handed out.
variables_per_header = {header: np.array(vertex_data[header]) for header in headers}

## Plot the values per feature in a histogram

In [None]:
# Draw one histogram per PLY property, ignoring NaN and +/-inf entries.
for header in headers:
    feature_values = np.array(variables_per_header[header])
    finite_values = feature_values[np.isfinite(feature_values)]

    # Nothing finite to draw: report it and move on to the next property.
    if len(finite_values) == 0:
        print(f"No valid data for {header} to plot.")
        continue

    plt.hist(finite_values, bins=30, color='skyblue', edgecolor='black')
    plt.xlabel(header)
    plt.ylabel('Frequency')
    plt.title(f'Histogram for {header}')
    plt.show()

In [None]:
# One column per PLY property, one row per vertex.
original_df = pd.DataFrame(variables_per_header)
original_df.columns

In [None]:
# Print the value range of every feature; a feature whose min equals its max
# holds the same value on every row.
# Series.min()/max() skip NaN, whereas the built-in min()/max() compare
# element by element and return an order-dependent result once a NaN is
# present. They also run vectorised instead of iterating in Python.
for col in original_df.columns:
    column = original_df[col]
    print(f'For {col}, min = {column.min()}, max = {column.max()}')

f_dc_0, f_dc_1, f_dc_2: the direct (RGB) coefficients of the spherical harmonics.
f_rest: the remaining spherical-harmonics parameters used to store the colors.
Scale: (x, y, z)
Rotation: quaternion (components i, j, k and a scalar component w)


### Remove features that have a value of 0 for every point

In [None]:
def remove_features_func(dictionairy, headers_to_remove):
    """Return a DataFrame built from `dictionairy` without the given headers.

    The input is copied first, so the caller's object keeps all its entries.
    Headers that are not present in the input are silently skipped.

    Parameters
    ----------
    dictionairy : mapping of header -> column values, or a DataFrame
    headers_to_remove : iterable of header names to drop

    Returns
    -------
    pandas.DataFrame holding the remaining headers as columns.
    """
    remaining = dictionairy.copy()
    for header in headers_to_remove:
        if header not in remaining.keys():
            continue
        del remaining[header]
    return pd.DataFrame.from_dict(remaining)

In [None]:
# TO DO: based on the above information, give the names of all features that have the same value for every row. These features will be removed.
features_zeroValues = ['feat1', 'feat2']
data_without_zero_features = remove_features_func(variables_per_header, features_zeroValues)

# Pairwise correlation between the remaining features.
corr_1 = data_without_zero_features.corr()

# Heatmap on a fixed [-1, 1] colour scale; the all-zero mask hides no cell.
f, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_1,
    mask=np.zeros_like(corr_1),
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    square=True,
    ax=ax,
    vmin=-1,
    vmax=1,
)

### Fill NaN values and infinity values + Reduce the dimensions

In [None]:
# Treat +/-inf as missing first, then fill every missing entry with the mean
# of its column. Doing the replacement before the fill means the column means
# are computed from finite values only, so a single pass is enough.
# `replace` returns a new frame, so `data_without_zero_features` is untouched
# and this cell can be re-run safely.
data = data_without_zero_features.replace([np.inf, -np.inf], np.nan)
data = data.fillna(data.mean())

In [None]:
f_rest_columns = [col for col in data.columns if 'f_rest' in col]

# The reference 3D Gaussian Splatting exporter writes the f_rest_* properties
# channel-major: the first third holds the red coefficients, the second third
# the green ones and the last third the blue ones. They are NOT interleaved
# as R, G, B, R, G, B, so the channel is `index // coeffs_per_channel`
# rather than `index % 3`.
# NOTE(review): assumes the PLY follows that reference layout - confirm for
# files produced by other exporters.
# The count is taken from the unfiltered frame so that f_rest columns removed
# earlier do not shift the channel boundaries.
n_f_rest_in_file = sum('f_rest' in col for col in original_df.columns)
if n_f_rest_in_file % 3 != 0:
    raise ValueError(
        f"Expected the number of f_rest properties to be a multiple of 3, got {n_f_rest_in_file}"
    )
coeffs_per_channel = n_f_rest_in_file // 3

red_sperical_component = []
green_sperical_component = []
blue_sperical_component = []
channel_groups = (red_sperical_component, green_sperical_component, blue_sperical_component)
for col in f_rest_columns:
    # Column names look like 'f_rest_<index>'; the numeric suffix is the index.
    sh_index = int(col[len('f_rest_'):])
    channel_groups[sh_index // coeffs_per_channel].append(col)

f_dc_columns = [col for col in data.columns if 'f_dc' in col]

In [None]:
def apply_pca(data, columns, n_components=1, random_state=42):
    """Standardise the selected columns and project them with PCA.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns to reduce.
    columns : list of str
        Names of the columns fed to the PCA.
    n_components : int, default 1
        Number of principal components to keep.
    random_state : int or None, default 42
        Seed handed to PCA so that the result is reproducible when
        scikit-learn selects a randomised SVD solver.

    Returns
    -------
    numpy.ndarray
        The output of `PCA.fit_transform` on the standardised columns.
    """
    # PCA is scale-sensitive, so bring every column to zero mean / unit variance.
    data_scaled = StandardScaler().fit_transform(data[columns])
    pca = PCA(n_components=n_components, random_state=random_state)
    return pca.fit_transform(data_scaled)

# Collapse each group of colour coefficients into a single PCA component.
# The component column is created inside the same guard that computes it:
# previously the assignments ran unconditionally, so an empty group raised a
# NameError on a fresh kernel (or silently reused a stale value from an
# earlier run).
if red_sperical_component:
    red_spherical_pca = apply_pca(data, red_sperical_component)
    data['Red_Spherical_Component'] = red_spherical_pca

if green_sperical_component:
    green_spherical_pca = apply_pca(data, green_sperical_component)
    data['Green_Spherical_Component'] = green_spherical_pca

if blue_sperical_component:
    blue_spherical_pca = apply_pca(data, blue_sperical_component)
    data['Blue_Spherical_Component'] = blue_spherical_pca

if f_dc_columns:
    f_dc_pca = apply_pca(data, f_dc_columns)
    data['f_dc_Component'] = f_dc_pca

# Drop the raw coefficient columns now that they are summarised by the
# component columns above.
data_filtered = remove_features_func(data, f_rest_columns + f_dc_columns)

corr_2 = data_filtered.corr()
f, ax = plt.subplots(figsize=(10, 8))

# Heatmap on a fixed [-1, 1] colour scale; the all-zero mask hides no cell.
sns.heatmap(corr_2, mask=np.zeros_like(corr_2),
            cmap=sns.diverging_palette(220, 10, as_cmap=True),
            square=True, ax=ax, vmin=-1, vmax=1)

### Apply a clustering model and export the segmented Gaussian splat as a .ply file

In [None]:
def apply_model(dataframe, model, labels=True):
    """Fit `model` on the standardised frame and attach the cluster ids.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Features to cluster; it is copied, not modified.
    model : estimator
        Object providing `fit`, and either a `labels_` attribute after
        fitting or a `predict` method.
    labels : bool, default True
        When True the ids are read from `model.labels_`; otherwise they come
        from `model.predict` on the same standardised data.

    Returns
    -------
    pandas.DataFrame
        Copy of `dataframe` with an extra 'cluster' column.
    """
    result = dataframe.copy()
    # Standardise before the 'cluster' column exists, so only features are scaled.
    features_scaled = StandardScaler().fit_transform(result)
    model.fit(features_scaled)
    result['cluster'] = model.labels_ if labels else model.predict(features_scaled)
    return result

In [None]:
def export_GS_file_withClusters(original_dataframe, clustered_dataframe, name_ply_file):
    """Write the original vertex properties plus a 'cluster' id to a binary PLY.

    Parameters
    ----------
    original_dataframe : pandas.DataFrame
        One column per vertex property; every column is written as 'f4'.
    clustered_dataframe : pandas.DataFrame
        Frame with a 'cluster' column, row-aligned by position with
        `original_dataframe`; written as 'i4'.
    name_ply_file : str or path-like (or whatever `PlyData.write` accepts)
        Destination of the PLY file. For a path, missing parent directories
        are created.

    Raises
    ------
    ValueError
        If the number of cluster ids differs from the number of vertices.
    """
    import os

    cluster_ids = clustered_dataframe['cluster'].to_numpy()
    n_points = len(original_dataframe)
    if len(cluster_ids) != n_points:
        raise ValueError(
            f"Length of cluster ids ({len(cluster_ids)}) does not match "
            f"the number of vertices ({n_points})"
        )

    dtypes = [(column, 'f4') for column in original_dataframe.columns]
    dtypes.append(('cluster', 'i4'))

    # Fill the structured array column by column. Building it from a list of
    # per-row tuples creates one Python tuple per vertex, which is slow and
    # memory-hungry for large splats.
    vertex_data = np.empty(n_points, dtype=dtypes)
    for column in original_dataframe.columns:
        vertex_data[column] = original_dataframe[column].to_numpy()
    vertex_data['cluster'] = cluster_ids

    # Create the output directory up front so the export does not fail after
    # an expensive clustering run just because the folder is missing.
    if isinstance(name_ply_file, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(name_ply_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    vertex_element = PlyElement.describe(vertex_data, 'vertex')
    ply_data = PlyData([vertex_element], text=False)
    ply_data.write(name_ply_file)

### Trying different clustering methods

In [None]:
# TO DO: change the model if you want to or change the hyperparameters
# NOTE(review): this clusters `data` (raw colour coefficients plus the PCA
# components), while the GaussianMixture cell clusters `data_filtered` -
# confirm that the difference is intended.
model = KMeans(n_clusters=14, random_state=42)
df_cluster_kMeans14 = apply_model(dataframe=data, model=model)
export_GS_file_withClusters(
    original_df,
    df_cluster_kMeans14,
    "../../7. Results/Segmented Gaussian splats/segmented_GS_kMeans14.ply",
)

In [None]:
# Gaussian mixture on the reduced feature set; cluster ids come from predict().
# NOTE(review): the variable and file name say "14" but the model is fitted
# with n_components=7 - confirm which one is intended.
model = GaussianMixture(n_components=7, random_state=0)
df_clusterGM14 = apply_model(dataframe=data_filtered, model=model, labels=False)
export_GS_file_withClusters(
    original_df,
    df_clusterGM14,
    "../../7. Results/Segmented Gaussian splats/segmented_GS_GaussianMixture14.ply",
)