In [None]:
%matplotlib qt
 
import matplotlib.pyplot as plt
import numpy as np
import os
import hdbscan
from scipy.stats import skew, kurtosis, mode
from scipy.spatial import KDTree
import json
import pickle
from hdbscan.prediction import approximate_predict
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture

In [None]:
def find_parts(directory_path):
    """Collect the acquisition part folders found below ``directory_path``.

    A folder is kept when its full path contains the segment
    ``Box1`` / ``AdaptiveZ_10mm`` (either path separator is accepted) and
    the path ends with ``_4``.

    Parameters
    ----------
    directory_path : str
        Root directory, walked recursively with ``os.walk``.

    Returns
    -------
    list of str
        Matching folder paths, sorted in reverse lexicographic order.
        Empty when nothing matches (``os.walk`` yields nothing for a
        directory that does not exist).
    """
    # The original literal "Box1\AdaptiveZ_10mm" relied on the invalid
    # escape sequence "\A" and could only match Windows-style paths.
    # Matching on a separator-normalised copy keeps the Windows behaviour
    # and also works where os.sep is "/".
    marker = "Box1/AdaptiveZ_10mm"
    parts_paths = []

    for root, dirs, _files in os.walk(directory_path):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            normalised = folder_path.replace("\\", "/")
            if marker in normalised and folder_path.endswith("_4"):
                parts_paths.append(folder_path)

    parts_paths.sort(reverse=True)
    return parts_paths

# Root of the acquisitions to analyse (UNC path on a network share).
base_path = r"\\192.168.1.100\CoreScan3-2\Acquisitions\RnD\XRF\CH\Macassa_clearance"

# Part folders selected by find_parts, in the reverse-sorted order it returns.
paths = find_parts(base_path)

In [None]:

def _read_scan_parameters(file_path):
    """Read Y start and X start/stop from ``.component_parameters.txt``.

    Returns ``(y_offset, x_start, x_stop)`` as floats. When a key occurs on
    several lines the last occurrence wins. Raises ``ValueError`` if any of
    the three keys is missing.
    """
    keys = {
        "y_offset": "XRAY_DPP[Acquisition]#0.Y.Start:",
        "x_start": "XRAY_DPP[Acquisition]#0.X.Start:",
        "x_stop": "XRAY_DPP[Acquisition]#0.X.Stop:",
    }
    values = {}
    coords_file = os.path.join(file_path, '.component_parameters.txt')
    with open(coords_file) as file:
        for line in file:
            for name, key in keys.items():
                if key in line:
                    values[name] = float(line.split(key)[1].strip())
                    break

    missing = [key for name, key in keys.items() if name not in values]
    if missing:
        raise ValueError(f"{coords_file} is missing parameter(s): {missing}")
    return values["y_offset"], values["x_start"], values["x_stop"]


def _find_lidar_file(file_path):
    """Return the path of the first ``.bpc`` file listed in ``file_path``.

    Raises ``FileNotFoundError`` when the folder holds no ``.bpc`` file.
    """
    if os.path.isdir(file_path):
        lidar_files = [fn for fn in os.listdir(file_path) if fn.endswith('.bpc')]
        if lidar_files:
            # NOTE(review): os.listdir order is arbitrary; with several .bpc
            # files the choice is not deterministic - confirm one is expected.
            return file_path + os.sep + lidar_files[0]
    raise FileNotFoundError(f"no .bpc point-cloud file found in {file_path}")


def get_point_cloud(file_path):
    """Load, transform and trim the LIDAR point cloud stored in ``file_path``.

    Steps, in order:

    1. read ``Y.Start``, ``X.Start`` and ``X.Stop`` from
       ``.component_parameters.txt``;
    2. read the first ``.bpc`` file as float32 XYZ triples and drop rows
       containing NaN;
    3. subtract the Y start from the Y column;
    4. apply the comma-separated matrix stored in
       ``.XRAY_DPP_001.lidar2xrf`` to the homogeneous points;
    5. keep points with ``z > 0`` and ``x_stop <= x <= x_start``;
    6. subtract the Y start from the Y column a second time.

    Parameters
    ----------
    file_path : str
        Folder containing the three files listed above.

    Returns
    -------
    tuple
        ``(point_cloud, x_start, x_stop)`` where ``point_cloud`` is an
        ``(N, 3)`` array and the two bounds are floats.

    Raises
    ------
    FileNotFoundError
        If a required file (parameters, ``.bpc`` or matrix) is missing.
    ValueError
        If one of the three scan parameters is absent from the
        parameters file.
    """
    y_offset, x_start, x_stop = _read_scan_parameters(file_path)
    lidar_filename = _find_lidar_file(file_path)

    data = np.fromfile(lidar_filename, dtype=np.float32)
    point_cloud = data.reshape(-1, 3)  # to xyz

    # Boolean indexing copies, so the in-place edits below never touch `data`.
    valid_rows = ~np.isnan(point_cloud).any(axis=1)
    point_cloud = point_cloud[valid_rows, ...]

    point_cloud[:, 1] = point_cloud[:, 1] - float(y_offset)

    print(f"{file_path} is loaded. \n# of point {point_cloud.shape[0]}")

    matrix_file = os.path.join(file_path, ".XRAY_DPP_001.lidar2xrf")
    with open(matrix_file, 'r') as file:
        # Blank lines (e.g. a trailing newline) are skipped instead of
        # crashing on float('').
        transformation_matrix = np.array(
            [list(map(float, line.strip().split(","))) for line in file if line.strip()]
        )

    num_points = point_cloud.shape[0]
    homogeneous_points = np.hstack((point_cloud, np.ones((num_points, 1))))
    transformed_points = homogeneous_points @ transformation_matrix.T
    point_cloud = transformed_points[:, :3]

    # Diagnostic output only: the value is printed but not used for trimming.
    floor = mode(point_cloud[:, 2])[0] - 10
    print(floor)

    point_cloud = point_cloud[point_cloud[:, 2] > 0]
    in_x_range = (point_cloud[:, 0] >= x_stop) & (point_cloud[:, 0] <= x_start)
    point_cloud = point_cloud[in_x_range]

    # NOTE(review): the Y start was already subtracted before the transform;
    # this second subtraction is kept to preserve existing results - confirm
    # that applying it twice is intended.
    point_cloud[:, 1] -= y_offset
    return point_cloud, x_start, x_stop

def trim_y(data, y_span=20):
    """Return the rows of ``data`` whose Y value (column 1) lies in ``[-y_span, y_span]``."""
    y_values = data[:, 1]
    inside_band = np.logical_and(y_values >= -y_span, y_values <= y_span)
    return data[inside_band]


In [None]:
def downsample(data, n=100000):
    """Return a random subset of at most ``n`` rows of ``data``.

    Rows are drawn without replacement using ``np.random.choice``. When
    ``n`` exceeds the number of rows, every row is returned (in random
    order) instead of raising, which is what ``replace=False`` would
    otherwise do.

    Parameters
    ----------
    data : ndarray
        Array with at least three columns; only the first three (x, y, z)
        are kept.
    n : int, optional
        Maximum number of rows to keep.

    Returns
    -------
    ndarray
        ``(min(n, len(data)), 3)`` float64 array.
    """
    n = min(n, data.shape[0])
    if n == 0:
        return np.zeros((0, 3))

    downsampled_indices = np.random.choice(data.shape[0], size=n, replace=False)

    # float64 output matches the np.zeros buffer the rows used to be copied into.
    return data[downsampled_indices, :3].astype(np.float64)

# Load every part and place the clouds one after another along X.
point_clouds = []
x_offset = 0
for path in paths:
    temp_cloud, x_start, x_stop = get_point_cloud(path)
    # Shift this part so that its x_stop coincides with the running offset...
    x_offset -= x_stop
    temp_cloud[:,0] += x_offset
    # ...then move the running offset to this part's shifted x_start.
    x_offset += x_start
    point_clouds.append(temp_cloud)

# NOTE(review): np.vstack raises ValueError when `paths` is empty.
point_cloud = np.vstack(point_clouds)
print(f"total # of point {point_cloud.shape[0]}")  # count before the Y trim
point_cloud = trim_y(point_cloud,20)

# Independent copy used by the plotting cells.
plot_cloud = point_cloud.copy()
print(f"total # of point {point_cloud.shape[0]}")  # count after the Y trim
print(len(np.unique(point_cloud[:,0])))  # number of distinct X values

# 2-D KD-tree over the X/Y columns, queried by get_distribution*.
point_tree = KDTree(point_cloud[:,:2])

In [None]:
def get_vectors(x_span = 1, y_span = 12, step = 1,y_index = 0):
    """Compute a feature vector for each sampled X position of ``point_cloud``.

    Starting at the smallest X value of the global ``point_cloud`` and
    advancing by ``step`` while X is below the largest X value, the Z
    distribution around ``(x, y_index)`` is fetched with
    ``get_distribution`` and summarised with ``get_props``. Positions whose
    feature vector contains NaN are skipped.

    Parameters
    ----------
    x_span, y_span : float
        Window half-widths forwarded to ``get_distribution``.
    step : float
        Increment between consecutive X positions; must be positive.
    y_index : float
        Y coordinate of every window centre.

    Returns
    -------
    dict
        Maps each retained X position to its ``get_props`` list.

    Raises
    ------
    ValueError
        If ``step`` is not positive (the loop would never terminate).
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step}")

    vectors = {}
    x_column = point_cloud[:, 0]
    # min()/max() give the same bounds as np.unique(...).min()/max() without
    # sorting the column, and the upper bound is computed only once.
    x = x_column.min()
    x_end = x_column.max()
    y = y_index

    while x < x_end:
        distribution = get_distribution([x, y], x_span, y_span)
        properties = get_props(distribution)
        if not np.any(np.isnan(properties)):
            vectors[x] = properties
        x += step
    return vectors


def get_distribution(point=(0, 0), x_span=10, y_span=15):
    """Return the Z values of the points inside a rectangle centred on ``point``.

    The rectangle spans ``point[0] +/- x_span`` in X and
    ``point[1] +/- y_span`` in Y (bounds inclusive). Candidates come from
    the global ``point_tree`` and are looked up in the global
    ``point_cloud``.

    Parameters
    ----------
    point : sequence of two floats
        ``(x, y)`` centre of the window.
    x_span, y_span : float
        Half-widths of the window along X and Y.

    Returns
    -------
    ndarray
        1-D array of Z values (column 2) of the points inside the window.
    """
    # query_ball_point searches a Euclidean ball by default, so the radius
    # must reach the rectangle's corners. max(x_span, y_span) was too small
    # and silently dropped points near the corners.
    search_radius = np.hypot(x_span, y_span)

    indices = point_tree.query_ball_point(point, search_radius)
    result_points = point_cloud[indices]

    in_x = (result_points[:, 0] >= point[0] - x_span) & (result_points[:, 0] <= point[0] + x_span)
    in_y = (result_points[:, 1] >= point[1] - y_span) & (result_points[:, 1] <= point[1] + y_span)

    return result_points[in_x & in_y][:, 2]


def get_distribution_vs_y(point=(0, 0), x_span=10, y_span=15):
    """Group the Z values inside a rectangle centred on ``point`` by Y value.

    The rectangle spans ``point[0] +/- x_span`` in X and
    ``point[1] +/- y_span`` in Y (bounds inclusive). Candidates come from
    the global ``point_tree`` and are looked up in the global
    ``point_cloud``.

    Parameters
    ----------
    point : sequence of two floats
        ``(x, y)`` centre of the window.
    x_span, y_span : float
        Half-widths of the window along X and Y.

    Returns
    -------
    dict
        Maps each distinct Y value found in the window to the list of Z
        values observed at that Y, in the order the points were returned.
    """
    # query_ball_point searches a Euclidean ball by default, so the radius
    # must reach the rectangle's corners. max(x_span, y_span) was too small
    # and silently dropped points near the corners.
    search_radius = np.hypot(x_span, y_span)

    indices = point_tree.query_ball_point(point, search_radius)
    result_points = point_cloud[indices]

    in_x = (result_points[:, 0] >= point[0] - x_span) & (result_points[:, 0] <= point[0] + x_span)
    in_y = (result_points[:, 1] >= point[1] - y_span) & (result_points[:, 1] <= point[1] + y_span)
    filtered_points = result_points[in_x & in_y]

    y_vals = {}
    for row in filtered_points:
        y_vals.setdefault(row[1], []).append(row[2])

    return y_vals


def get_props(distribution):
    """Summarise a 1-D distribution as a six-element feature list.

    The variance, skewness and (excess) kurtosis are stacked into a
    3-vector that is scaled to unit Euclidean length. The mean and the two
    spherical angles of that unit vector are appended.

    Parameters
    ----------
    distribution : array_like
        1-D sample of values.

    Returns
    -------
    list
        ``[variance_n, skew_n, kurtosis_n, mean, azimuth, polar]`` where the
        first three entries are the normalised components, ``azimuth`` is
        ``atan2(skew_n, kurtosis_n)`` wrapped to ``[0, 2*pi)`` and ``polar``
        is ``acos(variance_n)`` divided by ``pi / 2``. An empty input gives
        six NaNs.
    """
    distribution = np.asarray(distribution)
    if distribution.size == 0:
        # Same result as computing the statistics on an empty sample, but
        # without the runtime warnings.
        return [np.nan] * 6

    mean = np.mean(distribution)
    shape_vector = np.array(
        [np.var(distribution), skew(distribution), kurtosis(distribution)],
        dtype=float,
    )

    # np.sum(<generator>) is deprecated; compute the norm on the array.
    norm = np.sqrt(np.sum(shape_vector ** 2))
    properties = list(shape_vector / norm)

    properties.append(mean)

    z = properties[0]  # normalised variance
    y = properties[1]  # normalised skewness
    x = properties[2]  # normalised kurtosis

    roe = np.sqrt(x**2 + y**2 + z**2)
    # np.arctan2 / np.arccos exist in every NumPy release; the np.atan2 /
    # np.acos aliases are only available from NumPy 2.0.
    properties.append((np.arctan2(y, x) + (2 * np.pi)) % (2 * np.pi))
    # Clip so rounding error cannot push the cosine outside [-1, 1].
    properties.append(np.arccos(np.clip(z / roe, -1.0, 1.0)) / (np.pi / 2))

    return properties

In [None]:

# Window half-widths and X step used when sampling feature vectors.
x_span = 1
y_span = 12
step = 10


# Feature columns produced by get_props:
# 0 = variance, 1 = skew, 2 = kurtosis, 3 = mean, 4 = azimuthal, 5 = polar
properties = [1,5]
# One entry per HDBSCAN run:
# cluster size, sample size, metric, epsilon, alpha, max eps
parameters = []

parameters.append([30, 10, 'chebyshev', 0.06, 0.25, 0.1])


fig = plt.figure(figsize=(19,3))
point_cloud_copy = plot_cloud[(plot_cloud[:,1] >= 0 - y_span) & (plot_cloud[:,1] <= y_span)]

# Draw at most 50000 points for the heatmap; np.random.choice with
# replace=False raises when asked for more samples than there are points.
downsampled_indices = np.random.choice(
    point_cloud_copy.shape[0],
    size=min(50000, point_cloud_copy.shape[0]),
    replace=False,
)

x = point_cloud_copy[:, 0]
y = point_cloud_copy[:, 1]
z = point_cloud_copy[:, 2]

x_downsampled = x[downsampled_indices]
y_downsampled = y[downsampled_indices]
z_downsampled = z[downsampled_indices]


# Row 0: LIDAR heatmap; one extra row per parameter set.
gs = fig.add_gridspec(len(parameters)+1, 2, hspace = 0.8, wspace = 0.3, width_ratios = [8,1] )

ax_scatter = fig.add_subplot(gs[0,0])
ax_scatter.scatter(
    x_downsampled,
    y_downsampled,
    c=z_downsampled,
    cmap='viridis',
    s = 1
)

ax_scatter.set_title('LIDAR data heatmap')
ax_scatter.set_xlabel('X-axis')
ax_scatter.set_ylabel('Y-axis')


v_dict = get_vectors(x_span,y_span,step)

count = 0

for param in parameters:
    count += 1
    # NOTE: despite its name, `dbscan` holds an HDBSCAN model in this cell.
    dbscan = hdbscan.HDBSCAN(min_cluster_size = param[0],
                            min_samples = param[1],
                            metric = param[2],
                            cluster_selection_epsilon= param[3],
                            alpha = param[4],
                            core_dist_n_jobs= -1,
                            cluster_selection_method='eom',
                            cluster_selection_epsilon_max = param[5])

    v_array = np.array(list(v_dict.values()))
    x_values = list(v_dict.keys())

    labels = dbscan.fit_predict(v_array[:, properties])

    # Snapshot of the first-pass labels; `labels` is overwritten below.
    initial_clusters = labels.copy()
    refined_clusters = []

    # Second pass: re-cluster the members of first-pass cluster 1 on the
    # polar feature (column 5) only.
    cluster_indices = np.where(initial_clusters == 1)[0]
    cluster_points = v_array[cluster_indices]

    hdbscan_refined = hdbscan.HDBSCAN(min_cluster_size = 5,
                            min_samples = 1,
                            metric = 'euclidean',
                            cluster_selection_epsilon= 0.7,
                            alpha = 9.0,
                            core_dist_n_jobs= -1,
                            cluster_selection_method='eom',
                            cluster_selection_epsilon_max = 0.8)

    if len(cluster_indices) > 0:
        # label * 2 + 1 keeps noise at -1 and maps sub-clusters 0, 1, 2, ...
        # to 1, 3, 5, ...
        # NOTE(review): these odd ids can coincide with first-pass cluster
        # ids such as 3 - confirm that is acceptable.
        refined_labels = hdbscan_refined.fit_predict(cluster_points[:, [5]])*2 + 1
    else:
        # First-pass cluster 1 does not exist: nothing to refine.
        refined_labels = np.empty(0, dtype=labels.dtype)

    refined_clusters.append(refined_labels)

    labels[cluster_indices] = refined_labels

    bar_plot = fig.add_subplot(gs[count,0])
    added_labels = set()

    # One bar per sampled X position; only the first bar of each core type
    # carries a legend label.
    for x, core_type in zip(x_values, labels):
        label = f"Core Type {core_type}" if core_type not in added_labels else None
        if label:
            added_labels.add(core_type)
        bar_plot.bar(x, height=1, width=step*2, color=f"C{core_type + 2}" if core_type >= 0 else "black", edgecolor="none", label=label)

    bar_plot.set_title("Core Type by X-position")
    bar_plot.set_xlabel("X-Value")
    bar_plot.legend(title="Core Type", bbox_to_anchor=(1,2), loc="upper left")
    bar_plot.set_yticks([])

    ax = fig.add_subplot(gs[count,1])

    # Feature-space view (skew vs polar), one colour per core type, drawn
    # with a single scatter call.
    point_colors = [f"C{label + 2}" if label >= 0 else "black" for label in labels]
    if len(v_array):
        ax.scatter(v_array[:, 1], v_array[:, 5], color=point_colors, s=1)


plt.show()

#with open('hdbscan_1.pkl', 'wb') as f:
#    pickle.dump(dbscan, f)
#
#with open('hdbscan_2.pkl', 'wb') as f:
#    pickle.dump(hdbscan_refined, f)



In [None]:
# Window half-widths and X step used when sampling feature vectors.
x_span = 1
y_span = 12
step = 10


# Feature columns produced by get_props:
# 0 = variance, 1 = skew, 2 = kurtosis, 3 = mean, 4 = azimuthal, 5 = polar
properties = [1,5]
# One entry per DBSCAN run: min_samples, metric, eps
parameters = []

parameters.append([11, 'chebyshev', 0.06])


fig = plt.figure(figsize=(19,3))
point_cloud_copy = plot_cloud[(plot_cloud[:,1] >= 0 - y_span) & (plot_cloud[:,1] <= y_span)]

# Draw at most 50000 points for the heatmap; np.random.choice with
# replace=False raises when asked for more samples than there are points.
downsampled_indices = np.random.choice(
    point_cloud_copy.shape[0],
    size=min(50000, point_cloud_copy.shape[0]),
    replace=False,
)

x = point_cloud_copy[:, 0]
y = point_cloud_copy[:, 1]
z = point_cloud_copy[:, 2]

x_downsampled = x[downsampled_indices]
y_downsampled = y[downsampled_indices]
z_downsampled = z[downsampled_indices]


# Row 0: LIDAR heatmap; one extra row per parameter set.
gs = fig.add_gridspec(len(parameters)+1, 2, hspace = 0.8, wspace = 0.3, width_ratios = [8,1] )

ax_scatter = fig.add_subplot(gs[0,0])
ax_scatter.scatter(
    x_downsampled,
    y_downsampled,
    c=z_downsampled,
    cmap='viridis',
    s = 1
)

ax_scatter.set_title('LIDAR data heatmap')
ax_scatter.set_xlabel('X-axis')
ax_scatter.set_ylabel('Y-axis')


v_dict = get_vectors(x_span,y_span,step)

count = 0

for param in parameters:
    count += 1
    dbscan = DBSCAN(min_samples = param[0],
                    metric = param[1],
                    eps= param[2],
                    n_jobs= -1)

    v_array = np.array(list(v_dict.values()))
    x_values = list(v_dict.keys())

    labels = dbscan.fit_predict(v_array[:, properties])

    # Snapshot of the first-pass labels; `labels` is overwritten below.
    initial_clusters = labels.copy()
    refined_clusters = []

    # Second pass: re-cluster the members of first-pass cluster 1 on the
    # azimuthal feature (column 4) only.
    cluster_indices = np.where(initial_clusters == 1)[0]
    cluster_points = v_array[cluster_indices]

    dbscan_refined = DBSCAN(min_samples = 5,
                             metric = "euclidean",
                             eps= 0.06,
                             n_jobs= -1)

    if len(cluster_indices) > 0:
        # Fit the model created just above. The previous code fitted
        # `hdbscan_refined`, a model left in the kernel by another cell, so
        # `dbscan_refined` was never fitted and was pickled unfitted.
        # label * 2 + 1 keeps noise at -1 and maps sub-clusters 0, 1, 2, ...
        # to 1, 3, 5, ...
        refined_labels = dbscan_refined.fit_predict(cluster_points[:, [4]])*2 + 1
    else:
        # First-pass cluster 1 does not exist: nothing to refine.
        refined_labels = np.empty(0, dtype=labels.dtype)

    refined_clusters.append(refined_labels)

    labels[cluster_indices] = refined_labels

    bar_plot = fig.add_subplot(gs[count,0])
    added_labels = set()

    # One bar per sampled X position; only the first bar of each core type
    # carries a legend label.
    for x, core_type in zip(x_values, labels):
        label = f"Core Type {core_type}" if core_type not in added_labels else None
        if label:
            added_labels.add(core_type)
        bar_plot.bar(x, height=1, width=step*2, color=f"C{core_type + 2}" if core_type >= 0 else "black", edgecolor="none", label=label)

    bar_plot.set_title("Core Type by X-position")
    bar_plot.set_xlabel("X-Value")
    bar_plot.legend(title="Core Type", bbox_to_anchor=(1,2), loc="upper left")
    bar_plot.set_yticks([])

    ax = fig.add_subplot(gs[count,1])

    # Rows of v_array follow the insertion order of v_dict, so indexing with
    # core_sample_indices_ selects the same vectors as filtering v_dict by
    # position, without an O(n) membership test per row.
    core_points = v_array[dbscan.core_sample_indices_]
    labels_core_points = labels[dbscan.core_sample_indices_]

    # Feature-space view (skew vs polar) of the core samples only.
    core_colors = [f"C{label + 2}" if label >= 0 else "black" for label in labels_core_points]
    if len(core_points):
        ax.scatter(core_points[:, 1], core_points[:, 5], color=core_colors, s=1)

print(len(core_points))
print(len(v_dict.values()))


plt.show()

# The file names are kept unchanged although the pickled objects are DBSCAN
# models.
with open('hdbscan_1.pkl', 'wb') as f:
    pickle.dump(dbscan, f)

with open('hdbscan_2.pkl', 'wb') as f:
    pickle.dump(dbscan_refined, f)



In [None]:

# 3-D view of a second refinement of cluster_points on the first three
# feature columns.
# NOTE(review): cluster_points, cluster_indices, labels and refined_clusters
# are not defined in this cell; they must already exist in the kernel (the
# preceding clustering cells assign them).
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')



# NOTE(review): the name says "hdbscan" but the model is a scikit-learn
# DBSCAN, and the assignment replaces any earlier `hdbscan_refined` object
# in the kernel.
hdbscan_refined = DBSCAN(min_samples = 5, 
                             metric = "euclidean", 
                             eps= 0.06,
                             n_jobs= -1)


# label * 2 + 1 keeps noise at -1 and maps sub-clusters 0, 1, 2, ... to 1, 3, 5, ...
refined_labels = hdbscan_refined.fit_predict(cluster_points[:, [0,1,2]])*2  +1

refined_clusters.append(refined_labels)
# Write the new sub-cluster ids back into `labels` (modified in place).
for i, idx in enumerate(cluster_indices):
    labels[idx] = refined_labels[i]

r2_labels = labels[cluster_indices]

added_labels = set()  # not used below
# One scatter call per vector, coloured by its refined label (noise in black).
for vec, label in zip(cluster_points, r2_labels):
    color = f"C{label + 2}" if label >= 0 else "black"
    ax.scatter(vec[0],vec[1],vec[2], color=color, s=1)


ax.set_title("3D Scatter of Vectors by Core Type", pad=20)
# Axis labels are the feature column indices.
ax.set_xlabel("0")
ax.set_ylabel("1")
ax.set_zlabel("2")

plt.show()

In [None]:


# Classify every X position (step = 1) by the label of the nearest core
# sample in the (skew, polar) feature plane.
# NOTE: core_points and labels_core_points are not defined in this cell;
# they must already exist in the kernel (the DBSCAN clustering cell assigns
# them).
x_span = 1
y_span = 12
step = 1
core_points = np.array(core_points)

c_points = core_points[:, [1, 5]]
c_type = labels_core_points

# Position of a core sample in c_points -> its cluster label.
type_dict = {i: c_type[i] for i in range(len(c_points))}

tree = KDTree(c_points)

v_dict = get_vectors(x_span, y_span, step)

# Query all feature vectors in one call instead of one KD-tree query per
# vector. With distance_upper_bound, a vector without a core sample within
# 0.1 gets the index len(c_points).
query_points = np.array([[v[1], v[5]] for v in v_dict.values()]).reshape(-1, 2)
indices = tree.query(query_points, k=1, distance_upper_bound=0.1)[1]

# Vectors with no neighbour in range are labelled -1 (noise).
labels = np.array([type_dict[int(index)] if index != len(c_points) else -1 for index in indices])

prediction = {x: label for x, label in zip(v_dict.keys(), labels)}

x_vals = [v_dict[x][1] for x in v_dict.keys()]
y_vals = [v_dict[x][5] for x in v_dict.keys()]
colors = [f"C{prediction[x] + 2}" if prediction[x] >= 0 else "black" for x in v_dict.keys()]


fig = plt.figure(figsize=(19,3))
point_cloud_copy = plot_cloud[(plot_cloud[:,1] >= 0 - y_span) & (plot_cloud[:,1] <= y_span)]

# Draw at most 50000 points for the heatmap; np.random.choice with
# replace=False raises when asked for more samples than there are points.
downsampled_indices = np.random.choice(
    point_cloud_copy.shape[0],
    size=min(50000, point_cloud_copy.shape[0]),
    replace=False,
)

x = point_cloud_copy[:, 0]
y = point_cloud_copy[:, 1]
z = point_cloud_copy[:, 2]

x_downsampled = x[downsampled_indices]
y_downsampled = y[downsampled_indices]
z_downsampled = z[downsampled_indices]


gs = fig.add_gridspec(2, 2, hspace = 0.5, wspace = 0.2, width_ratios = [9,1] )

ax_scatter = fig.add_subplot(gs[0,0])
ax_scatter.scatter(
    x_downsampled,
    y_downsampled,
    c=z_downsampled,
    cmap='viridis',
    s = 1
)

ax_scatter.set_title('LIDAR data heatmap')
ax_scatter.set_xlabel('X-axis')
ax_scatter.set_ylabel('Y-axis')

bar_plot = fig.add_subplot(gs[1,0])

# One bar per X position, coloured by the predicted core type.
bar_plot.bar(v_dict.keys(), height=1, width=step*2, color=colors, edgecolor="none")

bar_plot.set_title("Core Type by X-position")
bar_plot.set_xlabel("X-Value")
bar_plot.set_yticks([])

ax = fig.add_subplot(gs[:,1])

# Feature-space view (skew vs polar) of every classified vector.
ax.scatter(x_vals, y_vals, color=colors, s=1)

plt.show()





