In [None]:
%matplotlib qt
 
import matplotlib.pyplot as plt
import numpy as np
import os
import hdbscan
from scipy.stats import skew, kurtosis, mode
from scipy.spatial import KDTree
from scipy.ndimage import convolve
from sklearn.cluster import KMeans,DBSCAN
from sklearn.cluster import AgglomerativeClustering

In [None]:
def find_parts(directory_path, marker=r"Box14\AdaptiveZ_10mm", suffix="_4"):
    """Collect scan-part folders found anywhere below ``directory_path``.

    A folder is selected when its joined path contains ``marker`` and ends
    with ``suffix``.  Backslashes and forward slashes are treated as the same
    separator while matching, so the default marker works regardless of which
    separator ``os.walk`` produces.

    Parameters
    ----------
    directory_path : str
        Root folder that is walked recursively with ``os.walk``.
    marker : str, optional
        Path fragment that must occur somewhere in the folder path.
    suffix : str, optional
        Required ending of the folder path.

    Returns
    -------
    list of str
        Matching folder paths, sorted in descending order.
    """
    # The marker used to be a non-raw literal containing "\A", which is an
    # invalid escape sequence; it is now a raw-string default.
    wanted = marker.replace("\\", "/")
    parts_paths = []

    for root, dirs, _files in os.walk(directory_path):
        for folder in dirs:
            folder_path = os.path.join(root, folder)
            normalised = folder_path.replace("\\", "/")
            if wanted in normalised and folder_path.endswith(suffix):
                parts_paths.append(folder_path)

    parts_paths.sort(reverse=True)
    return parts_paths

# Root folder (UNC network path) that is searched for scan-part folders.
# Written as a raw string so the backslashes are kept literally.
base_path = r"\\192.168.1.100\CoreScan3-2\Acquisitions\RnD\XRF\CH\Macassa_clearance"

# Scan-part folders below base_path as selected by find_parts; later cells iterate over this list.
paths = find_parts(base_path)

In [None]:

def get_point_cloud(file_path):
    """Load, transform and trim the lidar point cloud stored in one scan folder.

    The folder must contain:

    * ``.component_parameters.txt`` with the ``XRAY_DPP[Acquisition]#0``
      ``Y.Start``, ``X.Start`` and ``X.Stop`` entries,
    * at least one ``.bpc`` file holding flat float32 x, y, z triples,
    * ``.XRAY_DPP_001.lidar2xrf`` with a comma-separated matrix that is
      applied to the homogeneous (x, y, z, 1) points.

    Parameters
    ----------
    file_path : str
        Path of the scan folder.

    Returns
    -------
    tuple
        ``(point_cloud, x_start, x_stop)`` where ``point_cloud`` is an
        ``(n, 3)`` array of the transformed and trimmed points and the two
        floats are the X limits read from the parameter file.

    Raises
    ------
    FileNotFoundError
        If a required file (parameters, ``.bpc`` or matrix) is missing.
    ValueError
        If one of the required parameters is absent from the parameter file.
    """
    param_keys = {
        "y_offset": "XRAY_DPP[Acquisition]#0.Y.Start:",
        "x_start": "XRAY_DPP[Acquisition]#0.X.Start:",
        "x_stop": "XRAY_DPP[Acquisition]#0.X.Stop:",
    }
    params = {}

    coords_file = os.path.join(file_path, '.component_parameters.txt')
    with open(coords_file) as file:
        for line in file:
            # At most one key is read per line; a later line with the same
            # key overrides an earlier one.
            for name, key in param_keys.items():
                if key in line:
                    params[name] = float(line.split(key)[1].strip())
                    break

    missing = [key for name, key in param_keys.items() if name not in params]
    if missing:
        raise ValueError(
            f"{coords_file} is missing required parameter(s): {', '.join(missing)}"
        )
    y_offset = params["y_offset"]
    x_start = params["x_start"]
    x_stop = params["x_stop"]

    # Fail with a clear message instead of an unbound-variable error when the
    # folder holds no lidar file.  When several .bpc files exist the first one
    # returned by os.listdir is used.
    lidar_files = [fn for fn in os.listdir(file_path) if fn.endswith('.bpc')]
    if not lidar_files:
        raise FileNotFoundError(f"No .bpc lidar file found in {file_path}")
    lidar_filename = os.path.join(file_path, lidar_files[0])

    data = np.fromfile(lidar_filename, dtype=np.float32)
    point_cloud = data.reshape(-1, 3)  # to xyz

    # Drop every point that has a NaN coordinate.
    valid_rows = ~np.isnan(point_cloud).any(axis=1)
    point_cloud = point_cloud[valid_rows, ...]

    point_cloud[:, 1] = point_cloud[:, 1] - float(y_offset)

    print(f"{file_path} is loaded. \n# of point {point_cloud.shape[0]}")

    matrix_file = os.path.join(file_path, ".XRAY_DPP_001.lidar2xrf")
    with open(matrix_file, 'r') as file:
        # Blank lines are skipped so a trailing empty line does not break parsing.
        transformation_matrix = np.array(
            [[float(value) for value in line.strip().split(",")]
             for line in file if line.strip()]
        )

    num_points = point_cloud.shape[0]
    homogeneous_points = np.hstack((point_cloud, np.ones((num_points, 1))))
    transformed_points = homogeneous_points @ transformation_matrix.T
    point_cloud = transformed_points[:, :3]

    # Z is re-expressed relative to a floor placed 10 units below the most
    # frequent Z value; only points with a positive re-expressed Z are kept.
    floor = mode(point_cloud[:, 2])[0] - 10
    print(floor)
    point_cloud[:, 2] = floor - point_cloud[:, 2]
    point_cloud = point_cloud[point_cloud[:, 2] > 0]

    # Keep the X range of the acquisition (x_stop is the lower bound here).
    point_cloud = point_cloud[
        (point_cloud[:, 0] >= x_stop) &
        (point_cloud[:, 0] <= x_start)
    ]

    # NOTE(review): the Y offset is subtracted a second time here, after the
    # transformation, in addition to the subtraction done on the raw points
    # above.  Kept as in the original -- confirm that both are intended.
    point_cloud[:, 1] -= y_offset
    return point_cloud, x_start, x_stop


def convolved_data(data,kernel):
    """Convolve the Z values of a point cloud on its own X/Y grid.

    The distinct X and Y values of ``data`` span a dense 2-D grid.  Each point
    writes its Z value into its grid cell, cells without a point are 0, the
    grid is convolved with ``kernel`` (``mode='reflect'``) and every point
    reads its filtered Z value back from its cell.

    Parameters
    ----------
    data : numpy.ndarray
        ``(n, 3)`` array of x, y, z rows.  It is not modified.
    kernel : array_like
        2-D convolution weights passed to ``scipy.ndimage.convolve``.

    Returns
    -------
    numpy.ndarray
        Copy of ``data`` whose third column holds the convolved values.  An
        empty input yields an empty copy.

    Notes
    -----
    When several rows share the same (x, y) the last one defines the cell
    value and all of them receive the filtered value of that cell.
    """
    smoothed = data.copy()
    if data.shape[0] == 0:
        # Nothing to place on a grid.
        return smoothed

    # The inverse indices give each point's row/column in the dense grid,
    # replacing the per-cell dictionary lookups with array indexing.
    x_values, x_index = np.unique(data[:, 0], return_inverse=True)
    y_values, y_index = np.unique(data[:, 1], return_inverse=True)
    x_index = x_index.reshape(-1)
    y_index = y_index.reshape(-1)

    grid = np.zeros((x_values.size, y_values.size), dtype=float)
    grid[x_index, y_index] = data[:, 2]

    filtered = convolve(grid, kernel, mode='reflect')

    smoothed[:, 2] = filtered[x_index, y_index]
    return smoothed

def trim_y(data, y_span=20):
    """Return the rows of ``data`` whose Y value (column 1) lies in [-y_span, y_span]."""
    y_column = data[:, 1]
    inside_band = np.logical_and(y_column >= -y_span, y_column <= y_span)
    return data[inside_band]

    



    
    


In [None]:
# Load every scan part and shift it along X before stacking all parts into one cloud.
point_clouds = []
x_offset = 0  # running X shift carried over from one part to the next
for path in paths:
    temp_cloud, x_start, x_stop = get_point_cloud(path)
    # Subtracting x_stop first places a point at X == x_stop on the running
    # offset reached so far; adding x_start afterwards advances the running
    # offset by (x_start - x_stop) for the next part.
    x_offset -= x_stop
    temp_cloud[:,0] += x_offset
    x_offset += x_start
    point_clouds.append(temp_cloud)
   
point_cloud = np.vstack(point_clouds)
# Keep only the rows with -20 <= Y <= 20.
point_cloud = trim_y(point_cloud,20)

# Independent copy of the stacked cloud; later cells plot from it and feed it to convolved_data.
plot_cloud = point_cloud.copy()
print(f"total # of point {point_cloud.shape[0]}")
# Number of distinct X values in the stacked cloud.
print(len(np.unique(point_cloud[:,0])))

# KD-tree over the (X, Y) columns only; get_distribution* use it for window queries.
point_tree = KDTree(point_cloud[:,:2])

In [None]:
# 7x7 integer weight matrix handed to convolved_data below.
kernel = np.array([
    [1,  0,  0,  -1,  0,   0,   1],
    [0, -2,  0,   0,  0,  -2,   0],
    [0,  0,  1,   0, -1,   0,   0],
    [-1, 0,  0,   4,  0,   0,  -1],
    [0,  0, -1,   0,  1,   0,   0],
    [0, -2,  0,   0,  0,  -2,   0],
    [1,  0,  0,  -1,  0,   0,   1]
])

# Rebinds point_cloud to a copy of plot_cloud whose Z column holds the
# convolved values; plot_cloud itself keeps its Z values.  The X/Y columns
# are copied unchanged.
point_cloud = convolved_data(plot_cloud, kernel)



In [None]:
def get_vectors(x_span = 1, y_span = 12, step = 1,y_index = 0):
    """Compute one property vector per X position along the line ``y = y_index``.

    Starting at the smallest X value of the global ``point_cloud`` and
    advancing by ``step`` while below the largest X value, the Z values of
    the window centred on ``(x, y_index)`` are fetched with
    ``get_distribution`` and summarised with ``get_props``.  Positions whose
    property vector contains a NaN are left out.

    Returns
    -------
    dict
        Maps each retained X position to its property list.
    """
    distinct_x = np.unique(point_cloud[:, 0])
    x_end = distinct_x.max()
    position = distinct_x.min()

    vectors = {}
    while position < x_end:
        window_values = get_distribution([position, y_index], x_span, y_span)
        props = get_props(window_values)
        has_nan = np.isnan(props).any()
        if not has_nan:
            vectors[position] = props
        position += step
    return vectors


def get_distribution(point=(0, 0), x_span=10, y_span=15):
    """Return the Z values of all points inside a rectangular X/Y window.

    The window is centred on ``point`` and extends ``x_span`` to either side
    in X and ``y_span`` to either side in Y (bounds inclusive).  Candidates
    come from the global ``point_tree`` and are read from the global
    ``point_cloud``.

    Parameters
    ----------
    point : sequence of two floats, optional
        Window centre ``(x, y)``.
    x_span, y_span : float, optional
        Half-widths of the window in X and Y.

    Returns
    -------
    numpy.ndarray
        1-D array with the third column of every point inside the window.
    """
    # A Euclidean ball of radius max(x_span, y_span) does not reach the
    # corners of the rectangle and would drop points that belong to the
    # window.  The Chebyshev (p = inf) ball of that radius is a square that
    # contains the whole rectangle.
    search_radius = max(x_span, y_span)
    indices = point_tree.query_ball_point(point, search_radius, p=np.inf)

    candidates = point_cloud[indices]

    in_x = (candidates[:, 0] >= point[0] - x_span) & (candidates[:, 0] <= point[0] + x_span)
    in_y = (candidates[:, 1] >= point[1] - y_span) & (candidates[:, 1] <= point[1] + y_span)

    return candidates[in_x & in_y][:, 2]


def get_distribution_vs_y(point=(0, 0), x_span=10, y_span=15):
    """Group the Z values inside a rectangular X/Y window by their Y value.

    The window is centred on ``point`` and extends ``x_span`` to either side
    in X and ``y_span`` to either side in Y (bounds inclusive).  Candidates
    come from the global ``point_tree`` and are read from the global
    ``point_cloud``.

    Parameters
    ----------
    point : sequence of two floats, optional
        Window centre ``(x, y)``.
    x_span, y_span : float, optional
        Half-widths of the window in X and Y.

    Returns
    -------
    dict
        Maps each Y value found in the window to the list of Z values of the
        points that have this Y value.
    """
    # A Euclidean ball of radius max(x_span, y_span) does not reach the
    # corners of the rectangle and would drop points that belong to the
    # window.  The Chebyshev (p = inf) ball of that radius is a square that
    # contains the whole rectangle.
    search_radius = max(x_span, y_span)
    indices = point_tree.query_ball_point(point, search_radius, p=np.inf)

    candidates = point_cloud[indices]

    in_x = (candidates[:, 0] >= point[0] - x_span) & (candidates[:, 0] <= point[0] + x_span)
    in_y = (candidates[:, 1] >= point[1] - y_span) & (candidates[:, 1] <= point[1] + y_span)
    filtered_points = candidates[in_x & in_y]

    y_vals = {}
    for row in filtered_points:
        y_vals.setdefault(row[1], []).append(row[2])

    return y_vals


def get_props(distribution):
    """Summarise a sample of values as a six-element property list.

    Parameters
    ----------
    distribution : array_like
        1-D sample of values.

    Returns
    -------
    list
        ``[variance, skew, kurtosis, mean, azimuthal, polar]`` where

        * variance, skew and kurtosis are divided by the Euclidean norm of
          the (variance, skew, kurtosis) triple,
        * mean is the plain sample mean,
        * azimuthal is ``arctan2(skew, kurtosis)`` of the normalised values,
          mapped to ``[0, 2*pi)``,
        * polar is ``arccos(variance / r)`` with ``r`` the length of the
          normalised triple.

        Entries are NaN when the statistics are undefined (for example an
        empty sample or a zero-length triple).
    """
    mean = np.mean(distribution)
    shape_stats = [np.var(distribution), skew(distribution), kurtosis(distribution)]

    # Built-in sum: passing a generator to np.sum is deprecated.
    norm = sum(value ** 2 for value in shape_stats) ** 0.5

    properties = [value / norm for value in shape_stats]
    properties.append(mean)

    z = properties[0]
    y = properties[1]
    x = properties[2]

    roe = np.sqrt(x**2 + y**2 + z**2)
    # np.arctan2 / np.arccos are available in every NumPy release, unlike the
    # np.atan2 / np.acos aliases.
    properties.append((np.arctan2(y, x) + (2 * np.pi)) % (2 * np.pi))
    # Clip so that rounding slightly beyond +/-1 does not turn a valid
    # vector into NaN; NaN inputs stay NaN.
    properties.append(np.arccos(np.clip(z / roe, -1.0, 1.0)))

    return properties

def polar_props(properties):
    """Convert the first three entries of a property vector to two angles.

    ``properties[0]`` is used as z, ``properties[1]`` as y and
    ``properties[2]`` as x.

    Parameters
    ----------
    properties : sequence
        Sequence with at least three numeric entries.

    Returns
    -------
    list
        ``[azimuthal, polar]`` with ``azimuthal = arctan2(y, x)`` mapped to
        ``[0, 2*pi)`` and ``polar = arccos(z / r)``, where ``r`` is the
        length of ``(x, y, z)``.  ``polar`` is NaN for a zero-length vector.
    """
    z = properties[0]
    y = properties[1]
    x = properties[2]

    roe = np.sqrt(x**2 + y**2 + z**2)
    # np.arccos / np.arctan2 are available in every NumPy release, unlike the
    # np.acos / np.atan2 aliases.  Clipping guards against rounding slightly
    # beyond +/-1, which would yield NaN.
    polar = np.arccos(np.clip(z / roe, -1.0, 1.0))
    azimuthal = (np.arctan2(y, x) + (2 * np.pi)) % (2 * np.pi)

    return [azimuthal, polar]





In [None]:

# Property vectors for windows centred on y = 0; only the dict values are kept.
vectors = get_vectors(x_span = 1, y_span = 13, step = 1,y_index = 0).values()



# First three entries of every vector: the normalised variance, skew and
# kurtosis produced by get_props.
x_vals = [v[0] for v in vectors]
y_vals = [v[1] for v in vectors]
z_vals = [v[2] for v in vectors]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# One small red marker per property vector.
ax.scatter(x_vals, y_vals, z_vals, color='r', label='End Points', s= 0.2)



ax.set_xlabel('variance')
ax.set_ylabel('skew')
ax.set_zlabel('kurt')

plt.legend()
plt.show()

In [None]:

# Window size and clustering settings used by this cell.
x_span = 1
y_span = 16
step = 1
n_clusters = 3
cluster_size = 100
samples = None
met = 'correlation'
eps = 1.0
a = 1.0
# Upper bound on the number of points drawn in each heatmap.
max_plot_points = 100000

fig = plt.figure(figsize=(20,5))
point_cloud_copy = plot_cloud[(plot_cloud[:,1] >= 0 - y_span) & (plot_cloud[:,1] <= y_span)]

# Sampling without replacement fails when more samples than rows are
# requested, so the sample size is capped at the number of rows.
downsampled_indices = np.random.choice(
    point_cloud_copy.shape[0],
    size=min(max_plot_points, point_cloud_copy.shape[0]),
    replace=False,
)

x = point_cloud_copy[:, 0]
y = point_cloud_copy[:, 1]
z = point_cloud_copy[:, 2]

x_downsampled = x[downsampled_indices]
y_downsampled = y[downsampled_indices]
z_downsampled = z[downsampled_indices]

x2 = point_cloud[:, 0]
y2 = point_cloud[:, 1]
z2 = point_cloud[:, 2]

downsampled_indices = np.random.choice(
    point_cloud.shape[0],
    size=min(max_plot_points, point_cloud.shape[0]),
    replace=False,
)

x2_downsampled = x2[downsampled_indices]
y2_downsampled = y2[downsampled_indices]
z2_downsampled = z2[downsampled_indices]

# Four stacked rows: convolved heatmap, original heatmap, and one bar strip
# per clustering method.
gs = fig.add_gridspec(4, 1, hspace = 2)

ax_scatter = fig.add_subplot(gs[1,0])
ax_scatter.scatter(
    x_downsampled,
    y_downsampled,
    c=z_downsampled,
    cmap='viridis',
    s = 1
)

ax_scatter.set_title('LIDAR data heatmap')
ax_scatter.set_xlabel('X-axis')
ax_scatter.set_ylabel('Y-axis')
ax_scatter.set_yticks([])

ax_scatter2 = fig.add_subplot(gs[0,0])
ax_scatter2.scatter(
    x2_downsampled,
    y2_downsampled,
    c=z2_downsampled,
    cmap='viridis',
    s = 1
)

ax_scatter2.set_title('transformed LIDAR data heatmap')
ax_scatter2.set_xlabel('X-axis')
ax_scatter2.set_ylabel('Y-axis')
ax_scatter2.set_yticks([])

v_dict = get_vectors(x_span,y_span,step)

# A loop that called polar_props on every vector and discarded the result
# was removed here: it had no effect on v_dict or on the clustering input.
# NOTE(review): if clustering on the polar angles was intended, replace the
# v_dict values explicitly before fitting.

# The variable keeps its historical name; the model is agglomerative clustering.
kmeans = AgglomerativeClustering(n_clusters)
kmeans.fit(list(v_dict.values()))

labels = kmeans.labels_
x_values = list(v_dict.keys())

bar_plot1 = fig.add_subplot(gs[2,0])
added_labels = set()

# One unit-height bar per X position, coloured by cluster; only the first bar
# of each cluster carries a legend label.
for x, core_type in zip(x_values, labels):
    label = f"Core Type {core_type}" if core_type not in added_labels else None
    if label:
        added_labels.add(core_type)
    bar_plot1.bar(x, height=1, width=step, color=f"C{core_type**2}", edgecolor="none", label=label)

bar_plot1.set_title("Core Type by X-position")
bar_plot1.set_xlabel("X-Value")
bar_plot1.legend(title="Core Type", bbox_to_anchor=(1,3), loc="upper left")
bar_plot1.set_yticks([])

dbscan = hdbscan.HDBSCAN(min_cluster_size = cluster_size,
                         min_samples = samples,
                         metric = met,
                         cluster_selection_epsilon= eps,
                         alpha = a,
                         core_dist_n_jobs= -1)

labels = dbscan.fit_predict(list(v_dict.values()))

bar_plot2 = fig.add_subplot(gs[3,0])
added_labels = set()

# Same bar strip for the HDBSCAN labels.
for x, core_type in zip(x_values, labels):
    label = f"Core Type {core_type}" if core_type not in added_labels else None
    if label:
        added_labels.add(core_type)
    bar_plot2.bar(x, height=1, width=step, color=f"C{core_type+2}", edgecolor="none", label=label)

bar_plot2.set_title("Core Type by X-position")
bar_plot2.set_xlabel("X-Value")
bar_plot2.legend(title="Core Type", bbox_to_anchor=(1,2), loc="upper left")
bar_plot2.set_yticks([])
plt.show()






In [None]:

# Window size used for the property vectors of this cell.
x_span = 3.5
y_span = 12
step = 1
# Upper bound on the number of points drawn in the heatmap.
max_plot_points = 100000

parameters = [ ]
#cluster size, sample size, metric, epsilon, alpha
parameters.append([50, 15, 'euclidean', 0.1, 0.2])
parameters.append([50, 15, 'euclidean', 0.1, 0.4])
parameters.append([50, 15, 'manhattan', 0.1, 0.2])
parameters.append([50, 15, 'manhattan', 0.1, 0.4])
parameters.append([50, 15, 'euclidean', 0.1, 0.8])
parameters.append([50, 15, 'euclidean', 0.1, 1.6])
parameters.append([50, 15, 'manhattan', 0.1, 0.8])
parameters.append([50, 15, 'manhattan', 0.1, 1.6])


fig = plt.figure(figsize=(16,9))
point_cloud_copy = plot_cloud[(plot_cloud[:,1] >= 0 - y_span) & (plot_cloud[:,1] <= y_span)]

# Sampling without replacement fails when more samples than rows are
# requested, so the sample size is capped at the number of rows.
downsampled_indices = np.random.choice(
    point_cloud_copy.shape[0],
    size=min(max_plot_points, point_cloud_copy.shape[0]),
    replace=False,
)

x = point_cloud_copy[:, 0]
y = point_cloud_copy[:, 1]
z = point_cloud_copy[:, 2]

x_downsampled = x[downsampled_indices]
y_downsampled = y[downsampled_indices]
z_downsampled = z[downsampled_indices]

# One row for the heatmap plus one row per parameter set.
gs = fig.add_gridspec(len(parameters)+1, 1, hspace = 5)

ax_scatter = fig.add_subplot(gs[0,0])
ax_scatter.scatter(
    x_downsampled,
    y_downsampled,
    c=z_downsampled,
    cmap='viridis',
    s = 1
)

ax_scatter.set_title('LIDAR data heatmap')
ax_scatter.set_xlabel('X-axis')
ax_scatter.set_ylabel('Y-axis')

v_dict = get_vectors(x_span,y_span,step)

# Cluster on the two polar angles instead of the full property vectors.
v_dict = {key: polar_props(vec) for key, vec in v_dict.items()}

# The X positions must come from this cell's v_dict: the vectors were
# recomputed with different spans, so positions left over from an earlier
# cell need not line up with the labels produced below.
x_values = list(v_dict.keys())

for count, param in enumerate(parameters, start=1):
    dbscan = hdbscan.HDBSCAN(min_cluster_size = param[0],
                            min_samples = param[1],
                            metric = param[2],
                            cluster_selection_epsilon= param[3],
                            alpha = param[4],
                            core_dist_n_jobs= 4)

    labels = dbscan.fit_predict(list(v_dict.values()))

    bar_plot = fig.add_subplot(gs[count,0])
    added_labels = set()

    # One unit-height bar per X position, coloured by cluster; only the first
    # bar of each cluster carries a legend label.
    for x, core_type in zip(x_values, labels):
        label = f"Core Type {core_type}" if core_type not in added_labels else None
        if label:
            added_labels.add(core_type)
        bar_plot.bar(x, height=1, width=step, color=f"C{core_type+2}", edgecolor="none", label=label)

    bar_plot.set_title("Core Type by X-position")
    bar_plot.set_xlabel("X-Value")
    bar_plot.legend(title="Core Type", bbox_to_anchor=(1,2), loc="upper left")
    bar_plot.set_yticks([])
plt.show()






In [None]:

# Window size used for the property vectors of this cell.
x_span = 1
y_span = 12
step = 1


#cluster size, sample size, metric, epsilon, alpha
param = [10, 20, 'euclidean', 0.1, 0.5]


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
v_dict = get_vectors(x_span, y_span, step)

dbscan = hdbscan.HDBSCAN(min_cluster_size=param[0],
                         min_samples=param[1],
                         metric=param[2],
                         cluster_selection_epsilon=param[3],
                         alpha=param[4],
                         core_dist_n_jobs=4)

labels = dbscan.fit_predict(list(v_dict.values()))

# Rows follow the order of v_dict, and therefore the order of labels.
# Columns 4 and 5 are the azimuthal and polar angles appended by get_props.
feature_rows = np.asarray(list(v_dict.values()), dtype=float)

# One scatter call per cluster instead of one per point; each call carries a
# label so that plt.legend() has entries to show.
for label in np.unique(labels):
    members = labels == label
    color = f"C{label + 2}" if label >= 0 else "gray"
    # NOTE(review): Z is the constant 0 although the Z axis is labelled
    # 'mean' (column 3 of the vectors) -- confirm which one is intended.
    ax.scatter(feature_rows[members, 4], feature_rows[members, 5], 0,
               color=color, s=1, label=f"Core Type {label}")

ax.set_title("3D Scatter of Vectors by Core Type", pad=20)
ax.set_xlabel('azimuthal')
ax.set_ylabel('polar')
ax.set_zlabel('mean')

plt.legend()
plt.show()




