In [None]:
import os
import cv2
import gc
import pickle
import matplotlib.pyplot    as plt
import numpy                as np
import cupy                 as cp
from scipy.stats       import skew, kurtosis
from scipy.ndimage     import convolve
from cupyx.scipy.ndimage import convolve as cpconvolve
from collections import deque
from scipy.interpolate import interp1d
from sklearn.neighbors import KDTree


In [None]:
#variables
# Root folder that MacassaFileParser walks to discover scan parts.
part_path = r"C:\Users\eashenhurst\Desktop\local macassa"

# Half-height of the analysed band, expressed in the same units as the y
# coordinates: get_gradient, PCA and get_edges look up the rows whose y value is
# closest to -window_height and +window_height.
# NOTE: get_point_cloud's default `y_window = window_height + 5` is evaluated
# when the functions cell runs, so re-run that cell after changing this value.
window_height = 10

# Not referenced by any other code visible in this notebook.
incident_beam_x_angle = 0
incident_beam_y_angle = 0

# Switches that gate the three plotting cells further down.
plot_sections = True
plot_labels = False
plot_rubble_sections = True

In [None]:
#functions

#vars

# Positions of the fields inside each per-scan list. get_point_cloud returns
# entries 0-7 in exactly this order; later cells append the per-column labels at
# index `sectioned` (8) and the correction-window dictionary at `final` (9).
label, heights, convolved_heights, intensity, x_to_i, i_to_x, y_to_i, i_to_y, sectioned, final = 0,1,2,3,4,5,6,7,8,9

# NOTE: a `global` statement at module level has no effect. `tree` and `labels`
# are bound by the "load the label tree" cell and read by classify_fhe.
global tree
global labels

#kernels
# 5x5 smoothing kernel; the integer weights sum to 273, so the kernel sums to 1.
# Not referenced by any function visible in this notebook.
gauss =  cp.array([
    [1,  4,  7,  4,  1],
    [4, 16, 26, 16,  4],
    [7, 26, 41, 26,  7],
    [4, 16, 26, 16,  4],
    [1,  4,  7,  4,  1]
]) / 273

# 5x1 column version of the smoothing kernel (weights sum to 17).
# Not referenced by any function visible in this notebook.
gauss_y =  cp.array([
    [1],
    [4],
    [7],
    [4],
    [1]
]) / 17

# 9x1 antisymmetric derivative kernel applied along the first (row / y) axis by
# get_gradient. Despite the name, the weights are custom rather than the
# classic 3x3 Sobel coefficients.
sobel_y = cp.array([
        [-20.75,],
        [ -11.6,],
        [ -6.27,],
        [    -2,],
        [     0,],
        [     2,],
        [  6.27,],
        [  11.6,],
        [ 20.75,]
])
# 1x9 transpose: the same derivative along the second (column / x) axis.
sobel_x = sobel_y.T

def interpolate_x(table_file,lidar_file):
    """Estimate the table x position for every lidar timestamp.

    Parameters
    ----------
    table_file : path to a text file whose lines start with the x position
        (text before the first comma) and contain a timestamp of the form
        ``...T<HH>:<MM>:<SS.sss><+|-><offset>``.
    lidar_file : path to a text file with one such timestamp per line.

    Returns
    -------
    numpy.ndarray
        One interpolated x position per line of ``lidar_file``.

    Notes
    -----
    * Times are converted to seconds since midnight and made relative to the
      first lidar timestamp.
    * Only table samples followed by a step in the overall travel direction are
      kept, and of those only the two at each end are used as interpolation
      anchors.
    * Lidar times outside the anchor range get the position of the nearest end.
    * The time-zone separator starts as '-' and switches to '+' for good as
      soon as any table line contains a '+'; the lidar file is parsed with
      whatever separator the table file ended on.
    """
    def _clock_to_seconds(clock, tz_sep):
        # "HH:MM:SS.sss<tz_sep>..." -> seconds since midnight
        fields = clock.split(':')
        return float(fields[0]) * 3600 + float(fields[1]) * 60 + float(fields[2].split(tz_sep)[0])

    tz_sep = '-'
    positions = []
    table_timestamps = []

    with open(table_file) as handle:
        for row in handle.readlines():
            if "+" in row:
                tz_sep = '+'
            clock = row.split('T')[1]
            position = float(row.split(',')[0])
            stamp = _clock_to_seconds(clock, tz_sep)
            positions.append(position)
            table_timestamps.append(stamp)

    # Keep sample i only when the next sample continues in the overall direction.
    moving_down = (positions[-1] - positions[0]) < 0
    pair_indices = range(len(positions) - 1)
    if moving_down:
        kept = [i for i in pair_indices if positions[i + 1] < positions[i]]
    else:
        kept = [i for i in pair_indices if positions[i + 1] > positions[i]]
    ordered_positions = [positions[i] for i in kept]
    ordered_timestamps = [table_timestamps[i] for i in kept]

    with open(lidar_file) as handle:
        scan_rows = handle.readlines()

    # The first lidar timestamp becomes time zero for both data sets.
    min_t = _clock_to_seconds(scan_rows[0].split('T')[1], tz_sep)
    lidar_timestamps = np.array(
        [_clock_to_seconds(row.split('T')[1], tz_sep) - min_t for row in scan_rows]
    )

    ordered_timestamps = np.array(ordered_timestamps)[::-1] - min_t
    ordered_positions = ordered_positions[::-1]

    # Two anchors from each end of the (reversed) monotone samples.
    anchor_times = np.concatenate([ordered_timestamps[:2], ordered_timestamps[-2:]])
    anchor_positions = np.concatenate([ordered_positions[:2], ordered_positions[-2:]])

    interp_func = interp1d(
        anchor_times,
        anchor_positions,
        kind="linear",
        fill_value=(anchor_positions[-1], anchor_positions[0]),
        bounds_error=False,
    )

    return interp_func(lidar_timestamps)

def get_point_cloud(paths, y_window = window_height + 5, upsample_ratio = 2):
    """Load one scan part and rasterise it into height / intensity grids.

    Parameters
    ----------
    paths : sequence indexed as used below:
        paths[0] name of the part (returned as-is and printed),
        paths[1] text file containing the ``XRAY_DPP[Acquisition]#0.X.Start:``,
                 ``...X.Stop:`` and ``...Y.Start:`` entries,
        paths[2] text file with comma-separated rows of the transformation matrix,
        paths[3] binary file of float32 values read as (N, 3) points,
        paths[4] intensity image (read with cv2.IMREAD_ANYDEPTH),
        paths[5], paths[6] table and lidar timestamp files for interpolate_x.
    y_window : points with |y| larger than this (after the transform) are
        dropped. The default is computed once, when this ``def`` is executed.
    upsample_ratio : when > 1 the x grid is resampled at
        median-x-spacing / upsample_ratio and rows are linearly interpolated.

    Returns
    -------
    list
        [paths[0], point_array, convolved_array, intensity_array, x_value_dict,
        x_values, y_value_dict, y_values] -- the order matches the module-level
        index constants ``label`` ... ``i_to_y``. The arrays have shape
        (len(y_values), number of x grid columns).
    """
    print("loading",end = "... ")

    # Defaults used when the corresponding entry is missing from paths[1].
    x_start = 50000
    x_stop = 0
    y_offset = 0
    with open(paths[1]) as file:
            lines = file.readlines()
            for line in lines:
                if "XRAY_DPP[Acquisition]#0.X.Start:" in line:
                    x_start = (float)(line.split("XRAY_DPP[Acquisition]#0.X.Start:")[1].strip())
                if "XRAY_DPP[Acquisition]#0.X.Stop:" in line:
                    x_stop = (float)(line.split("XRAY_DPP[Acquisition]#0.X.Stop:")[1].strip())
                if "XRAY_DPP[Acquisition]#0.Y.Start:" in line:
                    # y_offset is only printed; it is not used further in this function.
                    y_offset = (float)(line.split("XRAY_DPP[Acquisition]#0.Y.Start:")[1].strip())
                    print(f"y_offset: {y_offset}")

    with open(paths[2]) as file:
        lines = file.readlines()
        # One matrix row per line; applied below to homogeneous [x, y, z, 1] points.
        transformation_matrix = np.array([list(map(float, line.strip().split(","))) for line in lines])

    imread = lambda fn: cv2.imread(fn, cv2.IMREAD_ANYDEPTH)
    
    point_cloud  = np.fromfile(paths[3], dtype=np.float32).reshape(-1, 3) 

    # Replace the stored x column by table positions interpolated at the lidar
    # timestamps, repeated once per distinct raw y value.
    # presumably every scan line holds one point per distinct y value and the
    # points are stored line by line -- TODO confirm against the .bpc writer
    interpolated_x = interpolate_x(paths[5],paths[6])
    interpolated_x = np.repeat(interpolated_x, len(np.unique(point_cloud[:, 1])))

    point_cloud[:,0] = interpolated_x

    intensity_map = imread(paths[4])
    
    print(f"{paths[0]} loaded, {point_cloud.shape[0]} points")
    print("trimming",end = "... ")
    
    # Pair every point's (x, y) with the intensity image flattened in row-major
    # order; this requires the image to have exactly one pixel per point.
    intensity_values = np.reshape(intensity_map, (-1, 1))
    print(np.nanmax(intensity_values))
    intensity_cloud = np.hstack((point_cloud[:,:2], intensity_values))

    # Apply the transform to both clouds in homogeneous coordinates and keep the
    # first three output columns.
    # NOTE(review): the intensity value sits in the z slot here, so any z-row
    # terms of the matrix also modify the intensities -- confirm this is intended.
    point_cloud = (np.hstack((point_cloud, np.ones((point_cloud.shape[0], 1)))) @ transformation_matrix.T)[:,:3]
    intensity_cloud = (np.hstack((intensity_cloud, np.ones((intensity_cloud.shape[0], 1)))) @ transformation_matrix.T)[:,:3]

    # Keep x within [x_stop, x_start] and |y| within y_window.
    mask = (point_cloud[:,0] <= x_start) & (point_cloud[:,0] >= x_stop) & ((np.abs(point_cloud[:,1])) <= y_window)
    point_cloud = point_cloud[mask]
    intensity_cloud = intensity_cloud[mask]

    # Fill values for grid cells without a measured point.
    # NOTE(review): both are named min_* but are computed with nanmax -- confirm
    # which extreme is meant to stand in for missing data.
    min_intensity = np.nanmax(intensity_cloud[:,2])
    min_z = np.nanmax(point_cloud[:,2])
    
    print(f"trimmed to {point_cloud.shape[0]} points")

    print("converting to arrays",end = "... ")

    # Shift x so that the measured x closest to x_stop becomes 0.
    minimum_x = point_cloud[np.argmin(np.abs(point_cloud[:,0] - x_stop)),0]

    point_cloud[:,0] -= minimum_x
    intensity_cloud[:,0] -= minimum_x
    
    x_values = np.unique(point_cloud[:,0])
    y_values = np.unique(point_cloud[:,1]) 

    x_range = len(x_values)

    if upsample_ratio > 1:
        # Regular x grid with a step of (median measured spacing / upsample_ratio).
        index_step = (np.nanmedian(np.diff(x_values))) / upsample_ratio 
        index_steps = np.arange(int(round(np.nanmax(x_values)/index_step))+1) * index_step

        x_range = len(index_steps)
        # Map every measured x to its nearest grid column ...
        x_value_dict = {x: np.argmin(np.abs(index_steps - x)) for x in x_values}
        known_indices = list(x_value_dict.values())
        known_x_values = np.array(list(x_value_dict.keys()), dtype=float)
        all_indices = np.arange(x_range)

        # ... then interpolate an x coordinate for every grid column from those
        # (column index, measured x) pairs.
        interp_func = interp1d(known_indices, known_x_values, kind="linear", fill_value="extrapolate")
        x_values = interp_func(all_indices)
    
    x_value_dict = {x: index for index,x in enumerate(x_values)}
    y_value_dict = {y: index for index,y in enumerate(y_values)}

    point_array = np.full((len(y_values), x_range),np.nan)
    intensity_array = np.full((len(y_values), x_range),np.nan)
    # (x, y) -> (row index in the cloud, value); duplicate coordinates keep the last row.
    point_dictionary = {(row[0],row[1]): (index, row[2]) for index,row in enumerate(point_cloud)}
    intensity_dictionary = {(row[0],row[1]): (index, row[2]) for index,row in enumerate(intensity_cloud)}

    # Fill the grids cell by cell; cells with no exactly matching (x, y) key get
    # the fill values computed above.
    # NOTE(review): the lookup needs exact float equality. When upsampling is on,
    # x_values come from interp1d, so verify that measured columns still match.
    for x in range (x_range):
        for y in range (len(y_values)):
            x_val = x_values[x]
            y_val = y_values[y]

            point_z = point_dictionary.get((x_val,y_val))
            intensity_z = intensity_dictionary.get((x_val,y_val))

            if point_z is not None:
                point_array[y,x] = point_z[1]
            else:
                point_array[y,x] = min_z
            if intensity_z is not None:
                intensity_array[y,x] = intensity_z[1]
            else:
                intensity_array[y,x] = min_intensity

    print(f"arrays built")
    if upsample_ratio > 1:
        print("upsampling",end = "... ")

        # Every row becomes the mean of linear interpolations (through the
        # measured columns) of itself and of its existing neighbours above/below.
        # Rows are overwritten in place, so row y+1 sees the already updated row y.
        for y in range(len(y_values)):

            interpolated_z_values= []
            interpolated_i_values = []

            for dy in [-1,0,1]:
                y_dy = y + dy
                if (y_dy >= 0) & (y_dy < len(y_values)):
                    known_z = point_array[y_dy,known_indices]
                    known_i = intensity_array[y_dy,known_indices]
                    known_x = x_values[known_indices]
                    mask = ~np.isnan(known_z)
                    z_interp = interp1d(known_x[mask], known_z[mask], kind='linear', fill_value="extrapolate")
                    i_interp = interp1d(known_x[mask], known_i[mask], kind='linear', fill_value="extrapolate")

                    interpolated_z_values.append(z_interp(x_values))
                    interpolated_i_values.append(i_interp(x_values))

            point_array[y, :] = np.mean(interpolated_z_values, axis=0)
            intensity_array[y, :] = np.mean(interpolated_i_values, axis=0)
    
    # Absolute response of a 5x1 derivative kernel along the row (y) axis of the heights.
    kernel_y = np.array([[-1],[-2],[0],[2],[1]])
    convolved_array = np.abs(convolve(point_array,kernel_y))

    # NOTE: this message is printed even when upsample_ratio <= 1.
    print(f"upsampled to {point_array.size} points")
    print(f"{paths[0]} finished \n")

    return [paths[0],point_array, convolved_array, intensity_array, x_value_dict, x_values, y_value_dict, y_values]

def get_props(distribution1, distribution2):
    """Build the feature vector used to classify one section.

    Parameters
    ----------
    distribution1 : 1-D sequence whose variance, skewness and (excess)
        kurtosis are computed.
    distribution2 : 1-D sequence whose mean and maximum are square-rooted.

    Returns
    -------
    (features, skew_positive)
        ``features`` is ``[sqrt(mean(d2)), sqrt(max(d2)), |skew_n|,
        arctan(var_n) / (pi / 2)]`` where ``var_n`` and ``skew_n`` are the
        variance and skewness after the (variance, skewness, kurtosis) triple
        has been scaled to unit Euclidean norm (skipped when the norm is not
        positive). ``skew_positive`` is ``np.uint8(1)`` when the normalised
        skewness is > 0, else ``np.uint8(0)``.
    """
    moments = [np.var(distribution1), skew(distribution1), kurtosis(distribution1)]
    root_mean = np.sqrt(np.mean(distribution2))
    root_peak = np.sqrt(np.max(distribution2))

    # Scale the moment triple to unit length so only its direction matters.
    length = np.linalg.norm(moments)
    if length > 0:
        moments = [value / length for value in moments]

    scaled_variance, scaled_skew = moments[0], moments[1]

    features = [
        root_mean,
        root_peak,
        np.abs(scaled_skew),
        np.arctan(scaled_variance)/(np.pi/2),
    ]

    return features, np.uint8(scaled_skew > 0)

def classify_fhe(distribution, distribution2, noise_threshold = 15):
    """Label a section by nearest-neighbour lookup in the global KD-tree.

    The feature vector from ``get_props`` is looked up in the module-level
    ``tree``; the label of the nearest stored point (``labels[index]``) is
    returned when its distance is at most ``noise_threshold``. A nearest label
    of 2 is increased by the skew flag from ``get_props`` (2 stays 2 for
    non-positive skew and becomes 3 for positive skew).

    Returns 0 when any feature is NaN or the nearest neighbour is farther than
    ``noise_threshold``.
    """
    features, skew_positive = get_props(distribution,distribution2)

    if np.any(np.isnan(features)):
        return 0

    distances, indices = tree.query([features], k=1)
    nearest_distance = distances[0][0]
    # Written as "not <=" so a NaN distance is rejected as well.
    if not (nearest_distance <= noise_threshold):
        return 0

    matched_label = labels[indices[0][0]]
    if matched_label == 2:
        matched_label += skew_positive

    return matched_label

def get_gradient(data):
    """Combined intensity / height gradient magnitude of one scan.

    Parameters
    ----------
    data : per-scan list as returned by ``get_point_cloud`` (the ``intensity``,
        ``heights`` and ``i_to_y`` entries are read).

    Returns
    -------
    numpy.ndarray (float32, same shape as the height grid)
        ``sqrt(magnitude_i * magnitude_z)`` with every row outside the band
        between the rows nearest y = -window_height and y = +window_height set
        to 0.

    Notes
    -----
    The intensity image is log-transformed (``log(|I|)``) before
    differentiation, so zero intensities yield -inf there.
    """
    array_i = cp.array(np.log(np.abs(data[intensity])), dtype=cp.float32)
    array_z = cp.array(data[heights], dtype=cp.float32)
    y_values = data[i_to_y]

    # Intensity: absolute derivative along each axis, then Euclidean magnitude.
    # (The original recomputed the identical y-response and magnitude a second
    # time; each convolution is now run once, with the same result.)
    y_grad_i = cp.abs(cpconvolve(array_i, sobel_y))
    x_grad_i = cp.abs(cpconvolve(array_i, sobel_x))
    magnitude_i = cp.sqrt(x_grad_i**2 + y_grad_i**2)

    # Heights: first pass is the plain gradient magnitude ...
    grad_z_rows = cp.abs(cpconvolve(array_z, sobel_y))
    grad_z_cols = cp.abs(cpconvolve(array_z, sobel_x))
    magnitude_z = cp.sqrt(grad_z_rows**2 + grad_z_cols**2)
    # ... second pass replaces the column-direction term by the column-direction
    # derivative of that first magnitude, keeping the row-direction term as is.
    grad_mag_cols = cp.abs(cpconvolve(magnitude_z, sobel_x))
    magnitude_z = cp.sqrt(grad_z_rows**2 + grad_mag_cols**2)

    # Geometric mean of the two magnitudes.
    magnitude = cp.sqrt(magnitude_i * magnitude_z)

    # Row indices closest to y = +window_height and y = -window_height.
    y_bottom = np.argmin(np.abs(y_values - window_height))
    y_top = np.argmin(np.abs(y_values + window_height))

    magnitude[:y_top, :] = 0
    magnitude[y_bottom:, :] = 0

    return cp.asnumpy(magnitude)

def PCA(data, y_window=9, x_window=2, s = 0.001, w = 0.0005):
    """Hysteresis mask from a per-pixel eigenvalue of the local 3-D covariance.

    For every grid cell a (2*y_window+1) x (2*x_window+1) neighbourhood of the
    height grid is turned into 3-D points, their 3x3 second-moment matrix is
    built, and one eigenvalue is computed in closed form. Cells whose value is
    >= ``s`` are "strong"; cells in [``w``, ``s``) are "weak" and are kept only
    when 8-connected (directly or through other weak cells) to a strong cell.

    Parameters
    ----------
    data : per-scan list from ``get_point_cloud`` (``heights`` and ``i_to_y`` are read).
    y_window, x_window : half-sizes of the neighbourhood in rows / columns.
    s, w : strong and weak thresholds on the eigenvalue.

    Returns
    -------
    numpy.ndarray of uint8, same shape as the height grid: 1 for kept cells, else 0.
    """
    array = cp.array(data[heights], dtype=cp.float32)
    y_values = data[i_to_y]
    height, width = array.shape
    y_range, x_range = 2 * y_window + 1, 2 * x_window + 1

    # Row indices closest to y = +window_height and y = -window_height.
    y_bottom = np.argmin(np.abs(y_values - window_height))
    y_top = np.argmin(np.abs(y_values + window_height))

    # Nominal in-plane offsets of the window cells (fixed scale factors 1/2 and 1/4.5).
    x_n = cp.arange(-x_window, x_window + 1) /2
    y_n = cp.arange(-y_window, y_window + 1) /4.5
    # NOTE(review): the windows below are flattened from (y_range, x_range), i.e.
    # with x varying fastest, whereas repeat/tile here lay the offsets out with
    # y varying fastest. The x/y offsets therefore do not line up with the z
    # values they are stacked with. Changing this would shift the eigenvalue
    # scale the s / w thresholds were tuned on -- confirm before fixing.
    valid_x = cp.repeat(x_n, y_range).flatten()
    valid_y = cp.tile(y_n, x_range).flatten()

    # One (y_range, x_range) window per cell; edge padding keeps the output the
    # same height x width as the input.
    points = cp.lib.stride_tricks.sliding_window_view(
        cp.pad(array, ((y_window,y_window), (x_window,x_window)), mode='edge'), (y_range, x_range)
    )
    del array

    points = points.reshape(height,width, y_range*x_range)
    
    # Shape (height, width, 3, y_range*x_range): rows are x offset, y offset, z.
    points = cp.stack((
        cp.broadcast_to(valid_x, (height,width, y_range*x_range)),
        cp.broadcast_to(valid_y, (height,width, y_range*x_range)),
        points), axis=2)
    # Only z needs centring; the x / y offsets are symmetric about 0.
    mean_vals = cp.mean(points[:,:,2,:], axis=(2), keepdims=True)
    points[:,:,2,:] -= mean_vals

    # Per-cell 3x3 covariance matrix.
    cov_matrices = cp.matmul(points, points.transpose(0, 1, 3, 2)) / (points.shape[3] - 1)
    del points 

    a, b, c, d, e, f, g, h, i = (
        cov_matrices[:, :, 0, 0], cov_matrices[:, :, 0, 1], cov_matrices[:, :, 0, 2],
        cov_matrices[:, :, 1, 0], cov_matrices[:, :, 1, 1], cov_matrices[:, :, 1, 2],
        cov_matrices[:, :, 2, 0], cov_matrices[:, :, 2, 1], cov_matrices[:, :, 2, 2]
    )

    # Closed-form (trigonometric) eigenvalue computation for a symmetric 3x3
    # matrix: q is trace/3, p scales the deviation from q*I, and
    # r = det((A - q*I)/p)/2 gives the angle phi.
    # p is 0 for a matrix that is a multiple of the identity, which makes B NaN there.
    p1 = b**2 + c**2 + f**2
    q = (a + e + i) / 3
    p2 = (a - q)**2 + (e - q)**2 + (i - q)**2 + 2 * p1
    p = cp.sqrt(p2 / 6)
    
    B = (cov_matrices - cp.eye(3, dtype=cp.float32) * q[:, :, None, None]) / p[:, :, None, None]
    r = cp.linalg.det(B) / 2
    phi = cp.arccos(cp.clip(r, -1, 1)) / 3
    del cov_matrices

    # The root at phi + 2*pi/3 (the smallest eigenvalue in the usual statement
    # of this formula).
    eigvals_2 = q + 2 * p * cp.cos(phi + (2 * cp.pi / 3))
    eigvals_2 = cp.asnumpy(eigvals_2)
    # Ignore everything outside the +/- window_height band.
    eigvals_2[:y_top, :] = 0
    eigvals_2[y_bottom:, :] = 0

    neighbours = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)]
    strong_y, strong_x = np.where(eigvals_2 >= s)
    # `.astype` binds only to the second comparison; bool & uint8 still yields a
    # 0/1 uint8 array, so the result equals the fully parenthesised form.
    weak_values = (eigvals_2 >= w) & (eigvals_2 < s).astype(np.uint8) 
    return_array = np.zeros_like(eigvals_2, dtype=cp.uint8)
    
    cp.get_default_memory_pool().free_all_blocks()

    # Breadth-first growth from the strong cells through 8-connected weak cells.
    queue = deque(zip(strong_y, strong_x))
    while queue:
        y, x = queue.popleft() 
        return_array[y, x] = 1
        for dy, dx in neighbours:
            ny, nx = y + dy, x + dx
            if 0 <= ny < weak_values.shape[0] and 0 <= nx < weak_values.shape[1]:
                if weak_values[ny, nx]:
                    # Clearing the flag marks the cell as queued so it is visited once.
                    weak_values[ny, nx] = 0 
                    queue.append((ny, nx)) 
    del weak_values 

    return return_array

def get_edges(data,s = 80, w = 30, y_window=9, x_window=2, s_e = 0.01, w_e = 0.0075):
    """Detect section boundaries in one scan.

    The gradient magnitude from ``get_gradient`` is thresholded with hysteresis:
    cells whose gradient, multiplied by the ``PCA`` mask, reaches ``s`` are
    seeds; cells whose (unmasked) gradient reaches ``w`` are added when
    4-connected to a seed, directly or through other such cells. The result is
    morphologically closed with a 6x22 elliptical kernel, and the two rows
    nearest y = +window_height and y = -window_height are marked as edges.

    Parameters
    ----------
    data : per-scan list from ``get_point_cloud``.
    s, w : strong / weak gradient thresholds.
    y_window, x_window, s_e, w_e : forwarded to ``PCA`` (window half-sizes and
        its strong / weak thresholds).

    Returns
    -------
    (edge_map, seed_row)
        ``edge_map`` holds 1 on edges and NaN elsewhere; ``seed_row`` is the
        index of the row whose y value is closest to 0.
    """
    roughness_mask = PCA(data, y_window, x_window, s = s_e, w = w_e)
    gradient = get_gradient(data)
    row_coords = data[i_to_y]

    masked_gradient = gradient * roughness_mask
    n_rows, n_cols = roughness_mask.shape
    edge_map = np.zeros_like(gradient)

    # Seeds come from the masked gradient, candidates from the unmasked one.
    frontier = deque(zip(*np.where(masked_gradient >= s)))
    candidates = (gradient >= w) & (masked_gradient < s).astype(np.uint8)

    while frontier:
        row, col = frontier.popleft()
        edge_map[row, col] = 1
        for d_row, d_col in ((-1, 0), (0, -1), (0, 1), (1, 0)):
            n_row, n_col = row + d_row, col + d_col
            if not (0 <= n_row < n_rows and 0 <= n_col < n_cols):
                continue
            if candidates[n_row, n_col]:
                # consume the candidate so it is queued only once
                candidates[n_row, n_col] = 0
                frontier.append((n_row, n_col))

    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6,22))
    closed = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, closing_kernel)

    # Solid edge rows at both limits of the analysed band.
    for bound in (window_height, -window_height):
        closed[np.argmin(np.abs(row_coords - bound)), :] = 1

    seed_row = np.argmin(np.abs(row_coords))

    return np.where(closed ==  1, 1 , np.nan), seed_row
    
def draw_sections(edge_array):
    """Flood-fill the regions between edges for display.

    Parameters
    ----------
    edge_array : ``(edges, seed_row)`` as returned by ``get_edges``; ``edges``
        holds NaN for free cells and a non-NaN value on edges.

    Returns
    -------
    numpy.ndarray
        Copy of ``edges`` in which every free region touching the seed row is
        filled with the id ``column + 2`` of the first seed-row column that
        reached it; all other cells (edges included) are NaN. The input array
        is not modified.
    """
    boundary_map, seed_row = edge_array
    sections = boundary_map.copy()

    unvisited = np.isnan(sections)
    n_rows, n_cols = sections.shape
    steps = ((-1, 0), (0, -1), (0, 1), (1, 0))

    for column in range(n_cols):
        if not unvisited[seed_row, column]:
            continue

        # ids start at 2 and advance with every column, filled or not
        section_id = column + 2
        frontier = deque([(seed_row, column)])

        while frontier:
            row, col = frontier.popleft()
            sections[row, col] = section_id
            unvisited[row, col] = False

            for d_row, d_col in steps:
                n_row, n_col = row + d_row, col + d_col
                if 0 <= n_row < n_rows and 0 <= n_col < n_cols and unvisited[n_row, n_col]:
                    # mark when queued so each cell enters the queue once
                    unvisited[n_row, n_col] = False
                    frontier.append((n_row, n_col))

    # Edge cells carry the value 1 and are blanked here together with untouched cells.
    return np.where(sections > 1, sections, np.nan)

def label_x_values(data):
    """Assign a class label to every x column of one scan.

    Edges from ``get_edges`` split the grid into regions. Every region that
    touches the seed row and contains more than 1000 cells is summarised by its
    per-row medians of the height and convolved-height grids, classified with
    ``classify_fhe``, and the resulting label is written to each of the
    region's seed-row columns.

    Returns
    -------
    numpy.ndarray of int, one entry per grid column; columns that belong to no
    classified region keep the value 0.
    """
    print("finding edges...")
    edges, seed_row = get_edges(data)
    z_array = data[heights]
    cz_array = data[convolved_heights]

    n_rows, n_cols = edges.shape
    steps = ((-1, 0), (0, -1), (0, 1), (1, 0))
    column_labels = np.zeros(z_array.shape[1], dtype=int)
    unvisited = np.isnan(edges)

    print("classifying sections...")

    for start_col in range(n_cols):
        if not unvisited[seed_row, start_col]:
            continue

        seed_row_columns = []
        z_by_row = {}
        cz_by_row = {}
        size = 0

        # Breadth-first flood fill over 4-connected free cells.
        frontier = deque([(seed_row, start_col)])
        while frontier:
            row, col = frontier.popleft()
            size += 1

            if row == seed_row:
                seed_row_columns.append(col)

            z_by_row.setdefault(row, []).append(z_array[row, col])
            cz_by_row.setdefault(row, []).append(cz_array[row, col])
            unvisited[row, col] = False

            for d_row, d_col in steps:
                n_row, n_col = row + d_row, col + d_col
                if 0 <= n_row < n_rows and 0 <= n_col < n_cols and unvisited[n_row, n_col]:
                    unvisited[n_row, n_col] = False
                    frontier.append((n_row, n_col))

        # Regions of 1000 cells or fewer are left unlabelled.
        if size <= 1000:
            continue

        z_profile = [np.nanmedian(row_values) for row_values in z_by_row.values()]
        cz_profile = [np.nanmedian(row_values) for row_values in cz_by_row.values()]
        section_label = classify_fhe(z_profile, cz_profile)
        for col in seed_row_columns:
            column_labels[col] = section_label

    return column_labels

def classify_rubble(data, x_window=1, y_window=4):
    """Describe every unlabelled x column by a local plane fit around y = 0.

    For each column a (2*y_window+1) x (2*x_window+1) patch of heights centred
    on the row nearest y = 0 is turned into 3-D points; the eigen-decomposition
    of their covariance gives the direction of least variance (the eigenvector
    of the smallest eigenvalue).

    Parameters
    ----------
    data : per-scan list holding ``heights``, ``i_to_x``, ``i_to_y`` and the
        per-column labels at index ``sectioned``.
    x_window, y_window : half-sizes of the patch in columns / rows.

    Returns
    -------
    dict
        ``{x value: label}`` for columns whose label is non-zero, otherwise
        ``{x value: [ratio, x_offset, y_offset]}`` where ``ratio`` is the
        smallest eigenvalue divided by the eigenvalue sum and the offsets are
        ``|arctan(v_x / v_z)|`` and ``|arctan(v_y / v_z)|`` of that eigenvector.

    Raises
    ------
    ValueError
        If the band of rows around y = 0 is cut off by the edge of the grid.

    Side effects
    ------------
    Binds the module-level names ``results`` (the returned dict), ``points``
    (the GPU point tensor) and ``eigvecs`` (host copy of the selected
    eigenvectors). NOTE: ``points`` keeps its GPU memory alive until rebound.
    """
    global results, points, eigvecs

    pc = cp.array(data[heights])
    x_values = data[i_to_x]
    y_values = data[i_to_y]
    x_value_labels = data[sectioned]
    y_center = np.argmin(np.abs(y_values))

    y_range, x_range = 2 * y_window + 1, 2 * x_window + 1

    pc = pc[y_center - y_window:y_center + y_window + 1,:]
    if pc.shape[0] != y_range:
        raise ValueError(
            f"need {y_range} rows around y = 0 but only {pc.shape[0]} are available"
        )

    width = pc.shape[1]

    x_n = cp.arange(-x_window, x_window + 1) /2   # these values are estimates for the difference between points, they could be improved.
    y_n = cp.arange(-y_window, y_window + 1) /4.5 # these values are estimates for the difference between points, they could be improved.

    # Each window has shape (y_range, x_range) and is flattened row-major below,
    # so x varies fastest: flat index k = row * x_range + col. The offsets must
    # use the same layout (the previous repeat/tile pairing was transposed and
    # attached the wrong x/y offset to most heights).
    valid_x = cp.tile(x_n, y_range)
    valid_y = cp.repeat(y_n, x_range)

    points = cp.lib.stride_tricks.sliding_window_view(
        cp.pad(pc, ((0,0), (x_window,x_window)), mode='edge'), (y_range, x_range)
    )

    points = points.reshape(width, y_range*x_range)

    # Shape (width, y_range*x_range, 3): columns are x offset, y offset, z.
    points = cp.stack((
        cp.broadcast_to(valid_x, (width, y_range*x_range)),
        cp.broadcast_to(valid_y, (width, y_range*x_range)),
        points), axis=2)
    # Only z needs centring; the x / y offsets are symmetric about 0.
    mean_vals = cp.mean(points[:,:,2], axis=(1), keepdims=True)
    points[:,:,2] -= mean_vals

    cov_matrices = cp.matmul(points.transpose(0,2,1), points) / (y_range * x_range - 1)

    # eigh returns eigenvalues in ascending order; index 0 is the smallest.
    eigvals, eigvecs = cp.linalg.eigh(cov_matrices)

    eigvecs = eigvecs[:, :, 0]

    eigvals = eigvals[:,0]/(eigvals[:,0] + eigvals[:,1]  + eigvals[:,2])

    x_offset = cp.abs(cp.arctan(eigvecs[:,0]/eigvecs[:,2]))
    y_offset = cp.abs(cp.arctan(eigvecs[:,1]/eigvecs[:,2]))

    # One device-to-host copy per array instead of three per column.
    ratio_host = cp.asnumpy(eigvals)
    x_offset_host = cp.asnumpy(x_offset)
    y_offset_host = cp.asnumpy(y_offset)

    results = {
        x_values[i]: x_value_labels[i] if x_value_labels[i] != 0
        else [float(ratio_host[i]), float(x_offset_host[i]), float(y_offset_host[i])]
        for i in range(len(x_values))
    }

    cp.get_default_memory_pool().free_all_blocks()
    eigvecs = cp.asnumpy(eigvecs)
    return results

def define_correction_windows(point_cloud, window_size=10):
    """Summarise the per-column classification in consecutive x windows.

    Columns are grouped greedily: a window starts at the first column not yet
    used and takes every following column whose x value is at most
    ``window_size`` larger than the window's first x value.

    Returns
    -------
    dict
        ``{(x_start, x_end): [half, full, empty, rubble, var, x_off, y_off]}``
        where ``half`` / ``empty`` / ``full`` are the fractions of columns
        labelled 1 / 2 / 3, ``rubble`` is the remaining fraction, and the last
        three entries are NaN-ignoring medians of the per-column values that
        ``classify_rubble`` returns for unlabelled columns (NaN when the window
        has none). Note the stored order: ``full`` precedes ``empty``.

    Side effects
    ------------
    Binds the module-level name ``windows`` to the returned dict.
    """
    global windows

    per_column = classify_rubble(point_cloud)
    xs = np.array(point_cloud[i_to_x])
    n_columns = len(xs)

    windows = {}
    first = 0

    while first < n_columns:
        # Extend the window while columns stay within window_size of its start.
        stop = first + 1
        while stop < n_columns and xs[stop] - xs[first] <= window_size:
            stop += 1

        x_start, x_end = xs[first], xs[stop - 1]
        values = [per_column[x] for x in xs[first:stop]]
        first = stop

        # Scalar labels vs. [ratio, x_offset, y_offset] lists of rubble columns.
        labels = np.array([v for v in values if v in [1,2,3]])
        rubble_points = np.array([v for v in values if v not in [0,1,2,3]])

        n_values = len(values)
        half_perc = np.sum(labels == 1) / n_values
        empty_perc = np.sum(labels == 2) / n_values
        full_perc = np.sum(labels == 3) / n_values
        rubble_perc = 1 - half_perc - empty_perc - full_perc

        if rubble_points.size > 0:
            avg_var, avg_x_offset, avg_y_offset = (
                np.nanmedian(rubble_points[:, column]) for column in range(3)
            )
        else:
            avg_var = avg_x_offset = avg_y_offset = np.nan

        windows[(x_start, x_end)] = [half_perc, full_perc, empty_perc, rubble_perc, avg_var, avg_x_offset, avg_y_offset]

    return windows

In [None]:
#define parsers
class MacassaFileParser:
    """Find scan parts laid out as ``<AdaptiveZ_10mm*>/<*Part*>/`` under a root.

    Every directory below ``root_dir`` whose name starts with "AdaptiveZ_10mm"
    is searched for sub-directories with "Part" in their
    ``<AdaptiveZ dir>/<sub dir>`` path; each of those yields one entry::

        (part_folder, component_parameters, lidar2xrf, bpc, intensity,
         real_dbg, lidar_times)

    A file path is ``None`` when the part folder holds no matching file.
    """

    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.box_folders = []

    @staticmethod
    def _collect_part_files(folder):
        """Return the six data-file paths of one part folder (None if absent)."""
        # Every slot starts as None for each part, so a missing file can neither
        # raise UnboundLocalError nor inherit the previous part's path (the
        # intensity slot used to do exactly that).
        component_parameters_path = None
        lidar2xrf_path = None
        bpc_path = None
        intensity_path = None
        real_dbg_path = None
        lidar_times_path = None

        for file_name in os.listdir(folder):
            file_path = os.path.join(folder, file_name)
            if file_name.endswith(".component_parameters.txt"):
                component_parameters_path = file_path
            elif file_name.endswith(".lidar2xrf"):
                lidar2xrf_path = file_path
            elif file_name.endswith(".bpc"):
                bpc_path = file_path
            elif file_name.endswith("_intensity.png"):
                intensity_path = file_path
            elif file_name.endswith("real.dbg"):
                # must be tested before the generic ".dbg" suffix
                real_dbg_path = file_path
            elif file_name.endswith(".dbg"):
                lidar_times_path = file_path

        return (component_parameters_path, lidar2xrf_path, bpc_path,
                intensity_path, real_dbg_path, lidar_times_path)

    def find_folders(self):
        """Scan ``root_dir`` and rebuild ``self.box_folders`` from scratch."""
        # Start from an empty list so repeated scans do not accumulate duplicates.
        self.box_folders = []

        for root, dirs, files in os.walk(self.root_dir):
            for dir_name in dirs:
                if not dir_name.startswith("AdaptiveZ_10mm"):
                    continue
                folder = os.path.join(root, dir_name)
                if not os.path.isdir(folder):
                    continue

                for entry in os.listdir(folder):
                    part_folder = os.path.join(dir_name, entry)
                    full_part_path = os.path.join(root, part_folder)
                    if os.path.isdir(full_part_path) and "Part" in part_folder:
                        self.box_folders.append(
                            (full_part_path, *self._collect_part_files(full_part_path))
                        )

    def get_box_folders(self):
        """Scan the tree and return the list of part entries."""
        self.find_folders()
        return self.box_folders
    
class BasicParser:
    """Find scan parts stored directly in ``Core*`` directories under a root.

    Every directory below ``root_dir`` whose name starts with "Core" and whose
    full path contains "Part" yields one entry::

        (folder, component_parameters, lidar2xrf, bpc, intensity,
         real_dbg, lidar_times)

    A file path is ``None`` when the folder holds no matching file.
    """

    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.box_folders = []

    @staticmethod
    def _collect_part_files(folder):
        """Return the six data-file paths of one part folder (None if absent)."""
        component_parameters_path = None
        lidar2xrf_path = None
        bpc_path = None
        intensity_path = None
        real_dbg_path = None
        lidar_times_path = None

        for file_name in os.listdir(folder):
            file_path = os.path.join(folder, file_name)
            if file_name.endswith(".component_parameters.txt"):
                component_parameters_path = file_path
            elif file_name.endswith(".lidar2xrf"):
                lidar2xrf_path = file_path
            elif file_name.endswith(".bpc"):
                bpc_path = file_path
            elif file_name.endswith("_intensity.png"):
                intensity_path = file_path
            elif file_name.endswith("real.dbg"):
                # must be tested before the generic ".dbg" suffix
                real_dbg_path = file_path
            elif file_name.endswith(".dbg"):
                lidar_times_path = file_path

        return (component_parameters_path, lidar2xrf_path, bpc_path,
                intensity_path, real_dbg_path, lidar_times_path)

    def find_folders(self):
        """Scan ``root_dir`` and rebuild ``self.box_folders`` from scratch."""
        # Start from an empty list so repeated scans do not accumulate duplicates.
        self.box_folders = []

        for root, dirs, files in os.walk(self.root_dir):
            for dir_name in dirs:
                if not dir_name.startswith("Core"):
                    continue
                folder = os.path.join(root,dir_name)

                # "Part" is looked for in the whole path, not only in dir_name.
                if os.path.isdir(folder) and "Part" in folder:
                    self.box_folders.append((folder, *self._collect_part_files(folder)))

    def get_box_folders(self):
        """Scan the tree and return the list of part entries."""
        self.find_folders()
        return self.box_folders



In [None]:
#choose and use parser
parser = MacassaFileParser(part_path)
#parser = BasicParser(r"\\192.168.1.100\CoreScan3-2\Acquisitions\RnD\XRF 2.0\same core scans\AI21-TarcoreDoubleScans old xrf head\AI21-XRF1.0")
paths_list = parser.get_box_folders()

# List what was found for every part; a missing file is shown as None.
for name, component_parameters_path, lidar2xrf_path, bpc_path, intensity_path, real_dbg_path, lidar_times_path  in paths_list:
    print(f"Part: {name}")
    print(f"  Component Parameters: {component_parameters_path}")
    print(f"  LIDAR to XRF: {lidar2xrf_path}")
    print(f"  BPC File: {bpc_path}")
    print(f"  Intensity File: {intensity_path}")
    print(f"  lidar time stamps file: {lidar_times_path}")
    print(f"  real dbg file: {real_dbg_path}")

# A part is valid when none of its paths is missing. The built-in sum keeps the
# count an int even for an empty list (np.sum([]) is the float 0.0, which was
# printed as "0.0 valid paths found").
valid = sum(all(item is not None for item in sublist) for sublist in paths_list)

print(f"{len(paths_list)} part paths found")
print(f"{valid} valid paths found")

In [None]:
#Get the point clouds
# Only parts for which every required file was found are loaded.
point_clouds = []

for paths in paths_list:
    if None in paths:
        continue
    point_clouds.append(get_point_cloud(paths, upsample_ratio=2))


In [None]:
#load the label tree
# The pickle holds a (tree, labels) pair; classify_fhe reads both as globals
# (it calls tree.query(...) and indexes labels with the returned index).
# SECURITY: pickle.load can execute arbitrary code -- only load a
# 'cluster_kd_tree.pkl' that comes from a trusted source.
with open('cluster_kd_tree.pkl', 'rb') as f:
    tree, labels = pickle.load(f)


In [None]:
# Label the columns of every scan. The labels live at index `sectioned`: they
# are appended on the first run and overwritten when the cell is re-run.
for point_cloud in point_clouds:
    if len(point_cloud) <= sectioned:
        point_cloud.append(label_x_values(point_cloud))
    else:
        point_cloud[sectioned] = label_x_values(point_cloud)
    print(f"done {point_cloud[0]} .\n")

In [None]:
# Overlay the per-column class labels on each scan's intensity image.
if plot_labels:
    fig = plt.figure(figsize=(70, 1.5 * len(point_clouds)), dpi=150)
    gs = fig.add_gridspec(len(point_clouds), 1, hspace=0.025)

    # colour per label value 0..3
    colormap = ["red", "green", "blue", "purple"]

    for i, pc in enumerate(point_clouds):
        # Use this scan's own y axis (the cell used to read the `point_cloud`
        # variable leaked from an earlier loop, i.e. always the last scan).
        middle_y = np.argmin(np.abs(pc[i_to_y]))
        ax = fig.add_subplot(gs[i, 0])
        display = pc[intensity]
        ax.imshow(np.flipud(display), cmap='bone', interpolation='nearest', alpha=1)

        x_vals = np.arange(display.shape[1])
        # Deliberately not named `labels`: that global holds the KD-tree labels
        # read by classify_fhe and must not be overwritten by a plotting cell.
        section_labels = np.array(list(pc[sectioned]))

        colors = np.array([colormap[value] for value in section_labels])

        # draw only every ds_f-th column, widened to ds_f, to keep the plot light
        ds_f = 4
        x_vals = x_vals[::ds_f]
        colors = colors[::ds_f]

        ax.bar(x_vals, height=8, width=ds_f, bottom=middle_y - 4, color=colors, alpha=0.85)

        ax.set_xlabel("X index")
        ax.set_ylabel("Y index")
        ax.set_title(pc[label], fontsize=15)

    plt.show()


In [None]:
# Show the flood-filled sections on top of the height and intensity images.
if plot_sections:
    section_displays = []

    for point_cloud in point_clouds:
        edges = get_edges(point_cloud,s = 80, w = 30, y_window=9, x_window=2, s_e = 0.01, w_e = 0.0075)
        sections = draw_sections(edges)
        section_displays.append(sections)
        print(f"done {point_cloud[0]}")

    fig = plt.figure(figsize=(70, 1.5*len(point_clouds)), dpi=100)
    gs = fig.add_gridspec(len(point_clouds), 1, hspace=0.025)

    for i, (pc, sections) in enumerate(zip(point_clouds, section_displays)):
        ax = fig.add_subplot(gs[i, 0])

        # All three layers are host NumPy arrays, so they are flipped with
        # np.flipud (cp.flipud is meant for CuPy device arrays).
        ax.imshow(np.flipud(pc[heights]), cmap='bone_r', interpolation='nearest', alpha = 1)
        ax.imshow(np.flipud(pc[intensity]), cmap='bone', interpolation='nearest', alpha = 0.65)
        ax.imshow(np.flipud(sections), cmap='nipy_spectral', interpolation='nearest', alpha = 0.65)
        ax.set_xlabel("X index")
        ax.set_ylabel("Y index")
        ax.set_title(pc[0],fontsize = 20)

    plt.show()

In [None]:
# Compute the correction windows of every scan. They live at index `final`:
# appended on the first run and overwritten when the cell is re-run.
for point_cloud in point_clouds:
    if len(point_cloud) <= final:
        point_cloud.append(define_correction_windows(point_cloud))
    else:
        point_cloud[final] = define_correction_windows(point_cloud)
    print(f"done {point_cloud[0]} .\n")

In [None]:
# Plot, per correction window, the class fractions as a stacked bar plus three
# colour-coded bars for the rubble variance ratio, x angle and y angle.
if plot_rubble_sections:
    fig = plt.figure(figsize=(70, 2 * len(point_clouds)), dpi=55)
    gs = fig.add_gridspec(len(point_clouds), 1, hspace=0.075)

    # stacked-bar colours, in the order half / empty / full / rubble
    colormap = ["green", "blue", "purple", "red"]
    cmap1 = plt.cm.cool
    cmap2 = plt.cm.cool

    all_vars = []
    all_angles = []

    for pc in point_clouds:
        data = pc[final]
        for values in data.values():
            var, x_angle, y_angle = values[4:7]
            all_vars.append(var)
            # NOTE(review): only the x angles define the colour range that is
            # also applied to the y angles below -- confirm this is intended.
            all_angles.append(x_angle)

    min_var = np.nanmin(all_vars)
    max_var = np.nanmax(all_vars)
    min_angle = np.nanmin(all_angles)
    max_angle = np.nanmax(all_angles)

    print(f"Min variance: {min_var}")
    print(f"Max variance: {max_var}")
    print(f"Min angle: {min_angle}")
    print(f"Max angle: {max_angle}")

    # The colour scales depend only on the global ranges, so they are built once
    # here. They used to be created inside the `rubble_perc > 0` branch, which
    # left them undefined for the colour bars when no window contained rubble.
    norm1 = plt.Normalize(vmin=min_var, vmax=max_var)
    norm2 = plt.Normalize(vmin=min_angle, vmax=max_angle)

    for i, pc in enumerate(point_clouds):
        # Use this scan's own y axis (previously the leaked `point_cloud`
        # variable, i.e. always the last scan processed by an earlier cell).
        middle_y = np.argmin(np.abs(pc[i_to_y]))
        index_dictionary = pc[x_to_i]
        ax = fig.add_subplot(gs[i, 0])
        display = pc[intensity]
        ax.imshow(np.flipud(display), cmap='bone_r', interpolation='nearest', alpha=1)

        data = pc[final]

        for (x_start, x_end), values in data.items():
            # define_correction_windows stores [half, full, empty, rubble, ...];
            # unpack in that order, then stack as half / empty / full / rubble
            # to match `colormap`.
            half_perc, full_perc, empty_perc, rubble_perc = values[:4]

            ratios = np.array([half_perc, empty_perc, full_perc, rubble_perc])
            ratios /= ratios.sum()

            width = index_dictionary[x_end] - index_dictionary[x_start] - 1

            bar_heights = ratios * 20

            # stacked bar of total height 20 centred on the middle row
            bottoms = middle_y - 10 + np.insert(np.cumsum(bar_heights[:-1]), 0, 0)

            for j in range(4):
                ax.bar(index_dictionary[x_start] + 10, height=bar_heights[j], width=width, bottom=bottoms[j], color=colormap[j], alpha=0.65)
            if rubble_perc > 0:
                var, x_angle, y_angle = values[4:]

                color1 = cmap1(norm1(var))
                color2 = cmap2(norm2(x_angle))
                color3 = cmap2(norm2(y_angle))

                # three bars of height 15 stacked from row 0: variance, x angle, y angle
                extra_heights = 15
                ax.bar(index_dictionary[x_start] + 10, height=extra_heights, width=width,
                       bottom=0, color=color1, alpha=1)
                ax.bar(index_dictionary[x_start] + 10, height=extra_heights, width=width,
                       bottom=15, color=color2, alpha=1)
                ax.bar(index_dictionary[x_start] + 10, height=extra_heights, width=width,
                       bottom=30, color=color3, alpha=1)

        sm1 = plt.cm.ScalarMappable(cmap=cmap1, norm=norm1)
        sm2 = plt.cm.ScalarMappable(cmap=cmap2, norm=norm2)
        sm3 = plt.cm.ScalarMappable(cmap=cmap2, norm=norm2)

        cbar1 = plt.colorbar(sm1, ax=ax, orientation='vertical', fraction=0.03, pad=0.002)
        cbar2 = plt.colorbar(sm2, ax=ax, orientation='vertical', fraction=0.03, pad=0.004)
        cbar3 = plt.colorbar(sm3, ax=ax, orientation='vertical', fraction=0.03, pad=0.008)

        cbar1.set_label("variance")
        cbar2.set_label("x_angle")
        cbar3.set_label("y_angle")

        ax.set_xlabel("X index")
        ax.set_ylabel("Y index")
        ax.set_title(pc[label], fontsize=15)

    plt.show()


In [None]:
# Optional export: writes <part folder>/rubble_classification.pkl for every scan
# that has correction windows. This cell was originally blocked by the bare text
# "dont run this part", which is a SyntaxError; the explicit opt-in flag keeps it
# off by default while letting the notebook run from top to bottom.
save_rubble_classification = False

if save_rubble_classification:
    for point_cloud in point_clouds:
        if len(point_cloud) > final:
            folder = point_cloud[0]
            os.makedirs(folder, exist_ok=True)

            file_path = os.path.join(folder, 'rubble_classification.pkl')

            with open(file_path, 'wb') as f:
                pickle.dump(point_cloud[final], f)

            print(f"Saved to {file_path}")
    