In [1]:
import laspy
import numpy as np
from scipy.spatial import KDTree
from user_inputs import laz_file_path, clip_folder_path
from user_inputs import search_radius, nn_threshold, bin_count_nn
from helper_functions import generate_bins, fraction_of_trues
import time

In [2]:
# Derive the path of the clipped file: "<clip_folder_path>\<stem>_clipped.<ext>".
# Paths are Windows-style, so the base name is the last backslash-delimited
# component of laz_file_path.
# rsplit(".", 1) splits on the LAST dot only, so a file name that itself
# contains dots (e.g. "tile.v2.LAZ") keeps its full stem and real extension
# instead of being cut at the first dot.
# NOTE(review): the step that writes the clipped file must use the same
# naming rule - confirm for file names with more than one dot.
laz_file_name = laz_file_path.split("\\")[-1].rsplit(".", 1)
clip_laz_file_name = laz_file_name[0] + "_clipped." + laz_file_name[1]
clip_laz_path = clip_folder_path + "\\" + clip_laz_file_name
clip_laz_path

'D:\\Geomatics\\thesis\\RefineNet\\data\\AHN\\clipped\\C_37EN2_clipped.LAZ'

In [3]:
# Read the clipped LAZ file; `laz` is used below for its point dimensions,
# coordinates (xyz), classifications and header.
laz = laspy.read(clip_laz_path)

In [None]:
# List the name of every dimension stored in the clipped LAZ file, then the
# distinct classification codes that occur in it.
for dim in laz.point_format.dimensions:
    print(dim.name)
unique_classes = np.unique(laz.classification)
print("\nunique classifications in clipped region: ", unique_classes)

**Neighbourhood search steps**
- making a kd tree (first load the points)
- finding the nearest points within 0.5 m radius
    - we can first build np array (or pandas dataframe) with all the required points, classification (labels), rgb(?), intensities
    - then use np.where to find out the distances and indexes of the neighbour points
    - filter only those within 0.5m and look for their classifications
    - give confidence scores to points
    - add extra dimension to laz file

In [None]:
# Load the point coordinates; .copy() gives this notebook its own array.
xyz = laz.xyz.copy()
print("xyz shape: ", xyz.shape)
# Build a KD-tree over all points; it is queried below for the neighbours
# of each point within a fixed radius.
tree = KDTree(xyz)

In [None]:
# Per-point classification codes. They are indexed below with point indices
# to look up the label of a point and of its neighbours.
labels = laz.classification
print("labels shape: ", labels.shape)

In [None]:
# Split the point indices into bins for the neighbourhood search: the notebook
# crashed when all points were queried at once.
# The cells below read each bin as nn_bins[i][0] (start) and nn_bins[i][1] (end).
num_points = laz.header.point_count
nn_bins = generate_bins(number=num_points, bin_count=bin_count_nn)
print(num_points)
print(nn_bins)

*Just implementing for one bin now*  
- **chose 10th bin**

In [None]:
# Neighbourhood search for one bin only (index 10) as a first trial.
start, end = nn_bins[10][0], nn_bins[10][1]
print(start, end)
# For every point in the bin, the (sorted) indices of all points within the
# search radius, Euclidean distance (p=2); each list includes the query point
# itself. The radius comes from user_inputs.search_radius instead of a
# hard-coded 0.5, so the notebook follows the configured value.
nn = tree.query_ball_point(xyz[start:end, :], r=search_radius, p=2, workers=-1, return_sorted=True)
# print(nn)
confidence_scores = []  # to add confidence of each point

**Now we have all the neighbouring points (including the point itself) for each point in the bin**
- separate the current point from its neighbours
- get the classification of the current point
- get the classifications of the neighbouring points
- **Calculate confidences**
    - **compute the fraction of neighbouring points that have the same classification as the current point**
    - **points with fewer neighbours than the threshold get a confidence of -1 (to be revised later)**

In [None]:
# Indices of the points in the current bin, both as a flat vector and as an
# (n, 1) column; the labels of those points are kept as an (n, 1) column too,
# so they can be compared row-wise against a 2-D neighbour array.
print(start, end)
pts = np.arange(start, end)
pts_vector = pts[:, np.newaxis]
pts_labels = np.array(labels[pts]).reshape((-1, 1))
print("pts_labels: ", pts_labels)
print("pts_labels.shape: ", pts_labels.shape)

In [None]:
# Turn the ragged neighbour lists in `nn` into a rectangular (n_points, max_length)
# array and look up the label of every neighbour.
# Two parallel copies are kept: an object array (holds the integer indices used
# for indexing `labels`) and a float array (so np.isnan can locate the padding).
max_length = max(len(lst) for lst in nn)
nn_np_padded = np.array([lst + [np.nan] * (max_length - len(lst)) for lst in nn], dtype=object)  # rows padded with NaN up to max_length; still contains the query point itself
nn_np_padded_float = nn_np_padded.astype(float)  # float twin of the array above; still contains the query point itself

# Remove the query point from its own neighbour row: pts_vector is (n, 1), so
# the comparison broadcasts each point's index across its row.
self_mask = (pts_vector == nn_np_padded_float)
nn_np_padded[self_mask] = np.nan
nn_np_padded_float[self_mask] = np.nan


# Flatten both arrays to shape (1, n_points * max_length) so that all neighbour
# indices can be passed to `labels` in a single lookup.
# NOTE(review): reshape normally returns a view here, so the -1 assignment
# below presumably also overwrites the NaNs inside nn_np_padded - confirm if
# nn_np_padded is reused afterwards.
nn_np_padded_flat = nn_np_padded.reshape((1, -1))
nn_np_padded_float_flat = nn_np_padded_float.reshape((1, -1))
# nanMask: True where the slot is NaN (padding or the removed query point)
nn_np_padded_float_flat_nanMask = np.isnan(nn_np_padded_float_flat)
# 2-D mask: True where the slot holds a real neighbour
nn_np_padded_Mask = ~np.isnan(nn_np_padded_float)

# Replace every NaN with the placeholder index -1 so the array can be used for
# indexing; the labels fetched for those slots are reset to NaN further down.
nn_np_padded_flat[nn_np_padded_float_flat_nanMask] = -1  # -1 is a valid index (last point), so the lookup below does not fail
nn_labels_flat_pHold = labels[list(nn_np_padded_flat[0])]
nn_labels_flat_pHold = np.array(nn_labels_flat_pHold)
# nn_labels_flat = nn_labels_flat_pHold *  ~nn_np_padded_float_flat_nanMask
nn_labels_flat = nn_labels_flat_pHold.copy()
nn_labels_flat_float = nn_labels_flat.astype(float)
# Blank out the labels that were fetched through the -1 placeholder.
nn_labels_flat_float[nn_np_padded_float_flat_nanMask[0]] = np.nan

# Back to 2-D: one row of neighbour labels per query point, NaN where there is
# no neighbour.
nn_labels_float = nn_labels_flat_float.reshape((nn_np_padded.shape[0], -1))
# (unfinished note in the original: "identifying ...")

print(nn_np_padded_float_flat_nanMask)
print(nn_labels_flat_pHold)
# print(nn_labels_flat_pHold.reshape((nn_np_padded.shape[0], -1)))
print("nn_labels_float: ", nn_labels_float)
print("nn_labels_float.shape: ", nn_labels_float.shape)
print("nn_labels_float[0]: ", nn_labels_float[0])

**Confidence = (label_match_count) / (neighbour_count)**
1. get match count
2. get neighbour count
3. calculate confidence

**After**
1. get threshold mask
2. points with fewer neighbours than the threshold (`nn_threshold`, e.g. 5) get a confidence of -1

In [None]:
# 1. Compare each point's label with the labels of its neighbours.
#    NaN slots (padding / removed query point) never compare equal, so they
#    are counted as "no match".
match_pts_nn_bool = (pts_labels == nn_labels_float)
match_pts_nn_num = np.sum(match_pts_nn_bool, axis=1, keepdims=True)  # neighbours sharing the point's label
print("match_pts_nn_bool[0]: ", match_pts_nn_bool[0])
print("match_pts_nn_num[0]: ", match_pts_nn_num[0])

print("nn_np_padded_Mask[0]: \n", nn_np_padded_Mask[0])

# 2. Neighbour count per point, and a mask of the points that have fewer
#    neighbours than nn_threshold.
nn_count = np.sum(nn_np_padded_Mask, axis=1, keepdims=True)
nn_threshold_mask = (nn_count < nn_threshold)

print("nn_threshold: ", nn_threshold_mask)

print("nn_count[0]: ", nn_count[0])
print("nn_count: ", nn_count)
print("nn_count.max: ", np.max(nn_count))

# 3. confidence = match_count / neighbour_count.
#    Divide only where a point has at least one neighbour; rows without any
#    neighbour keep NaN (the value 0/0 produced before, but without the
#    RuntimeWarning).
confidence_from_nn = np.full(nn_count.shape, np.nan, dtype=float)
np.divide(match_pts_nn_num, nn_count, out=confidence_from_nn, where=(nn_count > 0))
# Points with too few neighbours get the sentinel confidence -1.
confidence_from_nn[nn_threshold_mask] = -1
print("confidence_from_nn: ", confidence_from_nn)
# If NaN can survive (nn_threshold <= 0), np.nanmin / np.nanmax are the
# NaN-ignoring alternatives to the two calls below.
print("min confidence: ", np.min(confidence_from_nn, axis=0))
print("max confidence: ", np.max(confidence_from_nn, axis=0))
idx_min_confidence = np.where(confidence_from_nn == np.min(confidence_from_nn))

# Points without any neighbour. `x == np.nan` is always False, and the NaN
# confidences may already have been overwritten with -1 above, so the
# neighbour count is tested directly.
idxs_no_nn = np.where(nn_count == 0)
print(f"idxs_no_nn: {idxs_no_nn}")

# print("id with least confidence: ", idx_min_confidence)
# print("neighbors of least confident point: ", nn_np_padded[idx_min_confidence[0]])
# print("least confident point label: ", pts_labels[idx_min_confidence[0]])
# print("neightbours labels of least confident point: ", nn_labels_float[idx_min_confidence[0]])



# get the confidence scores of pts
# confidence = match_count / nn_count
# 1. get match_count
# 2. get nn_count

In [None]:
# Reference implementation: plain Python loop over the points of the current
# bin, timed for comparison with the vectorised version.
start_time = time.time()
current = start  # global index of the point whose neighbours are in N
for N in nn:
    pt_label = labels[current]
    # Drop the query point itself from its neighbours. A new list is built
    # instead of calling N.remove(): N is one of the lists stored in `nn`, so
    # removing in place would silently change `nn` for every other cell and
    # make this cell behave differently when it is re-run.
    neighbours = [idx for idx in N if idx != current]
    N_count = len(neighbours)  # number of neighbours within the search radius
    N_labels = labels[neighbours]

    # True for each neighbour that has the same label as the point
    N_match_bool = pt_label == N_labels
    # confidence from neighbours = fraction of matching neighbours
    confidence_from_N = fraction_of_trues(N_match_bool)

    # confidence should be altered later

    current += 1

end_time = time.time()
elapsed_time_seconds = end_time - start_time
elapsed_time_minutes = elapsed_time_seconds / 60
print(f"Elapsed time: {elapsed_time_minutes:.4f} minutes")


**Should implement this**

In [None]:
# Compute the neighbourhood label confidence for every bin and stitch the
# per-bin results together.
# NOTE(review): assumes each entry of nn_bins is a (start, end) pair with an
# exclusive end, as used by the single-bin cells, and that the bins are in
# point order - confirm against generate_bins.
labels_np = np.asarray(labels)
confidence_per_bin = []
for nn_bin in nn_bins:  # not named `bin`: that would shadow the builtin
    bin_start, bin_end = nn_bin[0], nn_bin[1]
    bin_nn = tree.query_ball_point(xyz[bin_start:bin_end, :], r=search_radius, p=2, workers=-1)
    bin_size = len(bin_nn)

    # Flatten the ragged neighbour lists; owner_idx holds, for every flattened
    # neighbour, the global index of the point it belongs to.
    nn_lengths = np.array([len(neighbours) for neighbours in bin_nn], dtype=np.int64)
    nn_flat = np.array([idx for neighbours in bin_nn for idx in neighbours], dtype=np.int64)
    owner_idx = np.repeat(np.arange(bin_start, bin_start + bin_size), nn_lengths)

    not_self = nn_flat != owner_idx  # a point is not its own neighbour
    same_label = labels_np[nn_flat] == labels_np[owner_idx]
    owner_row = owner_idx - bin_start
    bin_nn_count = np.bincount(owner_row[not_self], minlength=bin_size)
    bin_match_count = np.bincount(owner_row[not_self & same_label], minlength=bin_size)

    # -1 marks points with fewer than nn_threshold neighbours (or none at all);
    # every other point gets match_count / neighbour_count.
    bin_confidence = np.full(bin_size, -1.0)
    enough_nn = (bin_nn_count >= nn_threshold) & (bin_nn_count > 0)
    bin_confidence[enough_nn] = bin_match_count[enough_nn] / bin_nn_count[enough_nn]
    confidence_per_bin.append(bin_confidence)

confidence_all_bins = np.concatenate(confidence_per_bin) if confidence_per_bin else np.empty(0)
print("confidence_all_bins.shape: ", confidence_all_bins.shape)

#### USELESS

In [None]:
# Scratch experiment: pad the ragged neighbour lists to a rectangular array
# and replace the padding with a placeholder index.
# NOTE: this overwrites nn_np_padded and the related globals used by the
# cells above.
max_length = max(len(lst) for lst in nn)
print("max_length: ", max_length)
# Object array (integer indices + NaN padding) and its float twin for np.isnan.
nn_np_padded = np.array([lst + [np.nan] * (max_length - len(lst)) for lst in nn], dtype=object)
nn_np_padded_float = nn_np_padded.astype(float)

# Flatten both to shape (1, n_points * max_length).
nn_np_padded_flat = nn_np_padded.reshape((1, -1))
nn_np_padded_float_flat = nn_np_padded_float.reshape((1, -1))
# mask: True where the slot is NaN padding
nn_np_padded_float_flat_nanMask = np.isnan(nn_np_padded_float_flat)

print(nn_np_padded_float_flat_nanMask)

# Replace the padding with index -1 so the array can be used for indexing.
nn_np_padded_flat[nn_np_padded_float_flat_nanMask] = -1
print(nn_np_padded_flat)
print(nn_np_padded_flat.shape)
# print(list(nn_np_padded_flat[0]))


In [None]:
# Look up the labels of all flattened neighbour indices; slots holding the
# placeholder -1 return the label of the last point.
labels[list(nn_np_padded_flat[0])]

In [None]:
# Scratch experiment: time the padding step without dtype=object and find the
# point(s) with the most neighbours.
# NOTE: this overwrites nn_np_padded with a float array (the NaN padding forces
# a float dtype when dtype=object is not given).
start_time = time.time()

max_length = max(len(lst) for lst in nn)
print("max_length: ", max_length)
nn_np_padded = np.array([lst + [np.nan] * (max_length - len(lst)) for lst in nn])
end_time = time.time()
elapsed_time_seconds = end_time - start_time
elapsed_time_minutes = elapsed_time_seconds / 60
print(f"Elapsed time: {elapsed_time_minutes:.4f} minutes")
print(nn_np_padded)
print(nn_np_padded.shape)
# print(nn.shape)

## first figure out which row (point) has maximum number of neighbours
mask = ~np.isnan(nn_np_padded)  # True where the slot holds a real neighbour
print(mask)
max_nn_indices = np.where(np.sum(mask, axis=1) == max_length)
print("max_nn_indices: ", max_nn_indices)
print("type(max_nn_indices): ", type(max_nn_indices))
print("max_nn_indices[0]: ", max_nn_indices[0])

# A row with max_length neighbours has no NaN padding, so it can be cast to
# int safely; indexing `labels` with the float row raises an IndexError.
print(labels[nn_np_padded[max_nn_indices[0][0]].astype(int)])

In [None]:
# Scratch: inspect the container returned by query_ball_point, the type of
# one of its entries, and its shape.
print(type(nn))
print(type(nn[5]))
print(nn.shape)
print(nn)
# print(len(nn[0]))
# # xyz[nn].shape
# print(nn)
# np.array(nn)
# xyz[np.array(nn[0])]

In [None]:
# Scratch: labels of the neighbours of the 6th point in the bin.
print(labels[nn[5]])
# NaN cannot be used as an array index - this is why the padded neighbour
# arrays replace NaN with a placeholder index before looking up labels.
# The error is caught and shown so the cell does not abort a full run.
# (np.nan is used because the np.NaN alias was removed in NumPy 2.0.)
try:
    xyz[np.nan]
except IndexError as err:
    print(f"IndexError: {err}")


In [None]:
# Scratch: shape and contents of the coordinate array read from the LAZ file.
print(laz.xyz.shape)
print(laz.xyz)

In [None]:
# Debug copy of the padded-neighbour label lookup: same steps as the main
# cell, with BEFORE/AFTER/MASK/CHECK prints for row 100 to verify each stage.
max_length = max(len(lst) for lst in nn)
nn_np_padded = np.array([lst + [np.nan] * (max_length - len(lst)) for lst in nn], dtype=object)  # rows padded with NaN up to max_length; still contains the query point itself
nn_np_padded_float = nn_np_padded.astype(float)  # float twin of the array above, used for np.isnan

print("BEFORE: ")
print(nn_np_padded_float[100])

# Remove the query point from its own neighbour row: pts_vector is (n, 1), so
# the comparison broadcasts each point's index across its row.
self_mask = (pts_vector == nn_np_padded_float)
nn_np_padded[self_mask] = np.nan
nn_np_padded_float[self_mask] = np.nan

print("AFTER:")
print(nn_np_padded_float[100])


# Flatten both arrays to shape (1, n_points * max_length) so that all neighbour
# indices can be passed to `labels` in a single lookup.
nn_np_padded_flat = nn_np_padded.reshape((1, -1))
nn_np_padded_float_flat = nn_np_padded_float.reshape((1, -1))
# nanMask: True where the slot is NaN (padding or the removed query point)
nn_np_padded_float_flat_nanMask = np.isnan(nn_np_padded_float_flat)
# 2-D mask: True where the slot holds a real neighbour
nn_np_padded_Mask = ~np.isnan(nn_np_padded_float)

print("MASK:")
print(nn_np_padded_Mask[100])

# Replace every NaN with the placeholder index -1 so the array can be used for
# indexing; the labels fetched for those slots are reset to NaN further down.
nn_np_padded_flat[nn_np_padded_float_flat_nanMask] = -1  # -1 is a valid index (last point), so the lookup below does not fail
nn_labels_flat_pHold = labels[list(nn_np_padded_flat[0])]
nn_labels_flat_pHold = np.array(nn_labels_flat_pHold)
# nn_labels_flat = nn_labels_flat_pHold *  ~nn_np_padded_float_flat_nanMask
nn_labels_flat = nn_labels_flat_pHold.copy()
nn_labels_flat_float = nn_labels_flat.astype(float)
# Blank out the labels that were fetched through the -1 placeholder.
nn_labels_flat_float[nn_np_padded_float_flat_nanMask[0]] = np.nan

# Back to 2-D: one row of neighbour labels per query point, NaN where there is
# no neighbour.
nn_labels_float = nn_labels_flat_float.reshape((nn_np_padded.shape[0], -1))
# (unfinished note in the original: "identifying ...")

# print(nn_np_padded_float_flat_nanMask)
# print(nn_labels_flat_pHold)
# # print(nn_labels_flat_pHold.reshape((nn_np_padded.shape[0], -1)))
# print("nn_labels_float: ", nn_labels_float)
# print("nn_labels_float.shape: ", nn_labels_float.shape)
print("CHECK:")
print(nn_labels_float[100])