In [1]:
# User Defined Package
import src.modules.utils as util
import src.modules.MultipleReturnsClassification as MRC
import src.modules.LasFilePreprocessing as LFP
import src.modules.ExtractGroundPlane as GP


from multiprocessing import Pool
import logging
import os

import numpy as np
import pandas as pd
from pyproj import Transformer
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from kneed import KneeLocator
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score # How best can we seperate clusters
import pptk
import time
from scipy import spatial
import re #parsing log files
from scipy.signal import find_peaks
from scipy.spatial import ConvexHull
import alphashape
from shapely.geometry import Point
import json
import laspy



In [2]:
#Logging Functions

def InitiateLogger(filename: str, year:int)-> None:

    LoggerPath = "Datasets/"+"Package_Generated/"+filename[:-4]+"/"+str(year)+"/Logs_"+filename[:-4]+"/"

    print("Logger Folder Path : ",LoggerPath)
    # Check whether the specified pptk_capture_path exists or not
    isExist = os.path.exists(LoggerPath)

    if not isExist:
    # Create a new directory because it does not exist 
        os.makedirs(LoggerPath)

    logfilename = LoggerPath + filename+'.log' 
    logger = logging.getLogger()
    fhandler = logging.FileHandler(filename=logfilename, mode='a')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fhandler.setFormatter(formatter)
    logger.addHandler(fhandler)
    logger.setLevel(logging.INFO)

def parse_log_Completedfiles(log_file_path):
    """_summary_

    Args:
        log_file_path (_type_): _description_

    Returns:
        _type_: _description_
    
    Usage:
    log_file_path = 'TEST_log.log'
    filenames = parse_log_Completedfiles(log_file_path)
    print(filenames)
    """
    filenames = []
    with open(log_file_path, 'r') as log_file:
        for line in log_file:
            match = re.search(r'(?<=Completed file: )\S+', line)
            if match:
                filenames.append(match.group())
    return filenames

In [3]:
def Get_filenames(folder_path:str, year:int)->list:
    folder_path = folder_path+'NYC_'+str(year)+'/'
    filenames = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
    return filenames

def Delete_File(file_path:str)->None:
    if os.path.isfile(file_path):
        os.remove(file_path)
        print(f"{file_path} has been deleted.")
    else:
        print(f"{file_path} not found.")

In [4]:
# Extract Ground
def Extract_GroundData(filename:str,year:int, TileDivision=30, lasfiles_folderpath= 'Datasets/FTP_files/LiDAR/') -> tuple:
    
    s_start = time.time()
    rows, cols = (TileDivision, TileDivision)

    lasfilepath = lasfiles_folderpath+'NYC_'+str(year)+'/'+filename
    lasfile_object = LFP.Read_lasFile(lasfilepath)
    lidar_df, rawpoints = LFP.Create_lasFileDataframe(lasfileObject=lasfile_object)

    #MR_df = LFP.Get_MRpoints(lidar_df) Not needed for ground points
    SR_df = LFP.Get_SRpoints(lidar_df)

    #lasTile class
    TileObj = LFP.lasTile(SR_df,TileDivision)

    #Serialized
    s_start = time.time()
    lidar_TilesubsetArr = TileObj.Get_subtileArray()

    s_end = time.time()
    stime = s_end - s_start
    logging.info("Extraction of Subtile Matrix Buffer Serial Time for %s = %d",filename,stime)

    #Ground Plane Classifcation - Serial Implementation
    g_start = time.time()
    Potential_Ground_Points = []
    Other_points = []

    GP_obj = GP.GP_class()

    for row in range(TileDivision):
        for col in range(TileDivision):

            tile_segment_points = lidar_TilesubsetArr[row][col].iloc[:,:3].to_numpy()

            Ground_Points, NGround_Points = GP_obj.Extract_GroundPoints(tile_segment_points)

            for k in Ground_Points:
                Potential_Ground_Points.append(k) #append points which may be potentially ground points
            for l in NGround_Points:
                Other_points.append(l) #append points which may be potentially ground points
    Potential_Ground_Points = np.array(Potential_Ground_Points)
    Other_points = np.array(Other_points)
    
    g_end = time.time()
    gtime = g_end - g_start
    logging.info("Ground Point Extraction Algorithm Serial Time for %s = %d",filename,gtime)

    return Potential_Ground_Points, Other_points, np.mean(Potential_Ground_Points[:,2])

In [5]:
#MR Classification
def Log_TileLocation(MR_df):

    #Print Lat , Long
    ix, iy = np.mean(MR_df.X.to_numpy()), np.mean(MR_df.Y.to_numpy()) 

    transformer = Transformer.from_crs("epsg:2263", "epsg:4326")
    lat, lon = transformer.transform(ix*3.28, iy*3.28)
    location_str = str(lat)+","+str(lon)

    return location_str, lat,lon

def ConvertLatLong(points): # converting to ft ( 2263 - 4326)

    x = points[0]*3.28
    y = points[1]*3.28
    transformer = Transformer.from_crs("epsg:2263", "epsg:4326")
    lat, lon = transformer.transform(x,y)
    return [lat,lon]

def PreprocessLasFile(f, year, TileDivision=10, lasfiles_folderpath='Datasets/FTP_files/LiDAR/'):

    #Get File_ID -> 1001.las - 1001
    las_fileID = int(''.join(c for c in f if c.isdigit())) 
    #Object to handle las preprocessing
    LasHandling = MRC.LFP
    #Get path
    lasfilepath = lasfiles_folderpath+'NYC_'+str(year)+'/'+str(las_fileID)+'.las'
    #Read las file
    lasfile_object = LasHandling.Read_lasFile(lasfilepath)
    #Define Tile Subdivisions
    TileDivision = TileDivision
    #rows, cols = (TileDivision, TileDivision)
    #Create Dataframe from lasfile
    lidar_df, rawpoints = LasHandling.Create_lasFileDataframe(lasfileObject=lasfile_object)

    #Extract MR and SR points from Dataframe
    MR_df = LasHandling.Get_MRpoints(lidar_df)
    SR_df = LasHandling.Get_SRpoints(lidar_df)

    return lidar_df, rawpoints, MR_df, SR_df

def Get_eps_NN_KneeMethod(cluster_df, N_neighbors = 12, display_plot=False):

    nearest_neighbors = NearestNeighbors(n_neighbors=N_neighbors)
    neighbors = nearest_neighbors.fit(cluster_df)
    distances, indices = neighbors.kneighbors(cluster_df)
    distances = np.sort(distances[:,N_neighbors-1], axis=0)

    i = np.arange(len(distances))
    knee = KneeLocator(i, distances, S=1, curve='convex', direction='increasing', interp_method='polynomial')
    if (display_plot):
        fig = plt.figure(figsize=(5, 5))
        knee.plot_knee()
        plt.xlabel("Points")
        plt.ylabel("Distance")
        print(distances[knee.knee])
    
    return distances[knee.knee]

def Normalize_points(points):
    return points / np.linalg.norm(points)

In [9]:
#MAIN
stime = time.time()

#Get las filenames
lasfiles_folderpath = 'Datasets/FTP_files/LiDAR/'
year = 2021

LAS_filenames = Get_filenames(lasfiles_folderpath, year)

print("FileCount : "+str(len(LAS_filenames))+" .las files found in path = "+lasfiles_folderpath )

#NOTE: Script assumes that all files have been downloaded
f = LAS_filenames[3]
print(f)

FileCount : 12 .las files found in path = Datasets/FTP_files/LiDAR/
25192.las


In [10]:
lasfilepath = lasfiles_folderpath+'NYC_'+str(year)+'/'+f
lasfile_object = LFP.Read_lasFile(lasfilepath)
lidar_df, rawpoints = LFP.Create_lasFileDataframe(lasfileObject=lasfile_object)

In [11]:
lidar_df.classification.value_counts()

5.0    7407337
2.0    3542877
1.0    2765796
6.0    2353862
4.0     673315
8.0     397579
3.0     242659
Name: classification, dtype: int64

In [12]:
lidar_df = lidar_df[lidar_df["classification"] != 18] #removing high noise
lidar_df = lidar_df[lidar_df["classification"] != 7] #removing  noise

In [13]:
ConvertLatLong([np.mean(lidar_df.X/3.28), np.mean(lidar_df.Y/3.28)])

[40.69844933830165, -73.8486019337876]

In [14]:
Ground = lidar_df[lidar_df["classification"] == 2]
Buildings = lidar_df[lidar_df["classification"] == 6]

#Extract classification labels 3 4 and 5
Vegetation = lidar_df[lidar_df["classification"] == 3]
Vegetation = Vegetation.append(lidar_df[lidar_df["classification"] == 4])
Vegetation = Vegetation.append(lidar_df[lidar_df["classification"] == 5])

Ground = Ground.iloc[:,:3].to_numpy() #Get only X,Y,Z
Buildings = Buildings.iloc[:,:3].to_numpy() #Get only X,Y,Z
Vegetation = Vegetation.iloc[:,:3].to_numpy() #Get only X,Y,Z

All_points_1 = np.concatenate((Ground, Buildings, Vegetation), axis=0)
rgb_Ground =  [[1,0,0]]*len(Ground) #Set red colour
rgb_Buildings = [[255,255,255]]*len(Buildings) #set green colour - Classified tree points
rbg_Vegetation = [[0,1,0]]*len(Vegetation) #set blue colour - Classified Vegetation points
All_rgb = np.concatenate((rgb_Ground, rgb_Buildings, rbg_Vegetation), axis=0)

v = pptk.viewer(All_points_1, All_rgb)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

In [None]:
stopper

In [18]:
#Initate logger
InitiateLogger(f,year)
logging.info("TerraVide lidar processing Initated")

logging.info("Ground Plane Algorithm initated for : "+f)
if year == 2021:
    Gpoints = lidar_df[lidar_df["classification"] == 2].iloc[:,:3].to_numpy()
    NGpoints = lidar_df[lidar_df["classification"] != 2].iloc[:,:3].to_numpy()
    Elevation = np.mean(Gpoints[:,2])
else:
    Gpoints, NGpoints, Elevation = Extract_GroundData(f, year, TileDivision=60)


Logger Folder Path :  Datasets/Package_Generated/25192/2021/Logs_25192/


In [25]:
Vegetation_df

array([[1.02591137e+06, 1.93770615e+05, 2.82286585e+01],
       [1.02591303e+06, 1.93769380e+05, 2.87231707e+01],
       [1.02591343e+06, 1.93769682e+05, 2.86024390e+01],
       ...,
       [1.02660230e+06, 1.93731573e+05, 2.88128049e+01],
       [1.02662911e+06, 1.93743216e+05, 2.95707317e+01],
       [1.02660625e+06, 1.93733632e+05, 3.11646341e+01]])

In [26]:
#Processing 2021 25192.las

#Extract classification labels 3 4 and 5
Vegetation = lidar_df[lidar_df["classification"] == 3]
Vegetation = Vegetation.append(lidar_df[lidar_df["classification"] == 4])
Vegetation = Vegetation.append(lidar_df[lidar_df["classification"] == 5])

Vegetation_df = Vegetation.iloc[:,:3]

TileObj_MR = MRC.MR_class(Vegetation_df,TileDivision=10) #Multiple Return Points

#Serialized Creation of Lidar Subtiles
lidar_TilesubsetArr = TileObj_MR.Get_subtileArray()

In [27]:
#Trees buffer

Tilecounter = 0
Trees_Buffer = []
N_Neighbours = 12

for row in range(10):
    for col in range(10):

        #print('-'*40)
        
        #print("TILE ID : ",Tilecounter)
        Tilecounter = Tilecounter + 1

        if (len(lidar_TilesubsetArr[row][col].iloc[:,:3].to_numpy()) > N_Neighbours):

            cluster_df = lidar_TilesubsetArr[row][col].iloc[:,:3]
            tile_eps = Get_eps_NN_KneeMethod(cluster_df) #round(Optimal_EPS,2)
            #print(tile_eps)
            tile_segment_points = lidar_TilesubsetArr[row][col].iloc[:,:3].to_numpy()
            subTileTree_Points,  _ = TileObj_MR.Classify_MultipleReturns(tile_segment_points,tile_eps)

            for t in subTileTree_Points:
                Trees_Buffer.append(t)
            
            logging.info("MR - T_ID : %s - ACTION: Trees Added to - S_ID : %d",f,Tilecounter)
        
        else:
            logging.warn("Empty Tileset Found")

Trees_Buffer = np.array(Trees_Buffer)

In [29]:
pptk.viewer(Trees_Buffer)

<pptk.viewer.viewer.viewer at 0x7fea0970c240>

In [30]:
Approx_locations_str, T_lat, T_lon = Log_TileLocation(Vegetation_df)
logging.info("Approximate Location of %s : %s", f,Approx_locations_str)

In [31]:
All_eps = [] #Stores all eps values by tile id
N_Neighbours = 12
subT_ID = 0
TileDivision =10
EPS_distribution_df = pd.DataFrame(columns=['T_ID', 'T_lat', 'T_lon', 'subT_ID', 'subT_lat','subT_lon','EPS'])

for row in range(TileDivision):
    for col in range(TileDivision):

        if(len(lidar_TilesubsetArr[row][col].iloc[:,:3].to_numpy()) > N_Neighbours):

            cluster_df = lidar_TilesubsetArr[row][col].iloc[:,:3]
            subtile_location_str, subT_lat, subT_long = Log_TileLocation(cluster_df)
            subtile_eps = Get_eps_NN_KneeMethod(cluster_df)
            All_eps.append(subtile_eps)

        EPS_dist_df_row = [f,T_lat,T_lon]
        EPS_dist_df_row.append(subT_ID)
        EPS_dist_df_row.append(subT_lat)
        EPS_dist_df_row.append(subT_long)
        EPS_dist_df_row.append(subtile_eps)

        EPS_distribution_df.loc[len(EPS_distribution_df.index)] = EPS_dist_df_row
        
        subT_ID = subT_ID + 1

Optimal_EPS = np.mean(All_eps)
logging.info("Avg EPS for %s : %s",f,Optimal_EPS)

EPS_CSV_filename = 'Spatial_HP_Distribution_'+f+"_"+str(year)+'.csv'
EPS_CSV_dir = "Datasets/"+"Package_Generated/"+f[:-4]+"/"+str(year)+"/LiDAR_HP_MATRIX_"+f[:-4]+"/"
# Check whether the specified EPS_CSV_dir exists or not
isExist = os.path.exists(EPS_CSV_dir)

if not isExist:
# Create a new directory because it does not exist 
    os.makedirs(EPS_CSV_dir)

logging.info("MR - T_ID : %s - ACTION: HP_MATRIX CSV file Created",f)
EPS_distribution_df.to_csv(EPS_CSV_dir+EPS_CSV_filename)

In [32]:
db = DBSCAN(eps=np.mean(EPS_distribution_df.EPS), min_samples=30).fit(Trees_Buffer)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
DB_labels = db.labels_

In [33]:
rgb_array = assign_colors(Trees_Buffer, DB_labels)
v = pptk.viewer(Trees_Buffer)
v.attributes(rgb_array/255)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

#TakeScreenShot(Trees_Buffer,v,f,year,"MR_Points")

# End of processing 2021 25192.las

In [19]:
def TakeScreenShot(points,v,f,year,type:str):
    x_mean = np.mean(points[:,0])
    y_mean = np.mean(points[:,1])
    z_mean = np.mean(points[:,2])

    v.set(phi=np.pi/4)
    v.set(theta=np.pi/6)
    v.set(r=900)
    v.set(lookat=[x_mean,y_mean,0])

    pptk_capture_path = "Datasets/"+"Package_Generated/"+f[:-4]+"/"+str(year)+"/PPTK_screenshots_"+f[:-4]+"/"
    # Check whether the specified pptk_capture_path exists or not
    isExist = os.path.exists(pptk_capture_path)

    if not isExist:
    # Create a new directory because it does not exist 
        os.makedirs(pptk_capture_path)
    time.sleep(5) #screenshots were not captured after v.close was added
    v.capture(pptk_capture_path+"Capture_"+type+"_"+f[:-4]+"_"+str(year)+'.png')
    time.sleep(3) #screenshots were not captured after v.close was added
    v.close()

In [21]:
#plotting ground poitns found
p1 = Gpoints
p2 = NGpoints
All_points_1 = np.concatenate((p1, p2), axis=0)
rgb_p1 =  [[1,0,0]]*len(p1) #Set red colour
rgb_p2 = [[255,255,255]]*len(p2) #set green colour - Classified tree points
All_rgb = np.concatenate((rgb_p1, rgb_p2,), axis=0)

v = pptk.viewer(All_points_1, All_rgb)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

#TakeScreenShot(All_points_1,v,f,year,"Ground_Classification")

In [None]:
logging.info("MR Classification Algorithm initated for : "+f)

lidar_df, rawpoints, MR_df, SR_df = PreprocessLasFile(f, year)

Approx_locations_str, T_lat, T_lon = Log_TileLocation(MR_df)
logging.info("Approximate Location of %s : %s", f,Approx_locations_str)

#lasTile class
TileObj_SR = MRC.MR_class(SR_df,TileDivision=10) #Single Return Points
TileObj_MR = MRC.MR_class(MR_df,TileDivision=10) #Multiple Return Points

#Serialized Creation of Lidar Subtiles
lidar_TilesubsetArr = TileObj_MR.Get_subtileArray()

In [None]:
All_eps = [] #Stores all eps values by tile id
N_Neighbours = 12
subT_ID = 0
TileDivision =10
EPS_distribution_df = pd.DataFrame(columns=['T_ID', 'T_lat', 'T_lon', 'subT_ID', 'subT_lat','subT_lon','EPS'])

for row in range(TileDivision):
    for col in range(TileDivision):

        if(len(lidar_TilesubsetArr[row][col].iloc[:,:3].to_numpy()) > N_Neighbours):

            cluster_df = lidar_TilesubsetArr[row][col].iloc[:,:3]
            subtile_location_str, subT_lat, subT_long = Log_TileLocation(cluster_df)
            subtile_eps = Get_eps_NN_KneeMethod(cluster_df)
            All_eps.append(subtile_eps)

        EPS_dist_df_row = [f,T_lat,T_lon]
        EPS_dist_df_row.append(subT_ID)
        EPS_dist_df_row.append(subT_lat)
        EPS_dist_df_row.append(subT_long)
        EPS_dist_df_row.append(subtile_eps)

        EPS_distribution_df.loc[len(EPS_distribution_df.index)] = EPS_dist_df_row
        
        subT_ID = subT_ID + 1

Optimal_EPS = np.mean(All_eps)
logging.info("Avg EPS for %s : %s",f,Optimal_EPS)

EPS_CSV_filename = 'Spatial_HP_Distribution_'+f+"_"+str(year)+'.csv'
EPS_CSV_dir = "Datasets/"+"Package_Generated/"+f[:-4]+"/"+str(year)+"/LiDAR_HP_MATRIX_"+f[:-4]+"/"
# Check whether the specified EPS_CSV_dir exists or not
isExist = os.path.exists(EPS_CSV_dir)

if not isExist:
# Create a new directory because it does not exist 
    os.makedirs(EPS_CSV_dir)

logging.info("MR - T_ID : %s - ACTION: HP_MATRIX CSV file Created",f)
EPS_distribution_df.to_csv(EPS_CSV_dir+EPS_CSV_filename)

In [None]:
Tilecounter = 0
Trees_Buffer = []
N_Neighbours = 12

for row in range(TileDivision):
    for col in range(TileDivision):

        #print('-'*40)
        
        #print("TILE ID : ",Tilecounter)
        Tilecounter = Tilecounter + 1

        if (len(lidar_TilesubsetArr[row][col].iloc[:,:3].to_numpy()) > N_Neighbours):

            cluster_df = lidar_TilesubsetArr[row][col].iloc[:,:3]
            tile_eps = Get_eps_NN_KneeMethod(cluster_df) #round(Optimal_EPS,2)
            #print(tile_eps)
            tile_segment_points = lidar_TilesubsetArr[row][col].iloc[:,:3].to_numpy()
            subTileTree_Points,  _ = TileObj_MR.Classify_MultipleReturns(tile_segment_points,tile_eps)

            for t in subTileTree_Points:
                Trees_Buffer.append(t)
            
            logging.info("MR - T_ID : %s - ACTION: Trees Added to - S_ID : %d",f,Tilecounter)
        
        else:
            logging.warn("Empty Tileset Found")

Trees_Buffer = np.array(Trees_Buffer)

In [28]:
def assign_colors(arr, clusters):
    """
    Assigns the same color to values in the same cluster in a 3D NumPy array
    :param arr: 3D numpy array
    :param clusters: list indicating which cluster each value belongs to
    :return: 3D numpy array with values in the same cluster assigned the same color
    """
    # Create a set of unique cluster labels
    labels = set(clusters)
    # Create a dictionary to map cluster labels to colors
    color_map = {label: np.random.randint(0, 255, 3) for label in labels}
    # Create an empty 3D array with the same shape as the input array
    color_arr = np.empty_like(arr)
    # Iterate over the input array and assign colors to the corresponding clusters
    for i, label in enumerate(clusters):
        color_arr[i] = color_map[label]
    return color_arr

In [None]:
db = DBSCAN(eps=np.mean(EPS_distribution_df.EPS), min_samples=30).fit(Trees_Buffer)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
DB_labels = db.labels_

In [None]:
rgb_array = assign_colors(Trees_Buffer, DB_labels)
v = pptk.viewer(Trees_Buffer)
v.attributes(rgb_array/255)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

TakeScreenShot(Trees_Buffer,v,f,year,"MR_Points")

In [42]:
MR_TreesDf = pd.DataFrame(Trees_Buffer, columns=["X","Y","Z"])
MR_TreesDf["Cluster_Labels"] = DB_labels

# Notes:
Parks clusters -> clusters with > 2 std from mean points
large clusters -> [0.5 std , 1.5 std]
small clusters -> clusters with <  2 std from mean points
Acceptable Clusters -> [-1.5 std to + 0.5 std]

In [None]:
# def get_cluster_sizes_within_std(cluster_sizes, std_range=1):
#     mean = np.mean(cluster_sizes)
#     std = np.std(cluster_sizes)
#     lower_bound = mean - (std * std_range)
#     upper_bound = mean + (std * std_range)
#     return np.where((lower_bound <= cluster_sizes) & (cluster_sizes <= upper_bound))


In [34]:
cluster_sizes = np.bincount(DB_labels[DB_labels != -1]) #exclude outliers
C_mean = np.mean(cluster_sizes)
C_std = np.std(cluster_sizes)

In [35]:
logging.info("MR - T_ID : %s - Stats : ClusterSize_mean: %s , ClusterSize_std : %s",f,C_mean,C_std)

In [36]:
#Get Largest clusters
#Parks clusters -> clusters with > 2 std from mean points
Park_Clusters = np.where(cluster_sizes > C_mean + 2*C_std)[0]
Filtered_cluster_sizes = np.where(cluster_sizes < C_mean + 2*C_std)[0]
FC_mean = np.mean(Filtered_cluster_sizes)
FC_std = np.std(Filtered_cluster_sizes)
Park_Clusters

array([ 535, 1104, 1332, 1448, 5246, 6923, 7821])

In [37]:
FC_mean, FC_std

(4687.396391587488, 2705.73100667339)

In [38]:
logging.info("MR - T_ID : %s - STATS [PARK REMOVED]: ClusterSize_mean: %s , ClusterSize_std : %s",f,FC_mean,FC_std)

In [39]:
#Recalculating thresholds as park cluster sizes make the std >> mean 
# and hence no clusters were showing up when mean - 1*std was taken

#small clusters -> clusters with <  1 std from mean points
Small_Clusters = np.where(cluster_sizes <= FC_mean + -1*FC_std)[0]
#large clusters -> [0.05 std , 2 std]
Large_Clusters = np.where((cluster_sizes > FC_mean + 0.1*FC_std)
                        &
                        (cluster_sizes <= FC_mean + 3*FC_std))[0]
#Acceptable Clusters -> [-1 std to + 0.05 std]  
Accepted_clusters = np.where((cluster_sizes > FC_mean + -1*FC_std)
                        &
                        (cluster_sizes <= FC_mean + 0.5*FC_std))[0] 

# accepted_clusters = np.where(cluster_sizes < np.mean(cluster_sizes) + 2*np.std(cluster_sizes))[0]
# large_clusters = np.where(cluster_sizes > np.mean(cluster_sizes) + 2*np.std(cluster_sizes))[0]

In [40]:
def Get_ClusterPoints(df, label_arr):
    temp_df = df[df["Cluster_Labels"].isin(label_arr)]
    return temp_df.iloc[:,:3].to_numpy()

In [43]:
Park_CLusterPoints = Get_ClusterPoints(MR_TreesDf, Park_Clusters)
Small_ClustersPoints = Get_ClusterPoints(MR_TreesDf, Small_Clusters)
Large_ClustersPoints = Get_ClusterPoints(MR_TreesDf, Large_Clusters)
Accepted_clustersPoints = Get_ClusterPoints(MR_TreesDf, Accepted_clusters)

In [44]:
pptk.viewer(Large_ClustersPoints)

<pptk.viewer.viewer.viewer at 0x7fea097139e8>

In [None]:
Large_Clusters

In [None]:
Accepted_clusters

In [None]:
p1 = Accepted_clustersPoints
p2 = Large_ClustersPoints #Park_CLusterPoints #Accepted_clustersPoints
All_points_1 = np.concatenate((p1, p2), axis=0)
rgb_p1 = [[1,0,0]]*len(p1) #set red colour
rgb_p2 =  [[255,255,255]]*len(p2) #Set white colour 
All_rgb = np.concatenate((rgb_p1, rgb_p2), axis=0)

v = pptk.viewer(All_points_1, All_rgb)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)


TakeScreenShot(All_points_1,v,f,year,"ClusterSegmentation_(WL_RA)")

### Park Clusters

In [46]:
#Get Clusters from Local Maximas
PC_labels = []
peaks, _ = find_peaks(Park_CLusterPoints[:,2], distance=500,prominence=3)
# Get the x, y, and z coordinates of the local maximas
local_maximas = Park_CLusterPoints[peaks]

#Perform NN
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(local_maximas)
distances, indices = nbrs.kneighbors(Park_CLusterPoints)

PC_labels = pd.Series(indices.flatten())

rgb_array = assign_colors(Park_CLusterPoints, PC_labels)
v = pptk.viewer(Park_CLusterPoints)
v.attributes(rgb_array/255)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

#TakeScreenShot(Park_CLusterPoints,v,f,year,"Park_Clusters")

### Accepted Clusters

In [47]:
db = DBSCAN(eps=1.5, min_samples=30).fit(Accepted_clustersPoints)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
AC_labels = db.labels_

rgb_array = assign_colors(Accepted_clustersPoints, AC_labels)
v = pptk.viewer(Accepted_clustersPoints)
v.attributes(rgb_array/255)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

#TakeScreenShot(Accepted_clustersPoints,v,f,year,"Regular_TreeClusters")

### Large Clusters

In [48]:
LC_df = pd.DataFrame(Large_ClustersPoints,columns=["X","Y","Z"])

maxSearch = LC_df
maxSearch['seeds'] = 1
xs = maxSearch['X'].to_numpy()
ys = maxSearch['Y'].to_numpy()
zs = maxSearch['Z'].to_numpy()
seeds = maxSearch['seeds'].to_numpy()

neighborhood = 4
changes = 1
while changes > 0:
    changes = 0
    i = 0
    for x,y,z,seed in zip(xs,ys,zs,seeds):
        if seed == 1:
            zsearch = zs[xs > x - neighborhood]
            xsearch = xs[xs > x - neighborhood]
            ysearch = ys[xs > x - neighborhood]
            
            zsearch = zsearch[xsearch < x + neighborhood]
            ysearch = ysearch[xsearch < x + neighborhood]
            
            zsearch = zsearch[ysearch > y - neighborhood]
            ysearch = ysearch[ysearch > y - neighborhood]
            
            zsearch = zsearch[ysearch < y + neighborhood]

            zmax = np.max(zsearch)
            if z < zmax:
                seeds[i] = 0
                changes += 1
            else:
                pass
        else:
            pass
        i+=1

maxSearch['seeds'] = seeds      
localMaxima = maxSearch[maxSearch['seeds']>0]

centers=localMaxima[['X','Y','Z']].to_numpy()
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(centers)
points = maxSearch[['X','Y','Z']].to_numpy()
distances, indices = nbrs.kneighbors(points)
maxSearch['nearestNeighbor'] = indices

In [None]:
LC_labels = pd.Series(indices.flatten())
rgb_array = assign_colors(Large_ClustersPoints, LC_labels)
v = pptk.viewer(Large_ClustersPoints)
v.attributes(rgb_array/255)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

TakeScreenShot(Accepted_clustersPoints,v,f,year,"Large_TreeClusters_NN")

In [None]:
len(np.unique(PC_labels)), len(np.unique(LC_labels)),  len(np.unique(AC_labels))

In [None]:
logging.info("MR - T_ID : %s - STATS: Park Trees: %d ",f,len(np.unique(PC_labels)))
logging.info("MR - T_ID : %s - STATS: Large CLuster Trees: %d ",f,len(np.unique(LC_labels)))
logging.info("MR - T_ID : %s - STATS: Acceptable Trees: %d ",f,len(np.unique(AC_labels)))

In [None]:
Total_TreeCount = len(np.unique(PC_labels))+len(np.unique(LC_labels))+len(np.unique(AC_labels))
logging.info("MR - T_ID : %s - STATS: TOTAL Trees: %d ",f,Total_TreeCount)

### Small CLusters

#### IGNORED FOR NOW

# Creating JSON File for a single Tree

In [None]:
def Get_SRpoints(MR_points, NG_df, AreaExtent = 2):

    SR_ClusterTreePoints = []
    NotTreePoints = []
    #create a 2D convex Hull
    hull = ConvexHull(MR_points[:,:2])

    #Get an area of points to look at (No need to iterate through all points in the tile)
    #get bounding subset to look at
    SR_shape_X_max = hull.max_bound[0] + AreaExtent
    SR_shape_Y_max = hull.max_bound[1] + AreaExtent

    SR_shape_X_min = hull.min_bound[0] - AreaExtent
    SR_shape_Y_min = hull.min_bound[1] - AreaExtent

    lidar_subset_df = NG_df[
        (NG_df['X'].between(SR_shape_X_min, SR_shape_X_max, inclusive=False) &
    NG_df['Y'].between(SR_shape_Y_min, SR_shape_Y_max, inclusive=False))
    ]

    #Get only points from BF
    SR_masked_points = lidar_subset_df.iloc[:,:3].to_numpy()

    #Create a shape of the building footprint
    MR_shape_alpha = alphashape.alphashape(MR_points, alpha=0)

    for p in SR_masked_points:
        tp = Point(p)
        if(tp.within(MR_shape_alpha)):
            SR_ClusterTreePoints.append(p)
        else:
            NotTreePoints.append(p)
    
    return np.array(SR_ClusterTreePoints),np.array(NotTreePoints)

def Create_ConvexHullDict(hull):
    # Convex Hull Data
    ConvexHullData = {
        "vertices" : hull.vertices.tolist(),
        "simplices" : hull.simplices.tolist(),
        "ClusterPoints" : hull.points.tolist(),
        "equations" : hull.equations.tolist(),
        "volume" : hull.volume,
        "area" : hull.area
    }

    return ConvexHullData

def Store_TreeCluster_toJSON(filename, year,
                            arr, labels,
                            NGpoints_df, 
                            Extracted_SRpoints, Extracted_NTpoints,
                            Global_TreeCounter,
                            Elevation,
                            jfolderpath, isPark=False):


    las_fileID = filename[:-4]

    # Get unique labels
    unique_labels = np.unique(labels)

    # Iterate over unique labels
    for label in unique_labels:

        if label == -1: #ignore outliers
            continue

        MR_Cluster_data = arr[labels == label]

        if(len(MR_Cluster_data) < 10):
            continue

        Global_TreeCounter += 1

        # Get rows of arr that have the current label
        SR_Cluster_data, Extracted_NotTreeData = Get_SRpoints(MR_Cluster_data, NGpoints_df)
        
        if(len(SR_Cluster_data) == 0):
            #No SR points
            logging.warn("EMPTY SR set found")
            SR_Cluster_data = np.array([]) #empty array
            Full_TreePoints = MR_Cluster_data
        else:
            Full_TreePoints = np.vstack([MR_Cluster_data,SR_Cluster_data])

        if(len(SR_Cluster_data)>0):
            for sr_p in SR_Cluster_data:
                Extracted_SRpoints.append(sr_p)
        if(len(Extracted_NotTreeData)>0):
            for nt_p in Extracted_NotTreeData:
                Extracted_NTpoints.append(nt_p)
        
        #Generate Convex hull for this cluster
        MR_hull = ConvexHull(MR_Cluster_data)
        MR_HullInfoDict = Create_ConvexHullDict(MR_hull)

        if(len(SR_Cluster_data)> 4):
            SR_hull = ConvexHull(SR_Cluster_data)
            SR_HullInfoDict = Create_ConvexHullDict(SR_hull)
        else:
            SR_hull = None
            SR_HullInfoDict = None

        Tree_hull = ConvexHull(Full_TreePoints)

        TreeClusterCentroid = np.mean(Full_TreePoints, axis=0)
        latitude , longitude = ConvertLatLong(TreeClusterCentroid)
        
        Tree_HullInfoDict = Create_ConvexHullDict(Tree_hull)
        
        JSON_data_buffer = {
            "lasFileName" : filename,
            "RecordedYear" : year,
            "Tree_CountId" : Global_TreeCounter,
            "MR_points" : MR_Cluster_data.tolist(),
            "SR_points" : SR_Cluster_data.tolist(),
            "Tree_Points" : Full_TreePoints.tolist(),
            "PredictedTreeLocation" : {
                "Latitude" : latitude,
                "Longitude" : longitude
            },
            "TreeFoliageHeight" : Tree_hull.points[:,2].max() - Tree_hull.points[:,2].min(),
            "GroundZValue" : Elevation,
            "ClusterCentroid" : TreeClusterCentroid.tolist(),
            "ConvexHull_MRDict" : MR_HullInfoDict,
            "ConvexHull_SRDict" : SR_HullInfoDict,
            "ConvexHull_TreeDict" : Tree_HullInfoDict,
            "InPark" : isPark
        }
        
        jfilepath = str(las_fileID)+"_"+str(year)+"_ID_"+str(Global_TreeCounter)+"_TreeCluster.json"

        # # Check whether the specified jfilepath exists or not
        # isExist = os.path.exists(jfilepath)

        # if not isExist:
        # # Create a new directory because it does not exist 
        #     os.makedirs(jfilepath)

        #Save JSON Buffer
        with open(jfolderpath+"/"+jfilepath, "w") as jsonFile:
            
            jsonFile.truncate(0)
            json.dump(JSON_data_buffer, jsonFile)
        
            logging.info("Tree JSON File Created - Tree ID : %d",Global_TreeCounter)
    
    return Global_TreeCounter

In [None]:
NGpoints_df = pd.DataFrame(NGpoints,columns=["X","Y","Z"])

In [None]:
JSONfoldernName = "JSON_TreeData_"+f[:-4]

jfolderpath = "Datasets/" + "Package_Generated/"+f[:-4]+"/"+str(year)+"/" + JSONfoldernName +"/"

# Check whether the specified jfolderpath exists or not
isExist = os.path.exists(jfolderpath)

if not isExist:
# Create a new directory because it does not exist 
    os.makedirs(jfolderpath)

# labels = [PC_labels,LC_labels,AC_labels]
# point_arr = [Park_CLusterPoints, Large_ClustersPoints, Accepted_clustersPoints]
# arr = Park_CLusterPoints
Global_TreeCounter = 0


las_fileID = f[:-4]

Extracted_SRpoints = []
Extracted_NTpoints = []


Global_TreeCounter = Store_TreeCluster_toJSON(f, year,
                            Park_CLusterPoints, PC_labels,
                            NGpoints_df, 
                            Extracted_SRpoints, Extracted_NTpoints,
                            Global_TreeCounter,
                            Elevation,
                            jfolderpath,isPark=True)

Global_TreeCounter = Store_TreeCluster_toJSON(f, year,
                            Large_ClustersPoints, LC_labels,
                            NGpoints_df, 
                            Extracted_SRpoints, Extracted_NTpoints,
                            Global_TreeCounter + 1,
                            Elevation,
                            jfolderpath)

Global_TreeCounter = Store_TreeCluster_toJSON(f, year,
                            Accepted_clustersPoints, AC_labels,
                            NGpoints_df, 
                            Extracted_SRpoints, Extracted_NTpoints,
                            Global_TreeCounter + 1,
                            Elevation,
                            jfolderpath)


# Creating a new .las file with new labels

In [None]:
All_Classified_points = np.concatenate((Gpoints, Trees_Buffer,Extracted_SRpoints,Extracted_NTpoints), axis=0)

class_1Labels = [1]*(len(Gpoints))
class_2Labels = [2]*(len(Trees_Buffer))
class_3Labels = [3]*(len(Extracted_SRpoints))
class_4Labels = [4]*(len(Extracted_NTpoints))

final_labels = np.concatenate((class_1Labels,class_2Labels,class_3Labels,class_4Labels),axis=0)

In [None]:
Classifiedpoints_df = pd.DataFrame(All_Classified_points,columns=["X","Y","Z"])
Classifiedpoints_df = Classifiedpoints_df.drop_duplicates()
Original_lidarpoints_df = lidar_df.iloc[:,:3]
# Unclassified_Points = Original_lidarpoints_df[~Classifiedpoints_df.isin(Original_lidarpoints_df)]

In [None]:
arr1 = All_Classified_points
arr2 = Original_lidarpoints_df.to_numpy()

arr2_set = np.unique(Original_lidarpoints_df.to_numpy(), axis=0)

In [None]:
df1=Original_lidarpoints_df
df2=Classifiedpoints_df

df_merged = df1.merge(df2, how="left", on=['X','Y','Z'], indicator=True)
Unclassified_Points_df =  df_merged.query("_merge == 'left_only'")[['X','Y','Z']]

Unclassified_Points = Unclassified_Points_df.to_numpy()
# merged_df = pd.merge(df1, df2, how='left', on=['X','Y','Z'])
# Unclassified_Points = merged_df.to_numpy()

In [None]:
class_1Labels = [1]*(len(Gpoints))
class_2Labels = [2]*(len(Trees_Buffer))
class_3Labels = [3]*(len(Extracted_SRpoints))
class_4Labels = [4]*(len(Extracted_NTpoints))
class_5Labels = [5]*(len(Unclassified_Points))

final_labels = np.concatenate((class_1Labels,class_2Labels,class_3Labels,class_4Labels,class_5Labels),axis=0)
All_points = np.concatenate((Gpoints, Trees_Buffer,Extracted_SRpoints,Extracted_NTpoints,Unclassified_Points), axis=0)


In [None]:
clippedLasNumpy = All_points
las = laspy.create(file_version="1.4", point_format=3)

Xscale = 0.01
Yscale = 0.01
Zscale = 0.01
Xoffset = 0
Yoffset = 0
Zoffset = 0

las.header.offsets = [Xoffset,Yoffset,Zoffset]
las.header.scales = [Xscale,Yscale,Zscale]
las.x = clippedLasNumpy[:, 0]
las.y = clippedLasNumpy[:, 1]
las.z = clippedLasNumpy[:, 2]
las.intensity = [0]*len(clippedLasNumpy)
las.classification =  final_labels
las.return_number =  [0]*len(clippedLasNumpy)
las.number_of_returns =  [0]*len(clippedLasNumpy)

generated_lasfolderpath =  "Datasets/" + "Package_Generated/"+f[:-4]+"/"+str(year)+"/LasClassified_"+f[:-4]+"/"
# Check whether the specified generated_lasfolderpath exists or not
isExist = os.path.exists(generated_lasfolderpath)
if not isExist:
# Create a new directory because it does not exist 
    os.makedirs(generated_lasfolderpath)
    
generated_lasfilename = "lasFile_Reconstructed_"+f

las.write(generated_lasfolderpath+generated_lasfilename)

In [None]:
generated_lasfolderpath =  "Datasets/" + "Package_Generated/"+f[:-4]+"/"+str(year)+"/LasClassified_"+f[:-4]+"/"

In [None]:
#Plotting new classified las file

Gen_las = laspy.read(generated_lasfolderpath+generated_lasfilename)

Xscale = Gen_las.header.x_scale
Yscale = Gen_las.header.y_scale
Zscale = Gen_las.header.z_scale

Xoffset = Gen_las.header.x_offset
Yoffset = Gen_las.header.y_offset
Zoffset = Gen_las.header.z_offset

Gen_lidarpoints = np.array(
    ( (Gen_las.X*1.00) + Xoffset,  # convert ft to m and correct measurement
      (Gen_las.Y*1.00) + Yoffset,
      (Gen_las.Z*1.00) + Zoffset,
    Gen_las.intensity,
    Gen_las.classification,
    Gen_las.return_number, 
    Gen_las.number_of_returns)).transpose()
G_lidar_df = pd.DataFrame(Gen_lidarpoints , columns=['X','Y','Z','intensity','classification','return_number','number_of_returns'])


In [None]:
# class_1Labels = [1]*(len(Gpoints))
# class_2Labels = [2]*(len(Trees_Buffer))
# class_3Labels = [3]*(len(Extracted_SRpoints))
# class_4Labels = [4]*(len(Extracted_NTpoints))

G_las_Gpoints = G_lidar_df.iloc[:,:3][G_lidar_df["classification"] == 1].to_numpy()
G_las_Tpoints = G_lidar_df.iloc[:,:3][G_lidar_df["classification"] == 2].to_numpy()
G_las_SRpoints = G_lidar_df.iloc[:,:3][G_lidar_df["classification"] == 3].to_numpy()
G_las_NTpoints = G_lidar_df.iloc[:,:3][G_lidar_df["classification"] == 4].to_numpy()
G_las_NCpoints = G_lidar_df.iloc[:,:3][G_lidar_df["classification"] == 5].to_numpy()

#plotting inlier and outlier
All_points_1 = np.concatenate((G_las_Gpoints, G_las_Tpoints,G_las_SRpoints,G_las_NTpoints,G_las_NCpoints), axis=0)
rgb_Ground =  [[1,0,0]]*len(G_las_Gpoints) #Set red colour
rgb_Tree = [[0,1,0]]*len(G_las_Tpoints) #set green colour
rgb_SR = [[0,0,1]]*len(G_las_SRpoints) #set blue colour
rgb_NT = [[255,255,255]]*len(G_las_NTpoints) #set white colour
rgb_NC = [[255,255,0]]*len(G_las_NCpoints) #set cyan colour
All_rgb = np.concatenate((rgb_Ground, rgb_Tree,rgb_SR,rgb_NT,rgb_NC), axis=0)

#Red - Inlier - ground plane , Green - Outlier
v = pptk.viewer(All_points_1, All_rgb)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

TakeScreenShot(All_points_1,v,f,year,"Classified_LasFile")

In [None]:
etime = time.time()
logging.info("Completed file: %s - Total Time Taken : ", f, etime - stime)

---
---
---

# PLotting from generated las file

In [None]:
# Reading las file
laspath = '/Users/sarangpramode/Desktop/Hub/TerraVide/Datasets/Package_Generated/25192/2017/LasClassified_25192/lasFile_Reconstructed_25192.las' #Datasets/Package_Generated/915120/2017/LasClassified_915120/lasFile_Reconstructed_915120.las'
#laspath = '/Users/sarangpramode/Desktop/Hub/TerraVide/Datasets/Package_Generated/922135/2017/LasClassified_922135/lasFile_Reconstructed_922135.las'
#Plotting new classified las file

test_las = laspy.read(laspath)

Xscale = test_las.header.x_scale
Yscale = test_las.header.y_scale
Zscale = test_las.header.z_scale

Xoffset = test_las.header.x_offset
Yoffset = test_las.header.y_offset
Zoffset = test_las.header.z_offset

test_lidarpoints = np.array(
    ( (test_las.X*1.00) + Xoffset,  # convert ft to m and correct measurement
      (test_las.Y*1.00) + Yoffset,
      (test_las.Z*1.00) + Zoffset,
    test_las.intensity,
    test_las.classification,
    test_las.return_number, 
    test_las.number_of_returns)).transpose()
T_lidar_df = pd.DataFrame(test_lidarpoints , columns=['X','Y','Z','intensity','classification','return_number','number_of_returns'])

# class_1Labels = [1]*(len(Gpoints))
# class_2Labels = [2]*(len(Trees_Buffer))
# class_3Labels = [3]*(len(Extracted_SRpoints))
# class_4Labels = [4]*(len(Extracted_NTpoints))

G_las_Gpoints = T_lidar_df.iloc[:,:3][T_lidar_df["classification"] == 1].to_numpy()
G_las_Tpoints = T_lidar_df.iloc[:,:3][T_lidar_df["classification"] == 2].to_numpy()
G_las_SRpoints = T_lidar_df.iloc[:,:3][T_lidar_df["classification"] == 3].to_numpy()
G_las_NTpoints = T_lidar_df.iloc[:,:3][T_lidar_df["classification"] == 4].to_numpy()
G_las_NCpoints = T_lidar_df.iloc[:,:3][T_lidar_df["classification"] == 5].to_numpy()

#plotting inlier and outlier
#All_points_1 = np.concatenate((G_las_Gpoints, G_las_Tpoints,G_las_SRpoints,G_las_NTpoints,G_las_NCpoints), axis=0)
All_points_1 = np.concatenate((G_las_Gpoints, G_las_Tpoints,G_las_SRpoints,G_las_NTpoints), axis=0)

rgb_Ground =  [[1,0,0]]*len(G_las_Gpoints) #Set red colour
rgb_Tree = [[0,1,0]]*len(G_las_Tpoints) #set green colour
rgb_SR = [[0,0,1]]*len(G_las_SRpoints) #set blue colour
rgb_NT = [[255,255,255]]*len(G_las_NTpoints) #set white colour
rgb_NC = [[255,255,0]]*len(G_las_NCpoints) #set yellow colour
#All_rgb = np.concatenate((rgb_Ground, rgb_Tree,rgb_SR,rgb_NT,rgb_NC), axis=0)
All_rgb = np.concatenate((rgb_Ground, rgb_Tree,rgb_SR,rgb_NT), axis=0)

#Red - Inlier - ground plane , Green - Outlier
v = pptk.viewer(All_points_1, All_rgb)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,1])
v.set(point_size = 0.04)

In [None]:
stopper

In [None]:
p1 = Park_CLusterPoints
p2 = Extracted_SRpoints
All_points_1 = np.concatenate((p1, p2), axis=0)
rgb_p2 =  [[255,255,255]]*len(p2) #Set white colour - Not Ground
rgb_p1 = [[1,0,0]]*len(p1) #set red colour - Ground
All_rgb = np.concatenate((rgb_p1, rgb_p2), axis=0)

v = pptk.viewer(All_points_1, All_rgb)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,0])
v.set(point_size = 0.04)

In [None]:
rgb_array = assign_colors(Trees_Buffer, accepted_clusters)
v = pptk.viewer(Trees_Buffer)
v.attributes(rgb_array/255)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,0])
v.set(point_size = 0.04)

In [None]:
pptk.viewer(Filtered_TreeBuffer)

In [None]:
db = DBSCAN(eps=np.mean(EPS_distribution_df.EPS), min_samples=30).fit(Trees_Buffer)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
DB_labels = db.labels_

In [None]:
labels, count = np.unique(DB_labels,return_counts=True)
label_count_arr = np.asarray([labels , count]).T

In [None]:
Threshold = 2000 #want to ignore clusters from with multiple trees
filtered_cloud_labels = []
retained_cloud_labels = []
for i in label_count_arr:
    
    if i[1] > Threshold:
        filtered_cloud_labels.append(i[0])
    else:
        retained_cloud_labels.append(i[0])
retained_cloud_labels = np.array(retained_cloud_labels)

In [None]:
filtered_TB_cloud = []

for fl in filtered_cloud_labels:
    if fl == -1:
        continue
    indexes = np.where(fl == DB_labels)
    points = Trees_Buffer[indexes]
    for p in points:
        filtered_TB_cloud.append(p)
filtered_TB_cloud=np.array(filtered_TB_cloud)

In [None]:
retained_TB_cloud = []

for rl in retained_cloud_labels:
    if rl == -1:
        continue
    indexes = np.where(rl == DB_labels)
    points = Trees_Buffer[indexes]
    for p in points:
        retained_TB_cloud.append(p)
retained_TB_cloud=np.array(retained_TB_cloud)

In [None]:
pptk.viewer(retained_TB_cloud)

In [None]:
Trees_Buffer.shape, LM_cluster_labels.shape

---

In [None]:
NGpoints_df['nearestNeighbor'] = indices

In [None]:
# Create a set of unique cluster labels
clusters = NGpoints_df['nearestNeighbor']
labels = set(clusters)
# Create a dictionary to map cluster labels to colors
color_map = {label: np.random.randint(0, 255, 3) for label in labels}

In [None]:
indices.flatten()

In [None]:
v = pptk.viewer(NGpoints)
v.attributes(rgb_array/255)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,0])
v.set(point_size = 0.04)

In [None]:
# fig = plt.figure(figsize=(15,15), dpi=300, constrained_layout=True)
# ax1 = fig.add_subplot(1, 1, 1, projection='3d') ############      
# ax1.scatter3D(NGpoints_df['X'], NGpoints_df['Y'], NGpoints_df['Z'], zdir='z', s=0.11, c=NGpoints_df['nearestNeighbor'], marker='o', depthshade=True)
# ax1.grid(False)
# ax1.set_axis_off()           
# plt.show

In [None]:
#plotting tree poitns found
p1 = local_maximas
p2 = NGpoints
All_points_1 = np.concatenate((p1, p2), axis=0)
rgb_p2 =  [[255,255,255]]*len(p2) #Set white colour - Not Ground
rgb_p1 = [[1,0,0]]*len(p1) #set red colour - Ground
All_rgb = np.concatenate((rgb_p1, rgb_p2), axis=0)

v = pptk.viewer(All_points_1, All_rgb)
v.set(show_grid=False)
v.set(show_axis=False)
v.set(bg_color = [0,0,0,0])
v.set(point_size = 0.04)

In [None]:

#Taking time - >10min(not completed) for 2964543 points
maxSearch = df
maxSearch['seeds'] = 1
xs = maxSearch['X'].to_numpy()
ys = maxSearch['Y'].to_numpy()
zs = maxSearch['Z'].to_numpy()
seeds = maxSearch['seeds'].to_numpy()

neighborhood = 4
changes = 1
while changes > 0:
    changes = 0
    i = 0
    for x,y,z,seed in zip(xs,ys,zs,seeds):
        if seed == 1:
            zsearch = zs[xs > x - neighborhood]
            xsearch = xs[xs > x - neighborhood]
            ysearch = ys[xs > x - neighborhood]
            
            zsearch = zsearch[xsearch < x + neighborhood]
            ysearch = ysearch[xsearch < x + neighborhood]
            
            zsearch = zsearch[ysearch > y - neighborhood]
            ysearch = ysearch[ysearch > y - neighborhood]
            
            zsearch = zsearch[ysearch < y + neighborhood]

            zmax = np.max(zsearch)
            if z < zmax:
                seeds[i] = 0
                changes += 1
            else:
                pass
        else:
            pass
        i+=1

maxSearch['seeds'] = seeds      
localMaxima = maxSearch[maxSearch['seeds']>0]

#

centers=localMaxima[['X','Y','Z']].to_numpy()
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(centers)
points = maxSearch[['X','Y','Z']].to_numpy()
distances, indices = nbrs.kneighbors(points)
maxSearch['nearestNeighbor'] = indices