In [3]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import utm
import geojsoncontour
import plotly.graph_objects as go
import laspy
import rasterio
import pyproj
import pygmt
import sys
from affine import Affine
from rasterio.transform import from_origin
from tqdm import tqdm
from matplotlib.patches import Circle
from sklearn.cluster import DBSCAN

Credits to https://www.generic-mapping-tools.org/egu22pygmt/lidar_to_surface.html for the tutorial on pygmt and LiDAR data

# Constants

In [4]:
# Define the bounding box coordinates in WGS-84
LAT_MIN, LAT_MAX = 50.85, 51
LON_MIN, LON_MAX = 6.85, 7.05

# Minimum Z to consider
Z_MIN = 50

# Subsampling factor for points cloud
SSFACTOR = 4

# Subbox size in meters
box_size = 2000

# Points classification to consider
# https://www.bezreg-koeln.nrw.de/brk_internet/geobasis/hoehenmodelle/nutzerinformationen.pdf
lastReturnNichtBoden = 20
brueckenpunkte = 17
class_ok = [brueckenpunkte, lastReturnNichtBoden]

# Define the functions we need

In [5]:
# Define conversion functions

def utm_to_latlon(x, y):
    # Convert lat/lon to UTM coordinates
    lat, lon = utm.to_latlon(x, y, 32, 'U')

    return lat, lon

def latlon_to_utm(lat, lon):
    # Convert lat/lon to UTM coordinates
    utm_x, utm_y, _, _ = utm.from_latlon(lat, lon)

    return utm_x, utm_y

In [6]:
# Define functions to find and load LiDAR data files

def find_files(bbox_min_x, bbox_max_x, bbox_min_y, bbox_max_y):
       
    bbox_min_x = bbox_min_x
    bbox_min_y = bbox_min_y

    bbox_max_x = bbox_max_x
    bbox_max_y = bbox_max_y

    # Determine the necessary .laz files based on the easting and northing coordinates
    min_easting = int(bbox_min_x) // 1000
    min_northing = int(bbox_min_y) // 1000
    max_easting = int(bbox_max_x) // 1000
    max_northing = int(bbox_max_y) // 1000

    laz_files = []

    for easting in range(min_easting, max_easting + 1):
        for northing in range(min_northing, max_northing + 1):
            #filename = f"./lidar_data/3dm_32_{easting:03d}_{northing:04d}_1_nw.laz"
            filename = f"/Volumes/SSD_portable/lidar_data/Cologne_extended/3dm_kacheln/3dm_32_{easting:03d}_{northing:04d}_1_nw.laz"
            laz_files.append(filename)

    return laz_files

def load_files(laz_files):

    # Initialize empty arrays for point coordinates and elevations
    x_all = np.array([])
    y_all = np.array([])
    z_all = np.array([])

    for file in laz_files:
        las = laspy.read(file)
        #print('processing %s'%(file))
        x = las.x[::SSFACTOR]
        y = las.y[::SSFACTOR]
        z = las.z[::SSFACTOR]
        class_val = las.classification[::SSFACTOR]

        mask = (np.isin(class_val, class_ok))&(z>=Z_MIN)

        # Stack point coordinates and elevations
        x_all = np.hstack((x_all, x[mask]))
        y_all = np.hstack((y_all, y[mask]))
        z_all = np.hstack((z_all, z[mask]))
    
    return x_all, y_all, z_all

In [7]:
# Define functions to load elevation data from the DEM

def transform_coords(lat, lon, src_crs, dest_crs):
    """Transform coordinates from the source CRS to the destination CRS."""
    transformer = pyproj.Transformer.from_crs(src_crs, dest_crs, always_xy=True)
    return transformer.transform(lon, lat)

def latlon_to_rowcol(lat, lon, transform):
    """Convert latitude and longitude to row and column indices."""
    col, row = ~transform * (lon, lat)
    return int(row), int(col)

def get_elevation(dem_file, lat, lon):
    """Get elevation for a given latitude and longitude from a DEM file."""
    with rasterio.open(dem_file) as dataset:
        # Get the affine transform for the dataset
        transform = dataset.transform

        # Define CRS for WGS84 and ETRS89
        wgs84_crs = 'EPSG:4326'
        etrs89_crs = dataset.crs

        # Transform the coordinates from WGS84 to ETRS89
        etrs89_lon, etrs89_lat = transform_coords(lat, lon, wgs84_crs, etrs89_crs)

        # Convert lat, lon to row, col
        row, col = latlon_to_rowcol(etrs89_lat, etrs89_lon, transform)

        # Read the elevation value
        elevation = dataset.read(1)[row, col]
        return elevation

# Load LiDAR data in a dataframe - Do not execute if the dataframe pickle is already available

In [11]:
bbox_min_x, bbox_min_y = latlon_to_utm(LAT_MIN, LON_MIN)
bbox_max_x, bbox_max_y = latlon_to_utm(LAT_MAX, LON_MAX)

x_edges = []
y_edges = []

x_edges = np.arange(bbox_min_x, bbox_max_x,  box_size)
x_edges = np.append(x_edges, bbox_max_x)

y_edges = np.arange(bbox_min_y, bbox_max_y,  box_size)
y_edges = np.append(y_edges, bbox_max_y)

x_len = len(x_edges)
y_len = len(y_edges)

x_all = np.array([])
y_all = np.array([])
z_all = np.array([])

print('x_len: %s, y_len: %s, number of boxes: %s'%(str(x_len), str(y_len), str((x_len-1)*(y_len-1))))

cols, rows = (x_len-1, y_len-1)

# Initialise 2D lists (which will contain arrays and not scalar, so 2D np array does not work here)
x_results = [[0 for i in range(cols)] for j in range(rows)]
y_results = [[0 for i in range(cols)] for j in range(rows)]
z_results = [[0 for i in range(cols)] for j in range(rows)]

# iterate on subboxes
for i in tqdm(range(x_len - 1)): # subboxes along x axis (longitude)
    for j in range(y_len - 1): # subboxes along y axis (latitude)

        laz_files = find_files(x_edges[i], x_edges[i+1], y_edges[j], y_edges[j+1])

        x_all_temp, y_all_temp, z_all_temp = load_files(laz_files)

        x_all = np.concatenate((x_all, x_all_temp))
        y_all = np.concatenate((y_all, y_all_temp))
        z_all = np.concatenate((z_all, z_all_temp))

x_len: 9, y_len: 10, number of boxes: 72


100%|██████████| 8/8 [09:30<00:00, 71.25s/it]


In [14]:
# Create our dataframe

df = pd.DataFrame(
    data={
        "x": x_all,
        "y": y_all,
        "z": z_all
    }
)

size_df = sys.getsizeof(df)
print(f"Size of the DataFrame: {np.ceil(size_df / (1024*1024))} MB")

# Save our dataframe for future use (optional, heavy file)

# df.to_pickle("./lidar_pkl/df_Cologne_extended.pkl")

Size of the DataFrame: 6189.0 MB


# Load the LiDAR dataframe (optional)

In [8]:
df = pd.read_pickle("./lidar_pkl/df_Cologne_extended.pkl")

# Preprocess LiDAR data using pygmt's blockmedian function

In [10]:
# Get bounding box region

region = pygmt.info(data=df[["x", "y"]], spacing=1)  # West, East, South, North

print(f"Data points covers region: {region}")

Data points covers region: [ 348000.  364000. 5635000. 5652000.]


In [11]:
df_trimmed = pygmt.blockmedian(
    data=df[["x", "y", "z"]],
    T=0.99,  # 99th quantile, i.e. the highest point
    spacing="1+e",
    region=region,
)

size_df_trimmed = sys.getsizeof(df_trimmed)
print(f"Size of the DataFrame: {np.ceil(size_df_trimmed / (1024*1024))} MB")

del(df)

Size of the DataFrame: 1496.0 MB


In [9]:
# Save the trimmed dataframe if necessary

df_trimmed.to_pickle("./lidar_data/df_trimmed_Cologne_extended.pkl")

# Identify the obstacles user DBSCAN clusters

In [12]:
# Identify points that are above 120m above geoid (for Cologne this means about 70 m above ground).
high_points = df_trimmed[df_trimmed['z'] > 120]

# Assuming that points within 100m of each other belong to the same obstacle
clustering = DBSCAN(eps=50, min_samples=2).fit(high_points[['x', 'y', 'z']])

# Add the cluster labels to the high_points DataFrame
high_points['cluster'] = clustering.labels_

# Filter out noise points (DBSCAN labels noise as -1)
obstacles = high_points[high_points['cluster'] != -1]

# Find the highest point in each obstacle cluster
highest_points = obstacles.loc[obstacles.groupby('cluster')['z'].idxmax()]

# The resulting DataFrame 'highest_points' contains the coordinates of the highest point of each obstacle
highest_points.reset_index(drop=True, inplace=True)

# Apply the conversion function to the DataFrame to create new columns 'lat' and 'lon'
highest_points['lat'], highest_points['lon'] = zip(*highest_points.apply(lambda row: utm_to_latlon(row['x'], row['y']), axis=1))

# Display the resulting DataFrame
highest_points

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  high_points['cluster'] = clustering.labels_


Unnamed: 0,x,y,z,cluster,lat,lon
0,355696.97,5651889.23,194.01,0,51.000461,6.943399
1,358819.43,5649009.43,120.50,1,50.975355,6.988989
2,358839.19,5648870.44,121.02,2,50.974111,6.989325
3,358296.11,5647377.22,182.46,3,50.960558,6.982176
4,352501.18,5647156.34,125.68,4,50.957119,6.899805
...,...,...,...,...,...,...
103,351424.96,5635560.26,127.97,103,50.852634,6.889212
104,351837.58,5635550.84,124.50,104,50.852655,6.895074
105,348670.38,5635010.50,128.48,105,50.846979,6.850334
106,352088.39,5635085.40,135.10,106,50.848536,6.898822


In [12]:
# Open obstacle database
#
path_to_obstacles_json = './obstacles_ENR5.4.json'
with open(path_to_obstacles_json) as obstacles_database:
    obstacles_data = json.load(obstacles_database)
obs_df = pd.json_normalize(obstacles_data, record_path =['obstacles'])

obs_df = obs_df.drop(index=16)

obs_df

Unnamed: 0,id,name,permanent,lat,lon,height_m,terrain_elevation_m,ENR5.4
0,0,Colonius Fernsehturm,True,50.946944,6.931944,268,48,NORDRHEIN-WESTFALEN 204-10
1,1,Koelnturm (Mediapark),True,50.947908,6.942575,165,50,NORDRHEIN-WESTFALEN 271-10
2,2,Koelner Dom,True,50.94152,6.957296,157,55,NORDRHEIN-WESTFALEN 92-10
3,3,AREO Turm,True,50.9117,6.963006,76,51,
4,4,Colonia Haus,True,50.960556,6.981667,148,46,NORDRHEIN-WESTFALEN 632-10
5,5,Koeln Triangle,True,50.940306,6.971778,106,45,NORDRHEIN-WESTFALEN 645-10
6,6,Uni-Center,True,50.9218,6.932576,133,52,NORDRHEIN-WESTFALEN 167-10
7,7,Krohstr. 4 (Bayenthal),True,50.907423,6.972111,73,52,
8,8,TUV-Rheinland-Turm,True,50.924352,6.992429,114,50,
9,9,Pollonius,True,50.921617,7.004475,115,45,NORDRHEIN-WESTFALEN 263-10


In [50]:
dem_file = './DEM_data/Cologne_EUDEM_v11.tif'

# Add DEM elevations to the AIP obstales data
for index, row in obs_df.iterrows():
    obs_df.at[index, 'dem_gnd_elev'] = get_elevation(dem_file, row['lat'], row['lon'])

# Add DEM elevations to the LiDAR obstacles data and calculate obstacles heights
for index, row in highest_points.iterrows():
    highest_points.at[index, 'dem_gnd_elev'] = get_elevation(dem_file, row['lat'], row['lon'])
for index, row in highest_points.iterrows():
    highest_points.at[index, 'lidar_obs_hgt'] = row['z'] - row['dem_gnd_elev']

In [53]:
highest_points[highest_points['lidar_obs_hgt']>100]

Unnamed: 0,x,y,z,cluster,lat,lon,dem_gnd_elev,lidar_obs_hgt
0,355696.97,5651889.23,194.01,0,51.000461,6.943399,43.784355,150.225645
3,358296.11,5647377.22,182.46,3,50.960558,6.982176,48.236443,134.223557
8,355512.23,5646066.49,214.5,8,50.948087,6.943082,59.824844,154.675156
9,354725.39,5645983.48,311.9,9,50.947143,6.931921,53.679962,258.220038
12,356490.14,5645300.33,212.36,12,50.941446,6.957295,57.550079,154.809921
33,358911.25,5643321.59,160.49,33,50.924261,6.9925,49.55032,110.93968
36,354713.09,5643165.27,185.38,36,50.921814,6.93287,66.646996,118.733004
37,359746.46,5643005.48,161.46,37,50.921623,7.004498,47.545879,113.914121
42,356540.36,5640995.14,158.25,42,50.902768,6.959703,51.396149,106.853851
90,357901.86,5636292.25,225.3,90,50.86084,6.980882,56.922394,168.377606


In [13]:



scattermapbox_objects = []

scattermapbox_objects.append(go.Scattermapbox(
    mode="markers",
    lon=highest_points['lon'], 
    lat=highest_points['lat'],
    marker={'size': 10, 'color': "red"},
    text=highest_points['z'],  # This will set the hovertext to show the z value
    hoverinfo='text'  # Only display the text on hover
))

scattermapbox_objects.append(go.Scattermapbox(
        name = 'Data limits',
        mode="lines",
        line=dict(color="black", width=1),
        lat=np.array([LAT_MAX, LAT_MAX, LAT_MIN, LAT_MIN, LAT_MAX]),
        lon=np.array([LON_MIN, LON_MAX, LON_MAX, LON_MIN, LON_MIN]),
        hoverinfo='name',
        hoverlabel_namelength=-1   # https://stackoverflow.com/questions/36207887/plot-ly-hover-box-size-attribute
    ))

obs_geojson = './obstacles.geojson'

# Load obstacles GeoJSON file
with open(obs_geojson) as f:
    obstacles_json = json.load(f)

lines = []
for feature in obstacles_json["features"]:
    if feature["geometry"]["type"] == "LineString":
        coords = feature["geometry"]["coordinates"]
        lines.append(coords)
#

for feature in obstacles_json["features"]:
    if feature["geometry"]["type"] == "LineString":
        coords = feature["geometry"]["coordinates"]
        name = feature["properties"]["name"]
        scattermapbox_objects.append(go.Scattermapbox(
            name=name,
            mode="lines",
            line=dict(color="red", width=2),
            lat=np.array(coords)[:,1],
            lon=np.array(coords)[:,0],
            hoverinfo='name',
            hoverlabel_namelength=-1   # https://stackoverflow.com/questions/36207887/plot-ly-hover-box-size-attribute
        ))

# Create a scatter plot of the highest points using Plotly with OpenStreetMap background
fig = go.Figure(data=scattermapbox_objects)

# Set the layout for the map
fig.update_layout(
    mapbox={
        'style': "open-street-map",
        'center': {'lon': np.mean(highest_points['lon']), 'lat': np.mean(highest_points['lat'])},
        'zoom': 12
    },
    showlegend=False
)

# Adjust the margins and set the height
fig.update_layout(height=800, margin={"r":10,"t":10,"l":10,"b":10})

# Show the figure
fig.show()

> Load the official AIP XLSX obstacles snapshot
> Filter according to the bounding box
> Load in a dataframe
> (Add DEM data)

use for comparison