In [26]:
import laspy
import numpy as np
from scipy.interpolate import griddata
from pyproj import Transformer, CRS
from tqdm import tqdm
import pandas as pd
import sys
import pygmt
from sklearn.cluster import DBSCAN
import utm
import logging
import json
import os
import time
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

# Initialisation and constants

In [2]:
# Input/output locations for the zone being processed.
# NOTE(review): the two /Volumes/... entries are absolute, machine-specific paths — adjust per machine.

# Text file that lists the .laz tiles of the zone, one file name per line
zone_files_list = "./download_lists/zone_1.1_files.txt"
# Directory in which the listed .laz files are looked up
zone_laz_dir = "/Volumes/SSD2/Split_NRW/zone_1.1/"
# Directory in which the matching DEM .tif files are looked up
zone_DEM_dir = "/Volumes/SSD2/Split_NRW/zone_1.1_DEM/"

# Directory that receives data_processing.log
log_dir = "./logs/"
# JSON index used to look up the size and timestamp of each file
index_file = "./assets/index.json"

In [27]:
# Constants for laz processing

SSFACTOR = 1 # Subsampling step for the point cloud: every SSFACTOR-th point is kept (1 keeps all points)

# Point classification codes that are kept during processing.
# The first two names are German: "lastReturnNichtBoden" ~ "last return, non-ground",
# "brueckenpunkte" ~ "bridge points" — TODO confirm the exact meaning of each code
# against the data provider's classification table.
lastReturnNichtBoden = 20
brueckenpunkte = 17
unclassified = 1

# Points whose classification is in this list pass the mask in the batch-processing loop
class_ok = [brueckenpunkte, lastReturnNichtBoden, unclassified]

# Target coordinate reference system for the reprojected DEM rasters
dst_crs = 'EPSG:4326'

In [4]:
# Initialise logging
# Set level to logging.DEBUG if more detail is necessary

# The file handler cannot be created when the log directory is missing
# (e.g. on a fresh checkout), so create it first.
os.makedirs(log_dir, exist_ok=True)

logging.basicConfig(filename=os.path.join(log_dir, 'data_processing.log'),
                    level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')

# Function definitions

In [5]:
def load_file_to_list(filename):
    """Read a text file and return its non-empty lines as a list of strings.

    Leading and trailing whitespace (including the newline) is stripped from
    every line. Lines that are empty after stripping are skipped, so a blank
    or trailing empty line in the list file does not produce an empty file
    name for the downstream lookups.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    list of str
        The stripped, non-empty lines in file order.
    """
    with open(filename, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]

In [6]:
# Create a dictionary for quick lookup from the JSON data (much quicker than recursively looking up in the JSON)

def create_lookup_dict(json_data):
    """Flatten the index into a ``{file name: (size, timestamp)}`` mapping.

    Walks every entry of every dataset's ``'files'`` list under the
    ``'datasets'`` key. A missing ``'datasets'`` or ``'files'`` key is treated
    as an empty list. When the same name occurs more than once, the last
    occurrence wins.

    Parameters
    ----------
    json_data : dict
        Parsed index data.

    Returns
    -------
    dict
        Maps each file's ``'name'`` to a ``(size, timestamp)`` tuple.
    """
    return {
        entry['name']: (entry['size'], entry['timestamp'])
        for dataset in json_data.get('datasets', [])
        for entry in dataset.get('files', [])
    }

In [14]:
def calculate_size(filenames, lookup_dict):
    """Count the listed files known to the index and add up their sizes.

    Parameters
    ----------
    filenames : iterable of str
        File names to look up.
    lookup_dict : dict
        Maps a file name to a sequence whose first item is the file size
        (anything ``int()`` accepts).

    Returns
    -------
    tuple
        ``(number of files found, total size in GB rounded to 2 decimals,
        list of names not found in lookup_dict)``.
    """
    sizes_found = []
    not_found_files = []

    for name in filenames:
        entry = lookup_dict.get(name)
        if not entry:
            not_found_files.append(name)
            continue
        sizes_found.append(int(entry[0]))  # first item of the entry is the size

    size_gb = round(sum(sizes_found) / (1024**3), 2)
    return len(sizes_found), size_gb, not_found_files


In [22]:
def check_files_exist(file_list, directory):
    """Return the entries of ``file_list`` that do not exist inside ``directory``.

    Parameters
    ----------
    file_list : iterable of str
        File names, joined onto ``directory`` before the check.
    directory : str
        Directory in which the files are expected.

    Returns
    -------
    list of str
        The names whose joined path does not exist, in input order.
    """
    return [
        name
        for name in file_list
        if not os.path.exists(os.path.join(directory, name))
    ]

# Load necessary data and perform verifications

In [13]:
# Load the index file and create a lookup dictionary

with open(index_file, 'r') as file:
    data = json.load(file)
lookup_dict = create_lookup_dict(data)
logging.info("Index file loaded.")

# Load .laz files list and calculate number of files and size
# index_info is (files found in index, total size in GB, names not found in index)

laz_list = load_file_to_list(zone_files_list)
index_info = calculate_size(laz_list, lookup_dict)
logging.info(f"Loaded .laz file list {zone_files_list}, found {index_info[0]} files, size is {index_info[1]} GB.")

# Check that all .laz files in the list exist in the index and .laz directory

if index_info[2]:
    logging.error(f"The following files were not found in the index: {index_info[2]}")

missing_laz = check_files_exist(laz_list, zone_laz_dir)

if not missing_laz:
    logging.info("All .laz files are present in the LAZ directory.")
else:
    # Extra positional arguments are merged into the message with %-formatting,
    # so the message needs a %s placeholder; without it the logging module
    # reports a formatting error and the list never reaches the log.
    logging.error("The following .laz files were not found: %s", missing_laz)

In [23]:
# Create a DEM file list and perform verifications

def convert_filenames(laz_files):
    """Build the DEM .tif file name for each .laz file name.

    Each input name is split on ``'_'``; the third and fourth fields are
    inserted into the pattern ``dgm1_32_<third>_<fourth>_1_nw.tif``.

    Parameters
    ----------
    laz_files : iterable of str
        .laz file names with at least four ``'_'``-separated fields.

    Returns
    -------
    list of str
        The DEM file names, in input order.

    Raises
    ------
    IndexError
        If a name has fewer than four ``'_'``-separated fields.
    """
    split_names = (name.split('_') for name in laz_files)
    return [f"dgm1_32_{parts[2]}_{parts[3]}_1_nw.tif" for parts in split_names]

# Derive the expected DEM file name for every .laz file and check that each one exists
dem_list = convert_filenames(laz_list)

missing_DEM = check_files_exist(dem_list, zone_DEM_dir)

if not missing_DEM:
    logging.info("All DEM .tif files are present in the DEM directory.")
else:
    # Extra positional arguments are merged into the message with %-formatting,
    # so the message needs a %s placeholder; without it the logging module
    # reports a formatting error and the list never reaches the log.
    logging.error("The following DEM .tif files were not found: %s", missing_DEM)

['dgm1_32_328_5635_1_nw.tif']

# Batch processing

In [None]:
logging.info("Starting to process...")
start_time = time.time()

for laz_file in laz_list:

    laz_file_path = os.path.join(zone_laz_dir, laz_file)

    with laspy.open(laz_file_path) as file:
        las = file.read()

    logging.debug(f"File {laz_file} loaded")

    # Keep every SSFACTOR-th point, then only the points whose class is in class_ok
    class_val = las.classification[::SSFACTOR]

    mask = (np.isin(class_val, class_ok))

    # Stack x, y, z as rows and transpose, giving one (x, y, z) row per kept point
    points = np.vstack((las.x[::SSFACTOR][mask], las.y[::SSFACTOR][mask], las.z[::SSFACTOR][mask])).transpose()
    # NOTE(review): `points` is not used further in the visible part of this loop

    DEM_file = convert_filenames([laz_file])[0]
    # The DEM tiles live in zone_DEM_dir (the directory the existence check
    # uses), so open the tile there rather than relative to the working directory.
    DEM_file_path = os.path.join(zone_DEM_dir, DEM_file)

    with rasterio.open(DEM_file_path) as src:
        # Compute the grid (transform and size) of the raster in the target CRS
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        # The temporary file is overwritten on every iteration
        with rasterio.open('./temp_DEM_file.tif', 'w', **kwargs) as dst:
            # Raster bands are numbered from 1
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest)

    logging.debug("Saved temporary reprojected DEM file")

end_time = time.time()

# Compute execution time
execution_time = end_time - start_time

logging.info(f"Execution time: {execution_time} seconds")