# AHN tiles download and preprocessing

This notebook downloads and pre-processes AHN elevation data for a given set of point cloud tiles.

In [None]:
import numpy as np
import geopandas as gpd
import shapely.geometry as sg
from tqdm.notebook import tqdm
tqdm.pandas()
import pathlib
import os
import re
import laspy
import matplotlib.pyplot as plt

from upcp.preprocessing import ahn_preprocessing
from upcp.utils import las_utils

import set_path
from upc_sw import poly_utils

In [None]:
### SETTINGS ###

# When True, tiles already present in the .laz output folder are not redone
resume = True

# AHN output settings
ahn_version = 'ahn4'  # Either ahn3 or ahn4
ahn_resolution = 0.1  # Resolution for the .npz data
ahn_data_folder = '../../datasets/AHN4/'  # Location where AHN data will be stored
ahn_subtile_folder = '../../datasets/AHN4/AMS_subtiles_1000/'  # Location of AHN subtiles

# Output sub-folders, derived from the data folder and the AHN version
ahn_laz_folder = f'{ahn_data_folder}{ahn_version}_laz/'
ahn_npz_folder = f'{ahn_data_folder}{ahn_version}_npz/'

# https://geotiles.nl/ data source
base_url = f'https://geotiles.nl/{ahn_version.upper()}_T/'

# Make sure the main data folder exists before anything is written to it
pathlib.Path(ahn_data_folder).mkdir(parents=True, exist_ok=True)

## Load Cyclomedia tile data

In [None]:
# Read the tile overviews of both runs
run1_tiles, run2_tiles = map(gpd.read_file,
                             ('run1_tiles_10k.gpkg', 'run2_tiles_10k.gpkg'))

In [None]:
# Unique tile codes that occur in either run
all_tiles = set(run1_tiles['tilecode']) | set(run2_tiles['tilecode'])

In [None]:
# When resuming, drop the tile codes already found in the .laz output folder
if resume and os.path.exists(ahn_laz_folder):
    done_tiles = las_utils.get_tilecodes_from_folder(
        ahn_laz_folder, las_prefix='ahn')
    all_tiles -= done_tiles

In [None]:
# Drop the references to the tile frames; only the tile codes are needed from here on
run1_tiles = run2_tiles = None

In [None]:
# Sorted list of tile codes, each paired with its polygon
all_tiles = sorted(all_tiles)
tile_polygons = list(map(poly_utils.tilecode_to_poly, all_tiles))
all_tiles_gdf = gpd.GeoDataFrame({'tilecode': all_tiles,
                                  'geometry': tile_polygons})

### Load AHN data

In [None]:
# File names (without directory) of all .laz subtiles, in sorted order
ahn_subtiles = sorted(
    laz_path.name for laz_path in pathlib.Path(ahn_subtile_folder).glob('*.laz'))

In [None]:
def ahn_file_to_poly(filename, scale=1000):
    """Return the square footprint of an AHN subtile as a polygon.

    The file name must contain a tile code of the form ``XXX_YYY`` (two groups
    of three digits). When several such codes occur, the right-most one is
    used. The two numbers, multiplied by `scale`, are the minimum x and y
    coordinates of the tile; the tile spans `scale` units in both directions.

    Parameters
    ----------
    filename : str
        Name of the AHN subtile file.
    scale : int, optional
        Tile size and multiplier for the tile code numbers (default: 1000).

    Returns
    -------
    A shapely box covering the tile.

    Raises
    ------
    ValueError
        If `filename` does not contain a tile code.
    """
    code_match = re.match(r'.*(\d{3}_\d{3}).*', filename)
    if code_match is None:
        # Fail with a clear message instead of "NoneType is not subscriptable".
        raise ValueError(
            f'No tile code of the form XXX_YYY found in file name {filename!r}')
    x_code, y_code = code_match[1].split('_')
    x_min = int(x_code) * scale
    y_min = int(y_code) * scale
    return sg.box(x_min, y_min, x_min + scale, y_min + scale)

In [None]:
# One row per AHN subtile: its file name and its footprint polygon
subtile_polygons = list(map(ahn_file_to_poly, ahn_subtiles))
ahn_gdf = gpd.GeoDataFrame({'filename': ahn_subtiles,
                            'geometry': subtile_polygons})

### Find AHN subtiles that cover the target area based on Cyclomedia tiles

In [None]:
# Generate a merged GeoDataFrame for efficiency: the union of all target tiles
# is split into its separate polygons, one row each.
# NOTE(review): GeoPandas >= 1.0 deprecates `unary_union` in favour of
# `union_all()`; switch once the minimum supported version allows it.
merged_poly = all_tiles_gdf.unary_union
if isinstance(merged_poly, sg.Polygon):
    # A single polygon has no `.geoms`; wrap it so both cases are handled alike.
    merged_poly = sg.MultiPolygon([merged_poly])
merged_pc_tiles = gpd.GeoDataFrame({'geometry': list(merged_poly.geoms)})

In [None]:
# Filter ahn data based on merged target shapes
def _overlaps_target(row):
    """Return True if the row's geometry intersects, and does not merely
    touch, at least one of the merged target polygons."""
    intersecting = merged_pc_tiles.intersects(row.geometry)
    touching = merged_pc_tiles.touches(row.geometry)
    return (intersecting & ~touching).any()


ahn_gdf['used'] = ahn_gdf.apply(_overlaps_target, axis=1)
ahn_gdf = ahn_gdf[ahn_gdf['used']]

In [None]:
# Visualize the result: selected AHN subtiles (transparent, outlined) with the
# merged target area drawn on top
fig, ax = plt.subplots(1)
ahn_gdf.plot(
    ax=ax,
    alpha=0.25,
    edgecolor="black",
    linewidth=0.4,
)
merged_pc_tiles.plot(ax=ax)
ax.set_aspect('equal')
plt.show()

## Pre-process the AHN data

In [None]:
def match_subtile(row):
    """Return the file name of the first AHN subtile in `ahn_gdf` that
    contains the row's geometry, or None if no subtile contains it."""
    containing = ahn_gdf[ahn_gdf.contains(row.geometry)]
    if containing.empty:  # Shouldn't happen
        return None
    return containing['filename'].iloc[0]

In [None]:
# Match point cloud tiles to AHN subtiles: store for every tile the file name
# returned by match_subtile (None when no subtile contains the tile).
# progress_apply is the tqdm-enabled variant of DataFrame.apply.
all_tiles_gdf['subtile'] = all_tiles_gdf.progress_apply(match_subtile, axis=1)

In [None]:
# Generate .laz tiles
# Each AHN subtile is read and converted once, then clipped for every point
# cloud tile that was matched to it.

# Tiles without a matching subtile cannot be processed; report them instead of
# trying to read a file called "None".
unmatched = all_tiles_gdf['subtile'].isna()
if unmatched.any():
    print(f'Skipping {unmatched.sum()} tile(s) without a matching AHN subtile.')

# groupby leaves out missing keys; sort=False keeps the order of first appearance.
tiles_by_subtile = all_tiles_gdf.groupby('subtile', sort=False)['tilecode']

# The context manager closes the progress bar even if processing fails.
with tqdm(total=len(all_tiles_gdf), unit='tile', smoothing=0) as pbar:
    for subtile, tilecodes in tiles_by_subtile:
        pbar.set_postfix_str(subtile)
        ahn_cloud = laspy.read(f'{ahn_subtile_folder}{subtile}')
        ahn_cloud = laspy.convert(ahn_cloud, point_format_id=3, file_version='1.2')
        for pc_tile in tilecodes.values:
            # Only the tile code in the file name is meaningful here; no such
            # file is created by this cell.
            pc_path = f'dummy_{pc_tile}.laz'
            ahn_preprocessing.clip_ahn_las_tile(ahn_cloud, pc_path, out_folder=ahn_laz_folder)
            pbar.update(1)

In [None]:
# Generate .npz data for all tiles
# Make sure the output folder exists, then process every AHN .laz tile in turn.
pathlib.Path(ahn_npz_folder).mkdir(parents=True, exist_ok=True)
files = list(pathlib.Path(ahn_laz_folder).glob('ahn_*.laz'))

file_tqdm = tqdm(files, unit='file', smoothing=0)
for laz_file in file_tqdm:
    ahn_preprocessing.process_ahn_las_tile(
        laz_file, out_folder=ahn_npz_folder, resolution=ahn_resolution)

In [None]:
# Alternative, uses parallel processing
# Runs the batch processor script in a shell instead of the sequential loop,
# reading from the .laz folder and writing to the .npz folder.
# n_cores is passed to the script as its --workers argument.
n_cores = 10
!python ../../Urban_PointCloud_Processing/scripts/ahn_batch_processor.py --in_folder {ahn_laz_folder} --out_folder {ahn_npz_folder} --resume --workers {n_cores}