# Locate point cloud tiles in storage based on target areas

In [None]:
import pandas as pd
import geopandas as gpd
from tqdm.notebook import tqdm
tqdm.pandas()
import shapely.geometry as sg
import shutil
import pathlib
import matplotlib.pyplot as plt

In [None]:
### SETTINGS ###

# Run identifier; it is interpolated into the tile list file name and the
# output folder name below (presumably 'run1' or 'run2' - confirm).
run = 'run2'

# Root location under which the per-tile folders are looked up when copying
base_folder = '/XX/YY/ZZ'
# CSV file listing the tiles of the selected run
data_file = f'../datasets/AMS_{run}_tiles_list.csv'
# Destination folder for the extracted tiles of the selected run
out_folder = f'extract_{run}/'

# For target points only: target points in RD coordinates
target_points = [
    (122583.85, 485431.99),
    (122579.3, 489044.4),
    (118957.2, 488194.2),
    (119461.9, 485670.7),
]

# For neighbourhoods only: specify which neighbourhoods to keep
neighbourhood_data = '../datasets/neighbourhoods_amsterdam.csv'
neighbourhood_codes = ['M27', 'M28']

# For target area only: target area as Shapely Polygon or MultiPolygon
target_area = sg.Polygon([
    (122550, 485300),
    (122750, 485300),
    (122550, 485500),
])

# Make sure the output folder (and any missing parents) exists; a folder
# that is already present is left untouched.
pathlib.Path(out_folder).mkdir(exist_ok=True, parents=True)

## Load CycloMedia tile data

In [None]:
# Read the tile list for the configured run into a DataFrame
all_tiles = pd.read_csv(filepath_or_buffer=data_file)
# Show the first five rows as a quick sanity check
all_tiles.head(n=5)

In [None]:
# If we know the city district ("stadsdeel"), we can filter on its folder to speed things up.
# E.g., Zuid:
# all_tiles = all_tiles[all_tiles.folder=='nl-amsd-200608-7415-laz']

In [None]:
# Convert to GeoDataFrame
# Every tile becomes a 50 x 50 box with (RD_X, RD_Y) as its lower-left corner.
# The apply has to run on `all_tiles`: `all_tiles_gdf` is only created on the
# next statement, so referencing it here raises a NameError on a fresh run.
all_tiles['geometry'] = all_tiles.progress_apply(
    lambda row: sg.box(row.RD_X, row.RD_Y, row.RD_X + 50, row.RD_Y + 50),
    axis=1)
all_tiles_gdf = gpd.GeoDataFrame(all_tiles)
# Drop the reference to the plain DataFrame; only the GeoDataFrame is used from here on
all_tiles = None

## Filter by target points

In [None]:
# Convert to GeoDataFrame
# `target_points` is rebound from a list of (x, y) tuples to a GeoDataFrame.
# The guard keeps this cell re-runnable: without it a second run would iterate
# over the GeoDataFrame's column names and fail to build the points.
if not isinstance(target_points, gpd.GeoDataFrame):
    target_points = gpd.GeoDataFrame({'geometry': [sg.Point(pt) for pt in target_points]})

# Locate tiles which contain any of the target points
# (a tile is flagged when at least one target point intersects its box)
all_tiles_gdf['target'] = all_tiles_gdf.progress_apply(lambda row: target_points.intersects(row.geometry).any(), axis=1)
target_tiles_gdf = all_tiles_gdf[all_tiles_gdf.target]
print(f'{len(target_tiles_gdf)} tiles located ({len(target_tiles_gdf.tilecode.unique())} unique).')
print("TODO: if these numbers don't match, duplicate tiles are present. Deal with that :)")

In [None]:
# Check which target points are covered by point cloud tiles
# `intersects` is used here because the tiles themselves are selected with
# `intersects`: with `contains`, a point lying exactly on a tile edge would
# select that tile and still be reported as not covered.
target_points['covered'] = target_points.progress_apply(lambda row: target_tiles_gdf.intersects(row.geometry).any(), axis=1)
print(f'{target_points.covered.sum()} out of {target_points.covered.count()} points covered.')

In [None]:
# Visualize the result: located tiles as translucent boxes, target points in red
fig, ax = plt.subplots(nrows=1)

tile_style = {'edgecolor': "black", 'linewidth': 0.4, 'alpha': 0.25}
point_style = {'color': 'red', 'markersize': 1}

target_tiles_gdf.plot(ax=ax, **tile_style)
target_points.plot(ax=ax, **point_style)
# Equal aspect so the tile boxes are drawn without distortion
ax.set_aspect('equal')

## Filter by target area

### If the area is defined by neighbourhood codes, process those first

In [None]:
# Load neighbourhood data and convert to RD
neighbourhood_df = pd.read_csv(neighbourhood_data, index_col=0)
# The geometry column is stored as WKT text; parse it into geometry objects
parsed_geometry = gpd.GeoSeries.from_wkt(neighbourhood_df['geometry'])
neighbourhood_df['geometry'] = parsed_geometry
# Declare the source CRS as WGS84, then reproject in place to EPSG:28992
neighbourhood_gdf = gpd.GeoDataFrame(neighbourhood_df, crs='wgs84')
neighbourhood_gdf.to_crs(inplace=True, epsg='28992')
# Drop the reference to the plain DataFrame; only the GeoDataFrame is used below
neighbourhood_df = None

In [None]:
# Filter by target areas: flag the rows whose combination code is one of the
# configured neighbourhood codes
is_target_neighbourhood = neighbourhood_gdf['neighbourhood_combination_code'].isin(neighbourhood_codes)
neighbourhood_gdf['target'] = is_target_neighbourhood

In [None]:
# Visualize: plot all neighbourhoods, coloured by the boolean 'target' column
# so the selected neighbourhoods stand out from the rest.
neighbourhood_gdf.plot(column='target')

In [None]:
# Merge areas: union the geometries of all flagged neighbourhoods into one
# geometry. This overrides the `target_area` configured in the settings.
selected_neighbourhoods = neighbourhood_gdf.loc[neighbourhood_gdf['target']]
target_area = selected_neighbourhoods.unary_union

### Locate the required tiles

In [None]:
# Match point cloud tiles
def _tile_overlaps_target_area(tile_row):
    """Return True when the tile intersects the target area beyond mere boundary contact."""
    tile_geometry = tile_row.geometry
    if not target_area.intersects(tile_geometry):
        return False
    # Tiles that only touch the boundary of the area are excluded
    return not target_area.touches(tile_geometry)


all_tiles_gdf['target'] = all_tiles_gdf.progress_apply(_tile_overlaps_target_area, axis=1)
target_tiles_gdf = all_tiles_gdf[all_tiles_gdf['target']]

n_located = len(target_tiles_gdf)
n_unique = len(target_tiles_gdf['tilecode'].unique())
print(f'{n_located} tiles located ({n_unique} unique).')
print("TODO: if these numbers don't match, duplicate tiles are present. Deal with that :)")

In [None]:
# Share of the target area that is covered by the located tiles, in percent
tiles_footprint = target_tiles_gdf.unary_union
covered_area = target_area.intersection(tiles_footprint).area
coverage_percent = (covered_area / target_area.area) * 100
print(f'Coverage of target area: {coverage_percent:.2f} %')

In [None]:
# Visualize the result: located tiles as translucent boxes, target area outline dashed
fig, ax = plt.subplots(1)

target_tiles_gdf.plot(ax=ax, edgecolor="black", linewidth=0.4, alpha=0.25)
# Wrap a single Polygon in a MultiPolygon so the loop below handles both cases.
# isinstance (instead of an exact type comparison) also accepts Polygon subclasses.
# Note that this rebinds `target_area` to the wrapped geometry.
if isinstance(target_area, sg.Polygon):
    target_area = sg.MultiPolygon([target_area])
for ta in target_area.geoms:
    # Only the outer ring of each part is drawn; interior rings (holes) are not plotted
    ax.plot(*ta.exterior.xy, '--k')
ax.set_aspect('equal')

## Copy the files

In [None]:
# Copy the located tiles.

# With dry_run enabled the planned copies are only printed; set to False to copy.
dry_run = True

# Build the source paths with pathlib instead of string concatenation: joining
# `base_folder` and `folder` as plain strings drops the path separator whenever
# `base_folder` has no trailing slash (as in the default setting).
src_root = pathlib.Path(base_folder)
dst_dir = pathlib.Path(out_folder)

# Strip a leading '/' from the folder so the join can never discard `src_root`.
files = [src_root / str(folder).lstrip('/') / 'las_processor_bundled_out' / f'filtered_{tilecode}.laz'
         for folder, tilecode in zip(target_tiles_gdf['folder'], target_tiles_gdf['tilecode'])]

for f in tqdm(files):
    # Tiles are copied flat into the output folder under their original file name
    destination = dst_dir / f.name
    if dry_run:
        print(f'{f.as_posix()} => {destination.as_posix()}')
    else:
        shutil.copy(f.as_posix(), destination.as_posix())