This notebook uses a tile index file to find the .las point-cloud tiles that overlap a building, then merges those tiles and clips the point cloud to the building footprint extent.

### Load dependencies

In [None]:
import boto3
import geopandas as gpd
from shapely.geometry import  mapping
import subprocess
import json
import os
import pdal
import json

### Set input and output

In [None]:
# --- Inputs ---------------------------------------------------------------
# Building footprint polygons.
building_footprint_path = r'/home/ubuntu/lavender_floor_height/GA-floor-height/data/wagga_ffh.gpkg'
# Tile index shapefile; its base name is used below as an S3 key prefix.
tile_index_file = r"01_WaggaWagga/03_Ancillary/01_TileIndex/rev1/48068_Wagga_Wagga_TileSet.shp"
# Building points.
building_points_file = r'/home/ubuntu/lavender_floor_height/output/Final_Wagga_training_samples_pano_metadata.geojson'

# --- Outputs --------------------------------------------------------------
# Root folder for everything this notebook downloads or writes.
output_directory = '/mnt/floorheightvolume/lidar_Wagga'
os.makedirs(output_directory, exist_ok=True)

### Connect to AWS bucket

In [None]:
# Exchange a one-time MFA code for temporary AWS credentials via the AWS CLI.
mfa_token = '896573' # replace with latest token
bucket_name = 'frontiersi-p127-floor-height-woolpert'
s3_prefix='01_WaggaWagga/02_MLSPointCloud/rev1/'

# Pass argv as an explicit list so no argument depends on whitespace splitting.
cmd = [
    'aws', 'sts', 'get-session-token',
    '--serial-number', 'arn:aws:iam::693903849513:mfa/Lavender_AWS_MFA',
    '--token-code', mfa_token,
]
# text=True decodes stdout/stderr so error messages are readable strings.
result = subprocess.run(cmd, capture_output=True, text=True)

# Fail loudly here: without this, `data` would be undefined (or stale from a
# previous run) and the session cell below would fail with a confusing error.
if result.returncode != 0:
    raise RuntimeError(
        f"aws sts get-session-token failed (exit {result.returncode}): {result.stderr.strip()}"
    )

# Parse the CLI's JSON output into a dictionary; the next cell reads data['Credentials'].
data = json.loads(result.stdout)

In [None]:
# Build a boto3 session from the temporary credentials parsed above.
sts_credentials = data['Credentials']
session = boto3.session.Session(
    aws_access_key_id=sts_credentials['AccessKeyId'],
    aws_secret_access_key=sts_credentials['SecretAccessKey'],
    aws_session_token=sts_credentials['SessionToken'],
)

# S3 client used by every download cell below.
s3 = session.client('s3')

### Download tile index files

In [None]:
# List every object whose key starts with the tile index path minus its
# extension, i.e. all files sharing the shapefile's base name.
# os.path.splitext only strips the final extension, so dots elsewhere in the
# key (e.g. in a directory name) do not corrupt the prefix.
tile_index_prefix = os.path.splitext(tile_index_file)[0]
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=tile_index_prefix)

# 'Contents' is absent from the response when nothing matches the prefix.
tile_index_objects = response.get('Contents', [])
if not tile_index_objects:
    raise FileNotFoundError(
        f"No objects found in bucket '{bucket_name}' with prefix '{tile_index_prefix}'"
    )

for obj in tile_index_objects:
    key = obj['Key']
    tile_index_file_downloaded = os.path.join(output_directory, os.path.basename(key))
    if os.path.exists(tile_index_file_downloaded):
        print('tile index file exists')
    else:
        # Download this tile index component from S3
        s3.download_file(bucket_name, key, tile_index_file_downloaded)

### Load downloaded tile index file

In [None]:
# The downloaded copy sits in the output folder under the same file name.
tile_index_basename = os.path.basename(tile_index_file)
tile_index_file_local = os.path.join(output_directory, tile_index_basename)

# Read the tile polygons and show them on an interactive map.
gdf_tile_bbox = gpd.read_file(tile_index_file_local)
gdf_tile_bbox.explore()

### Load building footprints

In [None]:
# Read the footprints and reproject them into the tile index CRS so the two
# layers can be compared spatially.
gdf_building_footprint = (
    gpd.read_file(building_footprint_path)
    .to_crs(gdf_tile_bbox.crs)
)
gdf_building_footprint.head()

In [None]:
# Show the column names of the footprint layer.
gdf_building_footprint.columns

In [None]:
# Number of building footprints loaded.
len(gdf_building_footprint)

### Load building points

In [None]:
# Read the building points in the tile index CRS.
gdf_building_points = gpd.read_file(building_points_file).to_crs(gdf_tile_bbox.crs)

# Keep only rows whose USAGE is "Residential", renumbering the index from 0.
is_residential = gdf_building_points["USAGE"] == "Residential"
gdf_building_points = gdf_building_points[is_residential].reset_index(drop=True)
gdf_building_points.head()

In [None]:
# Show the column names of the building points layer.
gdf_building_points.columns

In [None]:
# Number of building points remaining.
len(gdf_building_points)

In [None]:
# Rename an existing 'index_right' column (if present) to 'pano_index_right'.
# NOTE(review): presumably this avoids a name clash with the 'index_right'
# column that a spatial join adds to its result — confirm.
gdf_building_points=gdf_building_points.rename(columns={'index_right': 'pano_index_right'})

### Identify overlapping footprints and tiles

In [None]:
# Keep only footprints that have a building point within 0.5 CRS units,
# recording the match distance in 'bd_distance'.
nearby_gdf_footprint = gpd.sjoin_nearest(
    gdf_building_footprint,
    gdf_building_points,
    how='inner',
    max_distance=0.5,
    distance_col='bd_distance',
).reset_index(drop=True)
nearby_gdf_footprint

In [None]:
# Matched footprints in blue ...
m = nearby_gdf_footprint.explore(
    color='blue',
    name='building footprint',
)
# ... with the building points overlaid in red on the same map.
gdf_building_points.explore(
    m=m,
    color='red',
    name='building points',
)
m

### Buffer building footprint

In [None]:
# Buffer distance is expressed in the units of the layer's CRS
# (e.g., meters or degrees), so adjust it to match.
buffer_distance = 2

# Work on a copy so the unbuffered footprints stay available.
nearby_gdf_footprint_buffered = nearby_gdf_footprint.copy()
nearby_gdf_footprint_buffered['geometry'] = (
    nearby_gdf_footprint_buffered.geometry.buffer(buffer_distance)
)

## Test the workflow for a single building

### Find overlapping tile(s)

In [None]:
# Use the first buffered footprint as the test building.
building_polygon = nearby_gdf_footprint_buffered.geometry.iloc[0]

# Select the tiles whose polygon intersects that building.
intersects_building = gdf_tile_bbox.geometry.intersects(building_polygon)
overlapping_tiles = gdf_tile_bbox[intersects_building]
overlapping_tiles

In [None]:
# Overlapping tiles in blue ...
m1 = overlapping_tiles.explore(
    color='blue',
    name='overlapping lidar tiles',
)
# ... with the (unbuffered) matched footprints overlaid in red.
nearby_gdf_footprint.explore(
    m=m1,
    color='red',
    name='building footprints',
)
m1

### Download all overlapping tiles

In [None]:
# Download every overlapping tile that is not already on disk and collect
# the local paths for the merge/clip step.
output_directory_original='/mnt/floorheightvolume/lidar_Wagga/original'
os.makedirs(output_directory_original, exist_ok=True)
local_las_files=[]

for _, tile in overlapping_tiles.iterrows():
    tile_name = tile["FileName"]
    # S3 keys always use '/', so build the key explicitly instead of with
    # os.path.join (which would use the local OS separator).
    las_file_key = '/'.join(part for part in (s3_prefix.rstrip('/'), tile_name) if part)
    local_las_path = os.path.join(output_directory_original, tile_name)
    local_las_files.append(local_las_path)

    if os.path.exists(local_las_path):
        print('tile already downloaded')
    else:
        # Download LAS file from S3
        s3.download_file(bucket_name, las_file_key, local_las_path)

### Merge and clip to footprint

In [None]:
# CRS of the buffered footprints as a string, or None when the layer has no CRS.
if nearby_gdf_footprint_buffered.crs:
    polygon_crs = nearby_gdf_footprint_buffered.crs.to_string()
else:
    polygon_crs = None

# CRS of the LAS tiles: needs to update based on study area or extracted from file name.
las_crs = 'EPSG:7855'

# Displays True when the two CRS strings are identical.
polygon_crs == las_crs

In [None]:
# Merge the downloaded tiles and clip them to the buffered building polygon,
# writing one LAS file named after the building's gnaf_id and UFI.
output_directory_clipped='/mnt/floorheightvolume/lidar_Wagga/clipped'
os.makedirs(output_directory_clipped, exist_ok=True)
first_building = nearby_gdf_footprint_buffered.iloc[0]
outfile_basename = f"gnaf_{first_building.gnaf_id}_UFI_{first_building.UFI}.las"
out_las_clipped=os.path.join(output_directory_clipped,outfile_basename)

# Without any input tile there is nothing to clip; say so explicitly instead
# of failing later with an opaque IndexError.
if not local_las_files:
    raise ValueError("No LAS tiles overlap the building polygon; nothing to clip")

# Readers: a bare filename is a reader stage, and this works for one or many tiles.
pipeline_steps = list(local_las_files)

# Stage 1: cheap rectangular pre-filter using the polygon's bounding box.
# shapely bounds are (minx, miny, maxx, maxy); PDAL expects
# "([xmin, xmax], [ymin, ymax])".
xmin, ymin, xmax, ymax = building_polygon.bounds
bounds_formatted = f"([{xmin}, {xmax}], [{ymin}, {ymax}])"
pipeline_steps.append({
    "type": "filters.crop",
    "bounds": bounds_formatted
})
# Stage 2: exact clip to the polygon. This must be a separate crop stage:
# when one filters.crop is given several regions (bounds AND polygon) it emits
# a separate point set per region rather than their intersection, so the
# written file would contain the bbox crop plus the polygon crop.
pipeline_steps.append({
    "type": "filters.crop",
    "polygon": mapping(building_polygon)
})
# Add writer
pipeline_steps.append({
    "type": "writers.las",
    "filename": out_las_clipped,
    "extra_dims": "all"  # Preserve all dimensions
})
# Create the pipeline
pipeline_json = {"pipeline": pipeline_steps}
try:
    # Execute the pipeline
    pipeline = pdal.Pipeline(json.dumps(pipeline_json))
    pipeline.execute()
except Exception as e:
    # Report the failure without stopping the notebook.
    print(f"Pipeline failed: {e}")