In [15]:
import geopandas as gpd 

In [17]:
# Load the tile grid and the tree annotations from the shared data directory.
tiles, trees_data = (
    gpd.read_file("../data/tiles.geojson"),
    gpd.read_file("../data/trees_box.geojson"),
)

In [18]:
# Bring both layers into EPSG:4326 so tiles and trees share one CRS.
# Rebinding the result (instead of inplace=True) avoids mutating the frames
# that were loaded in the previous cell.
tiles = tiles.to_crs(epsg=4326)
trees_data = trees_data.to_crs(epsg=4326)

Unnamed: 0,id,title,geometry
0,"Tile(x=6773, y=293571, z=19)","XYZ tile Tile(x=6773, y=293571, z=19)","POLYGON ((-175.34935 -21.08706, -175.34935 -21..."
1,"Tile(x=6773, y=293572, z=19)","XYZ tile Tile(x=6773, y=293572, z=19)","POLYGON ((-175.34935 -21.0877, -175.34935 -21...."
2,"Tile(x=6773, y=293573, z=19)","XYZ tile Tile(x=6773, y=293573, z=19)","POLYGON ((-175.34935 -21.08834, -175.34935 -21..."
3,"Tile(x=6773, y=293574, z=19)","XYZ tile Tile(x=6773, y=293574, z=19)","POLYGON ((-175.34935 -21.08898, -175.34935 -21..."
4,"Tile(x=6773, y=293575, z=19)","XYZ tile Tile(x=6773, y=293575, z=19)","POLYGON ((-175.34935 -21.08963, -175.34935 -21..."
...,...,...,...
546,"Tile(x=6791, y=293595, z=19)","XYZ tile Tile(x=6791, y=293595, z=19)","POLYGON ((-175.33699 -21.10244, -175.33699 -21..."
547,"Tile(x=6791, y=293596, z=19)","XYZ tile Tile(x=6791, y=293596, z=19)","POLYGON ((-175.33699 -21.10308, -175.33699 -21..."
548,"Tile(x=6791, y=293597, z=19)","XYZ tile Tile(x=6791, y=293597, z=19)","POLYGON ((-175.33699 -21.10372, -175.33699 -21..."
549,"Tile(x=6791, y=293598, z=19)","XYZ tile Tile(x=6791, y=293598, z=19)","POLYGON ((-175.33699 -21.10436, -175.33699 -21..."


In [35]:
import re

def parse_tile_id(tile_id_str):
    """Extract the x, y and z components from a tile identifier string.

    Parameters
    ----------
    tile_id_str : str
        Identifier of the form ``"Tile(x=6773, y=293571, z=19)"``.
        Leading and trailing whitespace is ignored.

    Returns
    -------
    tuple of str
        ``(x, y, z)`` as the digit strings captured from the identifier;
        they are not converted to ``int``.

    Raises
    ------
    ValueError
        If ``tile_id_str`` is not a string, or is not exactly one tile
        identifier in the expected format.
    """
    # A missing id (None / NaN) would otherwise surface as an opaque
    # TypeError raised from inside the ``re`` module.
    if not isinstance(tile_id_str, str):
        raise ValueError(f"Cannot parse tile ID: {tile_id_str}")

    # fullmatch rejects trailing junk that a start-anchored match would
    # silently accept, e.g. "Tile(x=1, y=2, z=3)garbage".
    match = re.fullmatch(
        r"Tile\(x=(\d+), y=(\d+), z=(\d+)\)", tile_id_str.strip()
    )
    if match is None:
        raise ValueError(f"Cannot parse tile ID: {tile_id_str}")
    return match.groups()

In [38]:
import os
from pathlib import Path

def split_geojson_by_tiles(trees_gdf, tiles_gdf, output_dir, prefix="OAM"):
    """Clip tree features to each tile and write one GeoJSON file per tile.

    For every row of ``tiles_gdf`` the trees are clipped to the tile geometry
    and, when anything remains, written to
    ``<output_dir>/<prefix>-<x>-<y>-<z>.geojson``, where x, y and z are parsed
    from the tile's ``id`` value with ``parse_tile_id``.

    Parameters
    ----------
    trees_gdf : geopandas.GeoDataFrame
        Features to split across tiles.
    tiles_gdf : geopandas.GeoDataFrame
        Tile geometries; needs an ``id`` column that ``parse_tile_id`` accepts.
    output_dir : str or path-like
        Directory for the output files; created if it does not exist.
    prefix : str, default "OAM"
        Leading part of every output file name.

    Returns
    -------
    dict
        Counters with the keys ``processed`` (tiles written), ``skipped``
        (tiles with nothing to write), ``errors`` (tiles that raised) and
        ``total_trees`` (clipped features written, summed over tiles, so a
        tree that spans several tiles is counted once per tile).
    """
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Each tile is handed to gpd.clip as a bare shapely geometry, which
    # carries no CRS, so a mismatch between the two layers cannot be detected
    # at that point. Align the tiles with the trees up front instead.
    if (
        trees_gdf.crs is not None
        and tiles_gdf.crs is not None
        and trees_gdf.crs != tiles_gdf.crs
    ):
        tiles_gdf = tiles_gdf.to_crs(trees_gdf.crs)

    stats = {'processed': 0, 'skipped': 0, 'errors': 0, 'total_trees': 0}

    for idx, tile in tiles_gdf.iterrows():
        try:
            # Parse first so a malformed id is reported as an error even when
            # the tile would otherwise be skipped.
            x, y, z = parse_tile_id(tile['id'])
            tile_filename = f"{prefix}-{x}-{y}-{z}.geojson"

            tile_geom = tile.geometry
            if tile_geom is None or tile_geom.is_empty:
                stats['skipped'] += 1
                continue

            # gpd.clip only returns the features that fall inside the mask,
            # so a separate intersects() scan over every tree is not needed.
            clipped_trees = gpd.clip(trees_gdf, tile_geom)
            # Guard against empty geometries ending up in the output file.
            clipped_trees = clipped_trees[~clipped_trees.geometry.is_empty]

            if clipped_trees.empty:
                stats['skipped'] += 1
                continue

            clipped_trees.to_file(out_dir / tile_filename, driver="GeoJSON")

            stats['processed'] += 1
            stats['total_trees'] += len(clipped_trees)

        except Exception as e:
            # Deliberately best-effort: one bad tile is reported and counted
            # without aborting the rest of the run.
            print(f"Error processing tile {idx}: {e}")
            stats['errors'] += 1

    return stats

In [39]:
# Write one label file per tile into ../data/labels and keep the run counters.
stats = split_geojson_by_tiles(
    trees_gdf=trees_data,
    tiles_gdf=tiles,
    output_dir="../data/labels",
)

In [40]:
# Display the counters returned by split_geojson_by_tiles.
stats

{'processed': 458, 'skipped': 93, 'errors': 0, 'total_trees': 12337}