In [1]:
# Import Libraries
import sys
from pathlib import Path

import geopandas as gpd
import pandas as pd

# Add src to path
sys.path.insert(0, str(Path.cwd() / 'src'))

# Import our library functions
from src import (
    # Building functions
    load_buildings, process_buildings, save_buildings,
    # POI functions
    load_pois, process_pois, save_pois,
    # Spatial matching
    match_pois_to_buildings, join_matches_to_pois,
    # Heuristics
    apply_heuristics_to_pois,
    # Unified POI generation
    create_unified_pois, save_unified_pois, print_summary_statistics
)

# Configuration

# Place handed to the loaders as `place_name`
place_name = "Brooklyn, New York, USA"

# Handed to the loaders as `use_cache`; set to False to force re-download
USE_CACHE = True

# Cache directory, created if it does not exist yet
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)

# Cache file paths inside CACHE_DIR
BUILDINGS_CACHE = CACHE_DIR.joinpath("buildings_raw.geojson")
POIS_CACHE = CACHE_DIR.joinpath("pois_raw.geojson")

In [None]:
# Load and Process Buildings

# Load the raw buildings, handing the cache path and cache toggle to the loader
buildings_raw = load_buildings(place_name=place_name, cache_path=BUILDINGS_CACHE, use_cache=USE_CACHE)

# Process buildings to calculate square footage
buildings_gdf = process_buildings(buildings_raw)

# Print the first ten rows of the columns of interest
building_preview_columns = [
    'building_id',
    'building',
    'footprint_sqft',
    'estimated_floors',
    'total_sqft',
]
print(buildings_gdf[building_preview_columns].head(10))

In [None]:
# Load and Process POIs

# Load the raw POIs, handing the cache path and cache toggle to the loader
pois_raw = load_pois(place_name=place_name, cache_path=POIS_CACHE, use_cache=USE_CACHE)

# Process POIs with the non-trip-generator filter switched on
pois_gdf = process_pois(pois_raw, filter_non_trip_generators=True)

# For every tag column that is present, print its ten most frequent non-null values
poi_tag_columns = ('amenity', 'shop', 'office', 'leisure', 'tourism', 'public_transport')
for col in poi_tag_columns:
    if col not in pois_gdf.columns:
        continue  # column absent from this dataset
    counts = pois_gdf[col].dropna().value_counts().head(10)
    if counts.empty:
        continue  # column exists but holds no values
    print(f"\nTop 10 {col} types:")
    print(counts)

In [None]:
# Match POIs to Buildings

# Spatial matching step: buildings and POIs in, match table out
matches_df = match_pois_to_buildings(
    buildings_gdf,
    pois_gdf,
)

# Attach the match table back onto the POIs
pois_matched = join_matches_to_pois(
    pois_gdf,
    matches_df,
)

In [None]:
# Apply Brooklyn Heuristics for Space Allocation

# Apply heuristics to allocate space
processed_pois_df = apply_heuristics_to_pois(pois_matched, buildings_gdf)

# Summary statistics
# The 'is_remaining' column is treated as optional: .get() returns None when it is
# absent, in which case no row is counted as "remaining". Missing flags count as False.
remaining_flags = processed_pois_df.get('is_remaining')
if isinstance(remaining_flags, pd.Series):
    remaining_mask = remaining_flags.fillna(False).astype(bool)
else:
    remaining_mask = pd.Series(False, index=processed_pois_df.index)

actual_pois = processed_pois_df[~remaining_mask]
remaining_pois = processed_pois_df[remaining_mask]

print("\nSpace Allocation Summary:")
print(f"  Actual POI sqft: {actual_pois['poi_sqft'].sum():,.0f}")
print(f"  Remaining/upper floor sqft: {remaining_pois['poi_sqft'].sum():,.0f}")
print(f"  Total allocated: {processed_pois_df['poi_sqft'].sum():,.0f}")

# Verify no double-counting
print("\nDouble-count check (sample buildings with multiple POIs):")
# Select rows whose building_id occurs more than once, using a vectorized mask rather
# than a per-building Python callback. Rows with a missing building_id are excluded,
# which is also what a default groupby on that column would do.
building_ids = processed_pois_df['building_id']
multi_poi_buildings = processed_pois_df[building_ids.notna() & building_ids.duplicated(keep=False)]
if len(multi_poi_buildings) > 0:
    # Per building: allocated sqft summed over its rows vs. the building total
    # (taken from the first row of the group), for the first five buildings.
    sample_check = multi_poi_buildings.groupby('building_id').agg({
        'poi_sqft': 'sum',
        'building_total_sqft': 'first'
    }).head(5)
    sample_check['ratio'] = sample_check['poi_sqft'] / sample_check['building_total_sqft']
    print(sample_check)

In [None]:
# Create Unified POI Dataset

# Create unified POI dataset
unified_gdf = create_unified_pois(processed_pois_df, buildings_gdf)

# Print summary statistics
print_summary_statistics(unified_gdf)

# Validation: check sample multi-POI buildings
print("\n=== Validation: Sample Multi-POI Buildings ===")
# Only rows from these two sources take part in the per-building check.
multi_poi = unified_gdf[unified_gdf['source'].isin(['osm_poi', 'inferred_remaining'])]
# Keep rows whose building_id occurs more than once, using a vectorized mask rather
# than a per-building Python callback. Rows with a missing building_id are excluded,
# which is also what a default groupby on that column would do.
multi_poi_ids = multi_poi['building_id']
multi_poi_buildings = multi_poi[multi_poi_ids.notna() & multi_poi_ids.duplicated(keep=False)]
if len(multi_poi_buildings) > 0:
    # Inspect the first three such buildings in order of appearance.
    for bldg_id in multi_poi_buildings['building_id'].unique()[:3]:
        bldg_pois = multi_poi_buildings[multi_poi_buildings['building_id'] == bldg_id]
        bldg_match = buildings_gdf.loc[buildings_gdf['building_id'] == bldg_id, 'total_sqft']
        if bldg_match.empty:
            # No building row for this id: report it instead of failing with an IndexError.
            print(f"\nBuilding {bldg_id}: not found in buildings_gdf, skipping")
            continue
        bldg_total = bldg_match.iloc[0]
        poi_total = bldg_pois['sqft'].sum()
        print(f"\nBuilding {bldg_id}: {bldg_total:,.0f} sqft building, {poi_total:,.0f} sqft allocated (ratio: {poi_total/bldg_total:.2f})")
        for _, p in bldg_pois.iterrows():
            print(f"  - {p['poi_type']}: {p['sqft']:,.0f} sqft ({p['source']})")

In [None]:
# Save Outputs

# Make sure the output directory exists before anything is written into it.
# NOTE(review): whether the save helpers create missing directories themselves is not
# visible from this notebook; creating it up front is harmless either way.
Path("data").mkdir(parents=True, exist_ok=True)

# Save unified POI dataset
save_unified_pois(
    unified_gdf,
    geojson_path="data/unified_pois_brooklyn.geojson",
    csv_path="data/unified_pois_brooklyn.csv"
)

# Save buildings
save_buildings(buildings_gdf, "data/buildings_brooklyn.geojson")

print("\nFiles ready for trip generation!")

In [None]:
# Import the new trip generator functions
from src import create_trip_generators, print_trip_gen_summary, save_trip_generators

# Build the trip generators from the processed POIs and the buildings
trip_generators_gdf = create_trip_generators(processed_pois_df, buildings_gdf)

# Print the trip generation summary for the result
print_trip_gen_summary(trip_generators_gdf)

# Write the GeoJSON and CSV outputs (paths are relative to the working directory)
trip_generator_outputs = {
    "geojson_path": "trip_generators_brooklyn.geojson",
    "csv_path": "trip_generators_brooklyn.csv",
}
save_trip_generators(trip_generators_gdf, **trip_generator_outputs)

print(f"\nTrip generator files ready for trip generation modeling!")

In [None]:
# Example: Show how trip generators map to trip generation rates
print("=== Trip Generator to Trip Rate Mapping Examples ===\n")

# Sample one real generator per category: the first row seen for each trip_gen_category.
# Whole rows are kept on purpose. groupby().first() would instead take the first
# non-null value of every column independently, so one printed "example" could blend
# values from several different generators. Rows without a category are dropped and the
# categories are sorted, mirroring the defaults of a groupby on that column.
sample_generators = (
    trip_generators_gdf
    .dropna(subset=['trip_gen_category'])
    .drop_duplicates(subset='trip_gen_category', keep='first')
    .sort_values('trip_gen_category')
    .head(10)
)

for _, gen in sample_generators.iterrows():
    print(f"Land Use: {gen['land_use_type']}")
    print(f"  → Trip Gen Category: {gen['trip_gen_category']}")
    print(f"  → Value: {gen['trip_gen_value']:.1f} {gen['trip_gen_unit']}")
    print(f"  → Original: {gen['sqft']:,.0f} sqft")
    print()

# Show unit distribution
print("=== Unit Type Distribution ===")
# Per unit type: number of non-null generator ids and the summed trip_gen_value.
unit_counts = trip_generators_gdf.groupby('trip_gen_unit').agg(
    count=('generator_id', 'count'),
    total_value=('trip_gen_value', 'sum'),
)
print(unit_counts)