In [None]:
# Import Libraries
import sys
from pathlib import Path

import geopandas as gpd
import pandas as pd

# Add src to path
sys.path.insert(0, str(Path.cwd() / 'src'))

# Import our library functions
from src.config import CityConfig
from src.building_processor import load_buildings, process_buildings
from src.poi_processor import load_pois, process_pois
from src.spatial_matcher import match_pois_to_buildings, join_matches_to_pois
from src.heuristics import apply_heuristics_to_pois
from src.trip_generator import (
    create_trip_generators,
    save_trip_generators,
    print_trip_gen_summary
)

# --- Configuration ---
# Everything a reader might want to tune lives in this cell.

# City key: passed to CityConfig and embedded in every cache/output filename below.
CITY = 'brooklyn'
config = CityConfig(city_name=CITY)
# Place string handed to load_buildings / load_pois in the following cells.
place_name = "Brooklyn, New York, USA"

# Verbosity control - set to False for minimal output
VERBOSE = True  # Set to False for only critical messages and high-level stats

USE_CACHE = True  # Set to False to force re-download

# Working directories. Both are created up front so the notebook can run top
# to bottom on a fresh checkout: the last cell writes GeoJSON into DATA_DIR,
# and that write does not create a missing parent directory on its own.
CACHE_DIR = Path("cache")
DATA_DIR = Path('data')
CACHE_DIR.mkdir(parents=True, exist_ok=True)
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Cache and output file paths
BUILDINGS_CACHE = CACHE_DIR / f"buildings_raw_{CITY}.geojson"
POIS_CACHE = CACHE_DIR / f"pois_raw_{CITY}.geojson"
BUILDINGS_OUT = DATA_DIR / f"buildings_{CITY}.geojson"
TRIP_GENERATORS_OUT_GEOJSON = DATA_DIR / f"trip_generators_{CITY}.geojson"
TRIP_GENERATORS_OUT_CSV = DATA_DIR / f"trip_generators_{CITY}.csv"

In [None]:
# Load and Process Buildings

# Raw buildings for the configured place; the cache path and flag are
# forwarded so load_buildings can decide whether to reuse the cached file.
buildings_raw = load_buildings(
    place_name=place_name, cache_path=BUILDINGS_CACHE, use_cache=USE_CACHE, verbose=VERBOSE
)

# Derive the square-footage columns from the raw buildings.
buildings_gdf = process_buildings(buildings_raw, config, verbose=VERBOSE)

# In verbose mode, preview the first ten rows of the key columns.
if VERBOSE:
    preview_columns = ['building_id', 'building', 'footprint_sqft', 'estimated_floors', 'total_sqft']
    print("\nSample buildings data:")
    buildings_preview = buildings_gdf[preview_columns].head(10)
    print(buildings_preview)

In [None]:
# Load and Process POIs

# Raw POIs for the configured place, with the same cache settings as the buildings.
pois_raw = load_pois(
    place_name=place_name, cache_path=POIS_CACHE, use_cache=USE_CACHE, verbose=VERBOSE
)

# Process POIs (filter and get centroids)
pois_gdf = process_pois(pois_raw, filter_non_trip_generators=True, verbose=VERBOSE)

# In verbose mode, list the ten most frequent values of each POI tag column
# that is present and has at least one non-null entry.
if VERBOSE:
    poi_tag_columns = ('amenity', 'shop', 'office', 'leisure', 'tourism', 'public_transport')
    for col in poi_tag_columns:
        if col not in pois_gdf.columns:
            continue  # tag column absent from this dataset
        counts = pois_gdf[col].dropna().value_counts().head(10)
        if counts.empty:
            continue  # column exists but holds only nulls
        print(f"\nTop 10 {col} types:")
        print(counts)

In [None]:
# Match POIs to Buildings

# Step 1: build the POI-to-building match table from the two processed layers.
matches_df = match_pois_to_buildings(
    buildings_gdf,
    pois_gdf,
    verbose=VERBOSE,
)

# Step 2: join that match table back onto the POI records.
pois_matched = join_matches_to_pois(
    pois_gdf,
    matches_df,
    verbose=VERBOSE,
)

In [None]:
# Apply Brooklyn Heuristics for Space Allocation

# Apply heuristics to allocate space
processed_pois_df = apply_heuristics_to_pois(pois_matched, buildings_gdf, verbose=VERBOSE)

# Summary statistics if verbose
if VERBOSE:
    # 'is_remaining' may be missing from the frame: DataFrame.get() then
    # returns None and every row is treated as an actual POI.
    remaining_flags = processed_pois_df.get('is_remaining')
    if isinstance(remaining_flags, pd.Series):
        # Missing flags count as False before the cast to a boolean mask.
        remaining_mask = remaining_flags.fillna(False).infer_objects(copy=False).astype(bool)
    else:
        remaining_mask = pd.Series(False, index=processed_pois_df.index)

    actual_pois = processed_pois_df[~remaining_mask]
    remaining_pois = processed_pois_df[remaining_mask]

    print("\nSpace Allocation Summary:")
    print(f"  Actual POI sqft: {actual_pois['poi_sqft'].sum():,.0f}")
    print(f"  Remaining/upper floor sqft: {remaining_pois['poi_sqft'].sum():,.0f}")
    print(f"  Total allocated: {processed_pois_df['poi_sqft'].sum():,.0f}")

    # Verify no double-counting: for buildings holding several rows, compare
    # the summed POI area against the building's total area.
    print("\nDouble-count check (sample buildings with multiple POIs):")
    # Vectorised per-building row count. Rows whose building_id is missing
    # get NaN here, so the "> 1" comparison excludes them.
    rows_per_building = processed_pois_df.groupby('building_id')['poi_sqft'].transform('size')
    multi_poi_buildings = processed_pois_df[rows_per_building > 1]
    if len(multi_poi_buildings) > 0:
        sample_check = multi_poi_buildings.groupby('building_id').agg({
            'poi_sqft': 'sum',
            'building_total_sqft': 'first'
        }).head(5)
        # A ratio above 1 would mean more area was allocated than the building has.
        sample_check['ratio'] = sample_check['poi_sqft'] / sample_check['building_total_sqft']
        print(sample_check)
    else:
        print("  No buildings with multiple POIs found.")

In [None]:
# Create and Save Final Trip Generator Dataset

# This function now handles the unified creation and unit conversion
trip_generators_gdf = create_trip_generators(processed_pois_df, buildings_gdf, config, verbose=VERBOSE)

# Print summary with trip generation units if verbose
if VERBOSE:
    print_trip_gen_summary(trip_generators_gdf)

# Make sure every output directory exists before anything is written, so a
# fresh checkout without the output folder does not fail at the very last step.
for output_path in (TRIP_GENERATORS_OUT_GEOJSON, TRIP_GENERATORS_OUT_CSV, BUILDINGS_OUT):
    output_path.parent.mkdir(parents=True, exist_ok=True)

# Save the final trip generator files
save_trip_generators(
    trip_generators_gdf,
    geojson_path=TRIP_GENERATORS_OUT_GEOJSON,
    csv_path=TRIP_GENERATORS_OUT_CSV,
    verbose=VERBOSE
)

# Save the processed buildings file as well
buildings_gdf.to_file(BUILDINGS_OUT, driver="GeoJSON")
if VERBOSE:
    print(f"Saved processed buildings to: {BUILDINGS_OUT}")

print("\nWorkflow complete!")