<a href="https://colab.research.google.com/github/MODA-NYC/nyc-geography-crosswalks/blob/main/NYC_Geographies_Crosswalk_Selector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NYC Geographies: Interactive Crosswalk Selector

This notebook provides an interactive tool for generating custom geographic crosswalk tables for New York City, using the BetaNYC `all_bounds.geojson` dataset.

### How it works:
- **Interactive UI**: Users select a primary geography (e.g., Community District) and multiple target geographies (e.g., ZIP codes, NTAs).
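- **Spatial Filtering**: Shrinks each primary feature by 200 feet (a negative buffer) before intersecting it with the target geographies, and keeps only overlaps larger than 400 square feet, so that features which merely share a boundary are not reported as overlapping.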
- **Custom Output**: Generates a CSV file showing overlaps between selected geographies, structured clearly with semicolon-separated identifiers.
- **Progress Feedback**: Provides a real-time progress bar during data processing.

### Data Source:
- [BetaNYC nyc-boundaries GeoJSON](https://github.com/BetaNYC/nyc-boundaries)

### Requirements:
- Python libraries: `geopandas`, `requests`, `ipywidgets`, `pandas`, `tqdm`
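- Environment: Google Colab recommended for interactive use (the code imports `google.colab.files` to trigger the CSV download, so it will not run unmodified outside Colab).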

### Output:
- **CSV file**: Custom-generated based on user selections, downloaded directly upon completion.

---

In [7]:
# Install necessary libraries
!pip install geopandas ipywidgets requests tqdm --quiet

import geopandas as gpd
import pandas as pd
import requests
from io import BytesIO
import ipywidgets as widgets
from IPython.display import display, clear_output
from google.colab import files
from tqdm.notebook import tqdm

# Load GeoJSON data
geojson_url = "https://raw.githubusercontent.com/BetaNYC/nyc-boundaries/main/script/all_bounds.geojson"
try:
    # (connect, read) timeouts: without them a stalled connection would hang
    # the cell indefinitely. The read timeout applies between received chunks,
    # not to the whole download, so a large file is still fine.
    response = requests.get(geojson_url, timeout=(10, 60))
    response.raise_for_status()
except requests.RequestException as e:
    # Chain the original error so the full traceback stays available.
    raise RuntimeError(f"Failed to download the GeoJSON file. Error details: {e}") from e

# Reproject to EPSG:2263 so the buffer distance and area threshold used by the
# crosswalk are expressed in that CRS's units (feet).
gdf = gpd.read_file(BytesIO(response.content)).to_crs(epsg=2263)

# Layer codes offered in the UI; generate_crosswalk compares the selected
# values against the 'id' column of gdf.
geo_choices = [
    'pp',
    'fb',
    'sd',
    'bid',
    'ibz',
    'cd',
    'dsny',
    'hc',
    'cc_upcoming',
    'cc',
    'nycongress',
    'sa',
    'ss',
    'nta',
    'zipcode',
    'hd',
]

# Controls: one primary layer, any number of target layers, and a trigger.
primary_geo_widget = widgets.Dropdown(
    options=geo_choices,
    description='Primary:',
)
target_geo_widget = widgets.SelectMultiple(
    options=geo_choices,
    description='Targets:',
)
run_button = widgets.Button(description="Generate Crosswalk")

# Area that receives messages, the progress bar and the preview table.
output = widgets.Output()

# Render the controls followed by the output area.
display(primary_geo_widget, target_geo_widget, run_button, output)

def _overlapping_features(geom, features, features_sindex, min_area):
    """Return the rows of *features* whose overlap with *geom* exceeds *min_area*.

    Args:
        geom: Non-empty geometry to test against.
        features: GeoDataFrame of candidate features.
        features_sindex: Spatial index built from *features*.
        min_area: Minimum intersection area (in squared CRS units) to keep.
    """
    if features.empty:
        return features

    # The spatial index only compares bounding boxes, so it yields a cheap
    # superset of candidates that is refined with exact geometry tests below.
    # Sorting the positions keeps the result in dataset order, independent of
    # the index's internal traversal order.
    candidate_idx = sorted(features_sindex.intersection(geom.bounds))
    candidates = features.iloc[candidate_idx]
    candidates = candidates[candidates.intersects(geom)]
    if candidates.empty:
        return candidates

    overlap_area = candidates.geometry.intersection(geom).area
    return candidates[overlap_area > min_area]


def generate_crosswalk(b, *, buffer_feet=-200, min_intersection_area=400):
    """Build a crosswalk for the current widget selections and download it as CSV.

    Reads the primary layer from ``primary_geo_widget`` and the target layers
    from ``target_geo_widget``. For every feature of the primary layer, the
    geometry is buffered by ``buffer_feet`` and intersected with the features
    of each target layer; target features whose overlap area exceeds
    ``min_intersection_area`` are listed, ``;``-separated, in that layer's
    column. The table preview, progress bar and messages are written to the
    ``output`` widget, and the CSV is saved and sent through
    ``files.download``.

    Args:
        b: The clicked button, as passed by ``Button.on_click``. Unused.
        buffer_feet: Buffer distance applied to each primary geometry, in CRS
            units. The negative default shrinks the geometry so that features
            only sharing a border do not count as overlapping.
        min_intersection_area: Overlaps at or below this area (squared CRS
            units) are ignored.
    """
    with output:
        clear_output()
        primary_geo = primary_geo_widget.value
        target_geos = list(target_geo_widget.value)

        if not target_geos:
            print("Please select at least one target geography.")
            return
        if primary_geo in target_geos:
            print("Primary geography should not be in the selected target geographies.")
            return

        primary_gdf = gdf[gdf['id'] == primary_geo]
        if primary_gdf.empty:
            print(f"No data found for primary geography '{primary_geo}'. Please select another.")
            return

        # Only the selected target layers can end up in the output, so restrict
        # the (expensive) geometry work to them once, before the loop.
        targets_gdf = gdf[gdf['id'].isin(target_geos)]
        targets_sindex = targets_gdf.sindex
        crosswalk_records = []

        print("Generating crosswalk...")
        for _, primary_row in tqdm(primary_gdf.iterrows(), total=len(primary_gdf)):
            record = {primary_geo: primary_row['nameCol']}
            geom = primary_row.geometry

            if geom is None or geom.is_empty:
                # Nothing to intersect; emit a row with blank target columns.
                overlaps = targets_gdf.iloc[0:0]
            else:
                search_geom = geom.buffer(buffer_feet)
                if search_geom.is_empty:
                    # A negative buffer erases features narrower than twice the
                    # buffer distance. Fall back to the original shape so such
                    # features still get matches; the area threshold continues
                    # to discard pure boundary contacts.
                    search_geom = geom
                overlaps = _overlapping_features(
                    search_geom, targets_gdf, targets_sindex, min_intersection_area
                )

            for geo in target_geos:
                names = overlaps.loc[overlaps['id'] == geo, 'nameCol']
                # Drop nulls and coerce to str so join() cannot raise on
                # missing or non-string identifiers. Joining nothing gives "".
                record[geo] = ";".join(names.dropna().astype(str).unique())

            crosswalk_records.append(record)

        crosswalk_df = pd.DataFrame(crosswalk_records)
        display(crosswalk_df.head())

        filename = f'crosswalk_{primary_geo}_to_others.csv'
        crosswalk_df.to_csv(filename, index=False)
        files.download(filename)
        print(f"Crosswalk generation complete. File downloaded: {filename}")

# Run generate_crosswalk on every click of run_button; the button instance is
# passed to the callback as its argument.
run_button.on_click(generate_crosswalk)


Dropdown(description='Primary:', options=('pp', 'fb', 'sd', 'bid', 'ibz', 'cd', 'dsny', 'hc', 'cc_upcoming', '…

SelectMultiple(description='Targets:', options=('pp', 'fb', 'sd', 'bid', 'ibz', 'cd', 'dsny', 'hc', 'cc_upcomi…

Button(description='Generate Crosswalk', style=ButtonStyle())

Output()