<a href="https://colab.research.google.com/github/MODA-NYC/nyc-geography-crosswalks/blob/main/NYC_Geographies_Crosswalk_Selector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NYC Geographies: Interactive Crosswalk Selectors

This notebook provides interactive tools for generating two types of custom geographic crosswalk tables for New York City, using the BetaNYC [`all_bounds.geojson`](https://github.com/BetaNYC/nyc-boundaries) dataset:

- **Wide-format Crosswalk**: Generates simplified tables for quick analysis of overlapping features.
- **Long-form Crosswalk**: Generates detailed intersection tables with precise calculations of overlap area and percentage.

### General Workflow:
- **Interactive UI**: Users choose a primary geography and multiple target geographies.
- **Spatial Analysis**: Shrinks each primary feature inward by 200 ft (a negative buffer) before intersecting, so that features which merely share a boundary with it are not reported as overlaps.
- **Custom CSV Output**: Results are downloaded as a CSV file immediately after processing.
- **Progress Indicators**: Real-time progress bars display processing status.

### Data Source:
- [BetaNYC nyc-boundaries GeoJSON](https://github.com/BetaNYC/nyc-boundaries)

### Requirements:
- Python libraries: `geopandas`, `requests`, `ipywidgets`, `pandas`, `tqdm`
- Environment: Google Colab. The code imports `google.colab.files` to download the results, so it will not run unmodified outside Colab.

## Wide-format Crosswalk Selector

This selector creates simplified crosswalk tables showing overlaps between your selected primary geography and one or more target geographies. Each row represents one primary geography feature; the first column holds its name, and each additional column lists the overlapping features of one target geography (semicolon-separated).

In [None]:
# Install necessary libraries
!pip install geopandas ipywidgets requests tqdm --quiet

import geopandas as gpd
import pandas as pd
import requests
from io import BytesIO
import ipywidgets as widgets
from IPython.display import display, clear_output
from google.colab import files
from tqdm.notebook import tqdm

# Load GeoJSON data
# The download happens once; the resulting `gdf` is the shared input of the
# crosswalk generator(s) defined further down.
geojson_url = "https://raw.githubusercontent.com/BetaNYC/nyc-boundaries/main/script/all_bounds.geojson"
try:
    # Without a timeout, requests waits indefinitely on an unresponsive host
    # and the cell would appear to hang.
    response = requests.get(geojson_url, timeout=60)
    response.raise_for_status()
except requests.RequestException as e:
    # Chain the original error so its traceback is preserved for debugging.
    raise Exception(f"Failed to download the GeoJSON file. Error details: {e}") from e

# Reproject to EPSG:2263 so the buffer distances and area thresholds used
# below can be expressed in feet / square feet.
gdf = gpd.read_file(BytesIO(response.content)).to_crs(epsg=2263)

# Layer identifiers offered in the selectors; each is compared against the
# `id` column of `gdf` when a crosswalk is generated.
geo_choices = [
    'pp', 'fb', 'sd', 'bid',
    'ibz', 'cd', 'dsny', 'hc',
    'cc_upcoming', 'cc', 'nycongress', 'sa',
    'ss', 'nta', 'zipcode', 'hd',
]

# Controls: a single primary layer, any number of target layers, and a
# button that starts the run.
primary_geo_widget = widgets.Dropdown(
    description='Primary:',
    options=geo_choices,
)
target_geo_widget = widgets.SelectMultiple(
    description='Targets:',
    options=geo_choices,
)
run_button = widgets.Button(description="Generate Crosswalk")

# Area that receives the messages, progress bar and preview of each run.
output = widgets.Output()

# Render the controls, then the output area beneath them.
display(
    primary_geo_widget,
    target_geo_widget,
    run_button,
    output,
)

def generate_crosswalk(b):
    """Build and download the wide-format crosswalk for the current selection.

    Click handler for ``run_button``. Reads the primary and target layer ids
    from the widgets, and for every feature of the primary layer collects the
    names of the target-layer features that overlap it. The result has one
    row per primary feature and one semicolon-separated column per target
    layer. The first rows are displayed, the table is written to a CSV file
    and a download is triggered through ``google.colab.files``.

    All messages and the progress bar are written to the ``output`` widget.

    Args:
        b: The button instance passed by ipywidgets; unused.
    """
    with output:
        clear_output()
        primary_geo = primary_geo_widget.value
        target_geos = list(target_geo_widget.value)

        if not target_geos:
            print("Please select at least one target geography.")
            return
        if primary_geo in target_geos:
            print("Primary geography should not be in the selected target geographies.")
            return

        # Negative distance: the primary feature is shrunk inward so that
        # neighbours which only share a boundary do not count as overlaps.
        BUFFER_FEET = -200
        # Overlaps at or below this area (square feet) are discarded.
        MIN_INTERSECTION_AREA = 400

        primary_gdf = gdf[gdf['id'] == primary_geo].copy()
        if primary_gdf.empty:
            print(f"No data found for primary geography '{primary_geo}'. Please select another.")
            return

        all_sindex = gdf.sindex
        crosswalk_records = []

        print("Generating crosswalk...")
        for _, primary_row in tqdm(primary_gdf.iterrows(), total=primary_gdf.shape[0]):
            primary_name = primary_row['nameCol']
            primary_geom = primary_row.geometry
            primary_geom_buffered = primary_geom.buffer(BUFFER_FEET)
            if primary_geom_buffered.is_empty:
                # A feature narrower than twice the buffer distance collapses
                # to an empty geometry, which can never intersect anything.
                # Fall back to the unshrunk shape so small features still get
                # their overlaps; the area threshold below continues to
                # filter out boundary-only contacts.
                primary_geom_buffered = primary_geom

            # Coarse bounding-box lookup; the index returns positions in gdf.
            candidate_idx = list(all_sindex.intersection(primary_geom_buffered.bounds))
            candidate_features = gdf.iloc[candidate_idx]
            # Only the selected target layers are ever read below, so drop
            # everything else before the costly exact geometry operations.
            candidate_features = candidate_features[candidate_features['id'].isin(target_geos)]

            mask = candidate_features.intersects(primary_geom_buffered)
            candidates = candidate_features[mask].copy()

            if not candidates.empty:
                candidates["intersection_area"] = candidates.geometry.intersection(primary_geom_buffered).area
                final_candidates = candidates[candidates["intersection_area"] > MIN_INTERSECTION_AREA]
            else:
                final_candidates = candidates

            # First column is named after the primary layer id.
            record = {primary_geo: primary_name}

            for geo in target_geos:
                subset = final_candidates[final_candidates['id'] == geo]
                # Missing names are dropped and the rest cast to str so that
                # str.join cannot fail on a non-string value.
                names = subset['nameCol'].dropna().astype(str).unique()
                record[geo] = ";".join(names)

            crosswalk_records.append(record)

        crosswalk_df = pd.DataFrame(crosswalk_records)
        display(crosswalk_df.head())

        filename = f'crosswalk_{primary_geo}_to_others.csv'
        crosswalk_df.to_csv(filename, index=False)
        files.download(filename)
        print(f"Crosswalk generation complete. File downloaded: {filename}")

run_button.on_click(generate_crosswalk)


Dropdown(description='Primary:', options=('pp', 'fb', 'sd', 'bid', 'ibz', 'cd', 'dsny', 'hc', 'cc_upcoming', '…

SelectMultiple(description='Targets:', options=('pp', 'fb', 'sd', 'bid', 'ibz', 'cd', 'dsny', 'hc', 'cc_upcomi…

Button(description='Generate Crosswalk', style=ButtonStyle())

Output()

## Long-form Crosswalk Selector

This selector creates detailed crosswalk tables that include explicit calculations of intersection areas and percentages of overlap between selected geographies. Each row provides precise information for one intersection pair.


In [9]:
# Long-form Crosswalk Selector (Interactive) - Updated with union_all()
import geopandas as gpd
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
from google.colab import files
from tqdm.notebook import tqdm

# Controls for the long-form selector: a single primary layer, any number of
# target layers, and a button that starts the run.
primary_geo_widget_long = widgets.Dropdown(
    description='Primary (Long):',
    options=geo_choices,
)
target_geo_widget_long = widgets.SelectMultiple(
    description='Targets (Long):',
    options=geo_choices,
)
run_button_long = widgets.Button(description="Generate Long-form Crosswalk")

# Area that receives the messages, progress bar and preview of each run.
output_long = widgets.Output()

# Render the controls, then the output area beneath them.
display(
    primary_geo_widget_long,
    target_geo_widget_long,
    run_button_long,
    output_long,
)

def generate_longform_crosswalk(b):
    """Build and download the long-form crosswalk for the current selection.

    Click handler for ``run_button_long``. For every feature of the primary
    layer and every selected target layer, emits one row per distinct target
    name that overlaps the primary feature. Each row carries the primary
    feature's area, the intersection area and the percentage of the primary
    feature that the intersection covers. The first rows are displayed, the
    table is written to a CSV file and a download is triggered through
    ``google.colab.files``.

    All messages and the progress bar are written to the ``output_long``
    widget.

    Args:
        b: The button instance passed by ipywidgets; unused.
    """
    with output_long:
        clear_output()
        primary_geo = primary_geo_widget_long.value
        target_geos = list(target_geo_widget_long.value)

        if not target_geos:
            print("Please select at least one target geography.")
            return
        if primary_geo in target_geos:
            print("Primary geography should not be in the selected target geographies.")
            return

        # Negative distance: the primary feature is shrunk inward so that
        # neighbours which only share a boundary are not picked up.
        BUFFER_FEET = -200
        # Overlaps (with the shrunk shape) at or below this area are dropped.
        # NOTE(review): the wide-format selector uses 400 for this threshold;
        # confirm that the difference is intentional.
        MIN_INTERSECTION_AREA = 40

        primary_gdf = gdf[gdf['id'] == primary_geo].copy()
        if primary_gdf.empty:
            print(f"No data found for primary geography '{primary_geo}'. Please select another.")
            return

        spatial_index = gdf.sindex
        # Fixed column order, also used when no overlap is found so that the
        # CSV still has a header row.
        columns = [
            "Primary Geography ID",
            "Primary Geography NameCol",
            "Other Geography ID",
            "Other Geography NameCol",
            "Primary Area (sq ft)",
            "Intersection Area (sq ft)",
            "Percentage Overlap",
        ]
        rows = []

        print("Generating long-form crosswalk...")
        for _, primary_row in tqdm(primary_gdf.iterrows(), total=primary_gdf.shape[0]):
            primary_name = primary_row['nameCol']
            primary_geom = primary_row.geometry
            primary_area = primary_geom.area
            primary_geom_buffered = primary_geom.buffer(BUFFER_FEET)
            if primary_geom_buffered.is_empty:
                # A feature narrower than twice the buffer distance collapses
                # to an empty geometry, which can never intersect anything.
                # Fall back to the unshrunk shape so small features still get
                # their overlaps; the area threshold below continues to
                # filter out boundary-only contacts.
                primary_geom_buffered = primary_geom

            # Coarse bounding-box lookup; the index returns positions in gdf.
            candidate_idx = list(spatial_index.intersection(primary_geom_buffered.bounds))
            candidate_features = gdf.iloc[candidate_idx]
            # Only the selected target layers are ever read below.
            candidate_features = candidate_features[candidate_features['id'].isin(target_geos)]

            # The exact overlap test does not depend on the target layer, so
            # it is done once per primary feature rather than once per layer.
            overlapping = candidate_features[
                candidate_features.intersects(primary_geom_buffered)
            ].copy()
            if not overlapping.empty:
                overlapping['intersect_area'] = overlapping.geometry.intersection(primary_geom_buffered).area
                overlapping = overlapping[overlapping['intersect_area'] > MIN_INTERSECTION_AREA]

            # The primary layer cannot appear in target_geos (rejected above),
            # so no self-comparison check is needed here.
            for other_id in target_geos:
                subset = overlapping[overlapping['id'] == other_id]

                for name_val in subset['nameCol'].unique():
                    feats_same_name = subset[subset['nameCol'] == name_val]

                    if not feats_same_name.empty:
                        # Several features may share a name; merge them so the
                        # overlap is measured once per name.
                        union_geom = feats_same_name.geometry.union_all()
                        # Reported areas use the original (unshrunk) primary
                        # geometry; the shrunk one only selects candidates.
                        inter_geom = primary_geom.intersection(union_geom)
                        inter_area = inter_geom.area if not inter_geom.is_empty else 0
                        perc_overlap = (inter_area / primary_area) * 100 if primary_area > 0 else 0
                    else:
                        # Reached for a missing name: it never compares equal
                        # to itself, so the selection above comes back empty.
                        inter_area = 0
                        perc_overlap = 0

                    row = {
                        "Primary Geography ID": primary_geo,
                        "Primary Geography NameCol": primary_name,
                        "Other Geography ID": other_id,
                        "Other Geography NameCol": name_val,
                        "Primary Area (sq ft)": primary_area,
                        "Intersection Area (sq ft)": inter_area,
                        "Percentage Overlap": perc_overlap
                    }
                    rows.append(row)

        overlap_df = pd.DataFrame(rows, columns=columns)
        display(overlap_df.head())

        filename = f'longform_crosswalk_{primary_geo}_to_others.csv'
        overlap_df.to_csv(filename, index=False)
        files.download(filename)
        print(f"Long-form crosswalk generation complete. File downloaded: {filename}")

run_button_long.on_click(generate_longform_crosswalk)


Dropdown(description='Primary (Long):', options=('pp', 'fb', 'sd', 'bid', 'ibz', 'cd', 'dsny', 'hc', 'cc_upcom…

SelectMultiple(description='Targets (Long):', options=('pp', 'fb', 'sd', 'bid', 'ibz', 'cd', 'dsny', 'hc', 'cc…

Button(description='Generate Long-form Crosswalk', style=ButtonStyle())

Output()