In [None]:
# Render inline matplotlib figures in the high-resolution "retina" format.
%config InlineBackend.figure_formats = ["retina"]

In [None]:
!pip install -q folium geopandas

# Neighborhoods
Based on ["An Extremely Detailed Map of New York City Neighborhoods"](https://www.nytimes.com/interactive/2023/upshot/extremely-detailed-nyc-neighborhood-map.html) (Buchanan et al. 2023)

In [None]:
import json
from math import log

import folium
import geopandas as gpd
import matplotlib.pyplot as plt

from matplotlib.colors import to_hex
from shapely import MultiPolygon

In [None]:
# Reader-submitted neighborhood outlines, fetched straight from the course repository.
submissions_gdf = gpd.read_file(
    "https://raw.githubusercontent.com/PUBPOL-2130/notebooks/refs/heads/main/data/nyt_neighborhood_submissions.geojson"
)

In [None]:
# Display the submissions table (Jupyter truncates long frames automatically).
submissions_gdf

In [None]:
# Quick static plot of every submitted geometry as a sanity check of the load.
submissions_gdf.plot()

In [None]:
# Number of submissions per neighborhood label.
submissions_gdf["neighborhood"].value_counts()

In [None]:
# Overlay every outline labeled "Upper West Side" on a single map; the strokes
# are semi-transparent, so overlapping outlines appear darker.
submissions_map = folium.Map(
    location=[40.787, -73.9754],
    tiles="Cartodb Positron",  # use a less cluttered basemap
    zoom_start=13,
)
is_upper_west_side = submissions_gdf["neighborhood"] == "Upper West Side"
for outline in submissions_gdf.loc[is_upper_west_side, "geometry"]:
    # Geometry coordinates are (x, y) pairs; folium takes them in (y, x) order
    # (presumably (lat, lon) -- matches the map center above).
    ring = [(y, x) for x, y in outline.exterior.coords]
    folium.PolyLine(ring, weight=2, opacity=0.5).add_to(submissions_map)

In [None]:
# Render the interactive map of Upper West Side submissions.
submissions_map

In [None]:
!curl -O https://raw.githubusercontent.com/PUBPOL-2130/notebooks/refs/heads/main/data/nyt_neighborhood_block_weights.json

In [None]:
# Read the block -> neighborhood weight table; the context manager closes the
# file handle as soon as parsing finishes instead of leaving it to the GC.
with open("nyt_neighborhood_block_weights.json") as weights_file:
    block_weights = json.load(weights_file)

In [None]:
# Peek at the first ten entries to see the structure of the table.
list(block_weights.items())[:10]  # Census block GEOID -> neighborhood weights

## Neighborhood cores

In [None]:
!curl -O https://www2.census.gov/geo/tiger/TIGER2024/TABBLOCK20/tl_2024_36_tabblock20.zip

In [None]:
# Load the downloaded TIGER block shapefile and key the rows by GEOID20 so
# blocks can be looked up by the GEOIDs used in block_weights.
block_gdf = (
    gpd.read_file("tl_2024_36_tabblock20.zip")
    .set_index("GEOID20")
)

Two statistics describe how strongly a block belongs to a neighborhood:
1. Of the submissions that include \<block\>, what % labeled it as \<neighborhood\>? (global: what would most New Yorkers call this block?)
2. Of the submissions that are labeled with \<neighborhood\>, what % include \<block\>? (local: among the people who agree that this neighborhood exists, what do they consider it to be?)

Consider the cases of:
* a cohesive but unpopular neighborhood (most blocks have high (1), low (2))
* a diffuse/controversial but popular neighborhood (most blocks have low (1), high (2))


In [None]:
def core(neighborhood, cutoff=0):  # def. (1)
    """Return the blocks whose weight for ``neighborhood`` is at least ``cutoff``.

    Reads the notebook-level ``block_weights`` (GEOID -> {neighborhood: weight})
    and ``block_gdf`` (block geometries indexed by GEOID20). The weights are
    presumably statistic (1) from the notes above -- confirm against the data.

    Parameters
    ----------
    neighborhood : str
        Neighborhood label to look up in each block's weight mapping.
    cutoff : number, default 0
        Minimum weight (inclusive) a block needs in order to be kept.

    Returns
    -------
    GeoDataFrame
        The matching rows of ``block_gdf`` with an added ``weight`` column and
        the GEOID20 index moved back into an ordinary column.
    """
    selected = {}
    for geoid, weights in block_weights.items():
        if neighborhood not in weights:
            continue
        weight = weights[neighborhood]
        if weight >= cutoff:
            selected[geoid] = weight

    core_gdf = block_gdf.loc[selected.keys()]
    # The dict is keyed by GEOID, matching core_gdf's index.
    core_gdf["weight"] = selected
    return core_gdf.reset_index()

In [None]:
# Shade each block by its "Upper West Side" weight, keeping only blocks whose
# weight is at least 0.1.
core_gdf = core("Upper West Side", cutoff=0.1)

core_map = folium.Map(
    location=[40.787, -73.9754],
    tiles="Cartodb Positron",  # use a less cluttered basemap
    zoom_start=13,
)

core_layer = folium.Choropleth(
    geo_data=core_gdf,  # block shapes
    data=core_gdf,  # values to shade by
    columns=["GEOID20", "weight"],
    key_on="feature.properties.GEOID20",
    fill_color="OrRd",
)
core_layer.add_to(core_map)

In [None]:
# Render the choropleth of Upper West Side block weights.
core_map

In [None]:
# For every block, how many distinct neighborhood labels carry a weight.
block_neighborhood_counts = {
    block_id: len(label_weights)
    for block_id, label_weights in block_weights.items()
}

In [None]:
# One bin per integer count: edges 1, 2, ..., max + 1. The edges must run one
# past the maximum; with edges stopping at max - 1 the blocks with the largest
# count fall outside the histogram range and the last bin merges two counts.
counts_per_block = list(block_neighborhood_counts.values())
largest_count = max(counts_per_block)
plt.hist(counts_per_block, bins=range(1, largest_count + 2))
plt.title("Neighborhood uncertainty: block counts by # of neighborhoods")
plt.xlabel("# of neighborhoods block is in")
plt.ylabel("Count")
plt.show()

In [None]:
# Shannon entropy (natural log, so in nats) of each block's neighborhood
# weights. Zero weights are skipped: log(0) raises ValueError, and by
# convention 0 * log(0) contributes 0 to the entropy.
# NOTE(review): this is a true entropy only if each block's weights sum to 1 -- confirm.
block_entropies = {
    geoid: -sum(v * log(v) for v in weights.values() if v > 0)
    for geoid, weights in block_weights.items()
}

In [None]:
# Distribution of per-block entropy across all blocks in block_entropies.
entropy_fig, entropy_ax = plt.subplots()
entropy_ax.hist(list(block_entropies.values()), bins=25)
entropy_ax.set_title("Neighborhood uncertainty: block counts by Shannon entropy")
entropy_ax.set_xlabel("Shannon entropy of neighborhood assignment")
entropy_ax.set_ylabel("Count")
plt.show()

In [None]:
# Attach the entropies to the block table; the dict is keyed by GEOID, matching
# block_gdf's index.
block_gdf["entropy"] = block_entropies

# Keep only the blocks that have an entropy value (i.e. appear in block_weights)
# and move GEOID20 back into a column for use as a choropleth key.
scored_geoids = list(block_entropies)
nyc_gdf = block_gdf.loc[scored_geoids].reset_index()

In [None]:
# Preview the first five scored blocks.
nyc_gdf.head(5)

In [None]:
# The 100 neighborhood labels with the most submissions.
submission_counts = submissions_gdf["neighborhood"].value_counts()
top_neighborhoods = submission_counts.head(100)
top_neighborhoods

In [None]:
# Group block GEOIDs under each top neighborhood whose weight on that block
# exceeds the cutoff; a block may land in several neighborhoods or in none.
neighborhood_blocks = {name: [] for name in top_neighborhoods.keys()}
for block_id, label_weights in block_weights.items():
    for name, label_weight in label_weights.items():
        # increase this cutoff to tighten boundaries
        if label_weight > 0.3 and name in neighborhood_blocks:
            neighborhood_blocks[name].append(block_id)

In [None]:
# Merge each neighborhood's member blocks into one dissolved geometry series.
neighborhood_geos = {
    neighborhood: block_gdf.loc[member_geoids].dissolve().geometry
    for neighborhood, member_geoids in neighborhood_blocks.items()
}

In [None]:
# Base map for the entropy layer and the neighborhood outlines.
entropy_map = folium.Map(
    location=[40.65, -73.95],
    tiles="Cartodb Positron",
    zoom_start=12,
)
# Outline colors for the neighborhood polygons; indexed modulo the list length
# below, so colors repeat once there are more neighborhoods than entries.
colors = ['#0099cd', '#ffca5d', '#00cd99', '#99cd00', '#cd0099', '#aa44ef', '#8dd3c7', '#bebada', '#fb8072', '#80b1d3', '#fdb462', '#b3de69', '#fccde5', '#bc80bd', '#ccebc5', '#ffed6f', '#ffffb3', '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#b15928', '#64ffda', '#00B8D4', '#A1887F', '#76FF03', '#DCE775', '#B388FF', '#FF80AB', '#D81B60', '#26A69A', '#FFEA00', '#6200EA']

# Block-level entropy layer; created hidden (show=False) so it can be switched
# on from the layer control.
entropy_layer = folium.Choropleth(
    name="Neighborhood uncertainty (Shannon entropy)",
    geo_data=nyc_gdf,
    data=nyc_gdf,
    columns=["GEOID20", "entropy"],
    key_on="feature.properties.GEOID20",
    fill_color="OrRd",
    fill_opacity=0.5,
    line_opacity=0.2,
    show=False,
)
entropy_layer.add_to(entropy_map)

# Draw the outline of every dissolved neighborhood, one color per neighborhood.
for idx, (neighborhood, geos) in enumerate(neighborhood_geos.items()):
    outline_color = colors[idx % len(colors)]
    for geo in geos:  # an empty series simply draws nothing
        # Treat a single polygon as a one-part shape so both cases share one path.
        parts = geo.geoms if isinstance(geo, MultiPolygon) else [geo]
        for part in parts:
            # Only the exterior ring is drawn; coordinates are swapped from
            # (x, y) to the (y, x) order passed to folium.
            folium.Polygon(
                [(y, x) for x, y in part.exterior.coords],
                weight=6,
                tooltip=neighborhood,
                color=outline_color,
            ).add_to(entropy_map)

# Add an always-expanded layer control so the named layers can be toggled.
folium.LayerControl(collapsed=False).add_to(entropy_map)

In [None]:
# Render the map with neighborhood outlines and the optional entropy layer.
entropy_map