# New York City H3 Cells

## Packages and Utility Functions

In [None]:
from functools import partial
from itertools import chain, count
from toolz import compose_left as compose, do, juxt
from html import escape
from IPython.display import display, display_html
import h3
import numpy as np
import pandas as pd
import shapely
from shapely.geometry import Polygon, MultiPolygon, Point
import geopandas as gpd
import h3pandas
import altair as alt
import gpdvega
import folium
import random

# Shadow the imported display_html with a version that always passes raw=True,
# so the calls below can hand it an HTML string directly.
display_html = partial(display_html, raw=True)

def load_geocsv(filepath_or_buffer, index_col, geo_col):
    """Read a CSV file whose geometry is stored as WKT into a GeoDataFrame.

    Args:
        filepath_or_buffer: Anything ``pandas.read_csv`` accepts as its source
            (path, URL, or file-like object).
        index_col: Column to use as the index of the result.
        geo_col: Column holding WKT text; it is parsed with
            ``shapely.wkt.loads`` and renamed to ``geometry``.

    Returns:
        A ``GeoDataFrame`` built from the parsed table. All other columns are
        read with ``dtype='object'``.
    """
    wkt_parsers = {geo_col: shapely.wkt.loads}
    table = pd.read_csv(
        filepath_or_buffer,
        index_col=index_col,
        converters=wkt_parsers,
        dtype='object',
    )
    table = table.rename(columns={geo_col: 'geometry'})
    return gpd.GeoDataFrame(table)

def get_random_numbers(minimum, maximum):
    """Return an endless generator of ``random.uniform(minimum, maximum)`` draws.

    Args:
        minimum: First bound passed to ``random.uniform``.
        maximum: Second bound passed to ``random.uniform``.

    Returns:
        A generator that never terminates; each ``next()`` draws a new number
        from the module-level ``random`` state.
    """
    def endless_draws():
        while True:
            yield random.uniform(minimum, maximum)

    return endless_draws()

## Loading Data

In [None]:
# Borough boundaries (the source of nyc_area), indexed by borough code.
borough_areas = load_geocsv(
    'https://data.cityofnewyork.us/api/views/jbrz-qt9e/rows.csv?accessType=DOWNLOAD',
    index_col='BoroCode',
    geo_col='the_geom',
)

# Borough boundaries used as the source of nyc_land, indexed by borough code.
borough_land = load_geocsv(
    'https://data.cityofnewyork.us/api/views/7t3b-ywvw/rows.csv?accessType=DOWNLOAD',
    index_col='BoroCode',
    geo_col='the_geom',
)

# Neighborhood areas, indexed by their nta2020 code.
nta_areas = load_geocsv(
    'https://data.cityofnewyork.us/resource/9nt8-h7nd.csv',
    index_col='nta2020',
    geo_col='the_geom',
)

## Generating Boundaries and Visualizing H3 Cells of New York City

`nyc_area` is used to test whether points or polygons fall within New York City’s boundaries. This includes water. `nyc_land`, however, is used to test whether points or polygons fall within a land mass in New York City. It’s used to exclude points and polygons that are partly or entirely in water.

`h3_polygons` is used to uniformly group areas within New York City. Each polygon is uniform in size. However, they may cross state, city, borough, or neighborhood boundaries.

In [None]:
# Build one MultiPolygon per borough dataset: nyc_area from borough_areas and
# nyc_land from borough_land. For each dataset the geometries are dissolved
# into a single shape, exploded into their individual parts, and every part is
# rebuilt from its exterior ring only (so interior holes are dropped) before
# the parts are gathered into a list and wrapped in a MultiPolygon.
# NOTE(review): the final .agg(...) is expected to call the composed function
# on the whole series rather than element by element — confirm with the
# installed pandas/geopandas versions.
nyc_area, nyc_land = (gdf.get(['geometry']) \
                         .dissolve() \
                         .explode(index_parts=False) \
                         .exterior \
                         .apply(Polygon) \
                         .agg(compose(gpd.GeoSeries.to_list, MultiPolygon))
                      for gdf in (borough_areas, borough_land))
# Build the table of resolution-9 H3 cells that intersect nyc_area, indexed by
# 'h3_index' (the cell id converted with h3.string_to_h3).
# Candidate cells are the resolution-9 children of the resolution-3 cell that
# contains nyc_area's centroid. Each candidate's boundary is turned into a
# Polygon and the cell is kept only if that polygon intersects nyc_area.
# 'contains_only_water' is True when the cell polygon is disjoint from nyc_land.
# NOTE(review): any part of the city lying outside that single resolution-3
# parent would produce no cells — confirm the parent covers all of nyc_area.
# NOTE(review): reversed(...) presumably turns the centroid's (x, y) = (lng, lat)
# into the (lat, lng) order geo_to_h3 expects, and the True flag presumably
# requests GeoJSON-style (lng, lat) vertices — confirm against the h3 version in use.
h3_polygons = gpd.GeoDataFrame.from_records(({'h3_index': h3.string_to_h3(h3_index),
                                              'contains_only_water': polygon.disjoint(nyc_land),
                                              'geometry': polygon}
                                             for h3_index in h3.h3_to_children(h3.geo_to_h3(*reversed(nyc_area.centroid.coords[0]), 3), 9)
                                             if (polygon := Polygon(h3.h3_to_geo_boundary(h3_index, True))).intersects(nyc_area)),
                                            'h3_index')
# Show each geography under its own HTML heading, in this fixed order.
for name, obj in (
    ('NYC Area', nyc_area),
    ('NYC Land', nyc_land),
    ('Boroughs Areas', borough_areas),
    ('Boroughs Land', borough_land),
    ('Neighborhood Areas', nta_areas),
    ('H3 Polygons', h3_polygons),
):
    # The title is escaped before it is placed inside the <h1> element.
    display_html('<h1>' + escape(name) + '</h1>')
    display(obj)

In [None]:
# Relate every H3 cell to the borough(s) and neighborhood(s) it spatially joins
# with. Both joins are left joins, so cells without a match are kept.
boro_nta2020_h3_relationships = (
    h3_polygons
    .sjoin(borough_areas, how='left', lsuffix='h3', rsuffix='boro')
    .sjoin(nta_areas, how='left', lsuffix='', rsuffix='nta')
    # Where the neighborhood join supplied no borocode, fall back to the
    # borough index obtained from the first join.
    .pipe(lambda joined: joined.assign(
        borocode=np.where(joined['borocode'].isna(),
                          joined['index_boro'],
                          joined['borocode'])))
    .get(['borocode', 'index_nta'])
    .rename(columns={'borocode': 'boro_code',
                     'index_nta': 'nta2020_code'})
)

### Visualization

Visualization with Altair will fail. There are too many polygons generated by the H3 library for resolution 9.

In [None]:
(
    alt.layer(
        # Bottom layer: borough shapes in a single flat colour.
        alt.Chart(borough_areas).mark_geoshape(color='#c6dbef').encode(),
        # Middle layer: neighborhoods coloured by their borough code, no legend.
        alt.Chart(nta_areas).mark_geoshape().encode(
            color=alt.Color('borocode',
                            scale=alt.Scale(scheme='dark2'),
                            legend=None),
        ),
        # Top layer: H3 cell outlines; the index is converted with
        # h3.h3_to_string and stored in an 'h3_index' column first.
        alt.Chart(
            h3_polygons.assign(h3_index=h3_polygons.index.map(h3.h3_to_string)),
        ).mark_geoshape().encode(
            color=alt.Color(value='transparent'),
            stroke=alt.Color(value='black'),
        ),
    )
    .properties(width=800, height=800)
)

However, visualization with Folium will succeed.

In [None]:
def _add_nta_markers(folium_map):
    """Add one circle marker per neighborhood (NTA) to *folium_map*.

    Each marker is placed on a representative point of the neighborhood's
    geometry. Its tooltip shows the neighborhood name, the borough name, the
    marker's coordinates, and the number of rows in
    ``boro_nta2020_h3_relationships`` that reference the neighborhood.

    Args:
        folium_map: The ``folium.Map`` the markers are added to.

    Returns:
        The list of values returned by each marker's ``add_to`` call.
    """
    # Count relationship rows per neighborhood once, instead of filtering the
    # whole relationships table again for every single marker.
    hexagon_counts = boro_nta2020_h3_relationships.nta2020_code.value_counts()

    markers = []
    for nta2020_code, attributes in nta_areas.iterrows():
        # Reverse the (x, y) pair; presumably (lng, lat) -> (lat, lng), the
        # order folium expects — confirm against the datasets' coordinate order.
        location = attributes.geometry.representative_point().coords[0][::-1]
        # Names come from the dataset and are embedded in HTML, so escape them
        # (some neighborhood names contain characters such as '&').
        tooltip = (f'<strong>{escape(str(attributes.ntaname))}</strong>, '
                   f'{escape(str(attributes.boroname))}<br />'
                   f'<em>({location[0]:.6}, {location[1]:.6})</em><br />'
                   f'{int(hexagon_counts.get(nta2020_code, 0)):,} hexagon(s)')
        marker = folium.CircleMarker(location,
                                     tooltip=tooltip,
                                     radius=3,
                                     color='black',
                                     fill=True,
                                     fill_opacity=0.75,
                                     stroke=False)
        markers.append(marker.add_to(folium_map))
    return markers


# Create the base map, then apply each "add this layer" function to it. `do`
# returns the map itself, so the cell's value (and its output) is the map.
do(juxt(folium.Choropleth(geo_data=borough_areas.to_json(),
                          name='Borough Boundaries',
                          fill_color='none',
                          line_color='black',
                          line_opacity=0.5).add_to,
        folium.Choropleth(geo_data=h3_polygons.to_json(),
                          name='H3 Tiles',
                          fill_color='gray',
                          fill_opacity=0.5,
                          line_color='#808080',
                          line_opacity=0.5).add_to,
        _add_nta_markers),
   folium.Map(location=nyc_area.representative_point().coords[0][::-1],
              tiles='OpenStreetMap',
              zoom_start=11))

## Saving the Geography Data

### Borough Boundaries

In [None]:
# Write borough code, name and geometry to boro.csv, ordered by borough code.
(
    borough_areas
    .reset_index()
    .rename(columns={'BoroCode': 'boro_code', 'BoroName': 'boro_name'})
    .get(['boro_code', 'boro_name', 'geometry'])
    .sort_values('boro_code')
    .to_csv('boro.csv', index=False)
)

### Neighborhood Boundaries

In [None]:
# Write neighborhood code, name, borough code and geometry to nta2020.csv,
# ordered by neighborhood code.
(
    nta_areas
    .reset_index()
    .rename(columns={'nta2020': 'nta2020_code',
                     'borocode': 'boro_code',
                     'ntaname': 'nta2020_name'})
    .get(['nta2020_code', 'nta2020_name', 'boro_code', 'geometry'])
    .sort_values('nta2020_code')
    .to_csv('nta2020.csv', index=False)
)

### H3 Cells

In [None]:
# Write the H3 cells to h3_cell.csv, ordered by (and including) their index.
(
    h3_polygons
    .sort_index()
    .to_csv('h3_cell.csv')
)

### Relationships between Boroughs, Neighborhoods, and H3 Cells

In [None]:
# Write the cell/borough/neighborhood relationships to boro_nta2020_h3.csv.
# Rows are first ordered by index and then sorted by borough and neighborhood code.
(
    boro_nta2020_h3_relationships
    .sort_index()
    .sort_values(['boro_code', 'nta2020_code'])
    .to_csv('boro_nta2020_h3.csv')
)