We've obtained a community-district (CD) level dataset from DATA2GO.nyc, created by the nonprofit Measure of America, which provides the population of each CD and the percentage of adults who are obese in each CD. The population counts are based on the 2013-2017 census estimates, and the obesity percentages are based on the Community Health Survey.

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import fiona
import pyproj
import shapely.geometry as geom

In [3]:
# Columns kept from the Data2Go sheet: district id, labels, population and obesity.
data2go_columns = ['GEO_ID', 'GEO_LABEL', 'GEO_DISPLAY_NAME',
                   'total_population_cd', 'obesity_cd']

# header=15: column names are taken from 0-indexed row 15 of the sheet
# (the rows above are presumably Data2Go metadata -- TODO confirm).
# GEO_ID is read as float, the same dtype as the shapefile's `boro_cd` key
# that it is later merged against.
data2go_sheet = pd.read_excel(
    'datasets/data2go/Data2Go_cd_population_obese.xlsx',
    header=15,
    dtype={'GEO_ID': float},
)
df_cd_population_obese = data2go_sheet.loc[:, data2go_columns]

df_cd_population_obese

Unnamed: 0,GEO_ID,GEO_LABEL,GEO_DISPLAY_NAME,total_population_cd,obesity_cd
0,201.0,Bronx CD 001,Mott Haven and Melrose,68654.360372,4
1,202.0,Bronx CD 002,Hunts Point and Longwood,90962.0,4
2,203.0,Bronx CD 003,Morrisania and Crotona,157731.524396,10
3,204.0,Bronx CD 004,Highbridge and Concourse,111786.0,10
4,205.0,Bronx CD 005,Fordham and University Heights,51341.057528,10
5,206.0,Bronx CD 006,Belmont and East Tremont,139925.057704,13
6,207.0,Bronx CD 007,Kingsbridge Heights and Bedford,209980.311896,10
7,208.0,Bronx CD 008,Riverdale and Fieldston,222517.166119,11
8,209.0,Bronx CD 009,Parkchester and Soundview,114926.0,21
9,210.0,Bronx CD 010,Throgs Neck and Co-op City,134992.100622,34


Merge in the boundaries of each CD from the CD shape file

In [5]:
# Project geometries to EPSG 4326 to match the projection of the obesity map.
# The EPSG code is passed straight to geopandas instead of going through
# fiona.crs.from_epsg(): on older Fiona that helper builds the legacy
# {'init': 'epsg:4326'} PROJ mapping, which pyproj 2+ deprecates, and its
# return type differs between Fiona versions. `to_crs(epsg=...)` is the
# geopandas-documented way to name the target CRS.
df_cd = gpd.read_file(
    'datasets/Community Districts/geo_export_bf9282a4-4d98-4f1a-9606-0bf283c4c69d.shp'
).to_crs(epsg=4326)
df_cd

Unnamed: 0,boro_cd,shape_area,shape_leng,geometry
0,311.0,1.031778e+08,51549.557899,"POLYGON ((-73.97299 40.60881, -73.97259 40.606..."
1,313.0,8.819569e+07,65821.875617,"POLYGON ((-73.98372 40.59582, -73.98305 40.595..."
2,312.0,9.952550e+07,52245.830495,"POLYGON ((-73.97140 40.64826, -73.97121 40.647..."
3,304.0,5.666322e+07,37008.100320,"POLYGON ((-73.89647 40.68234, -73.89653 40.682..."
4,206.0,4.266431e+07,35875.710998,"POLYGON ((-73.87185 40.84376, -73.87192 40.843..."
...,...,...,...,...
66,227.0,3.143201e+07,28391.629705,"POLYGON ((-73.87054 40.86967, -73.87053 40.869..."
67,401.0,1.715489e+08,90042.718108,"MULTIPOLYGON (((-73.90647 40.79018, -73.90251 ..."
68,402.0,1.398915e+08,71543.044665,"POLYGON ((-73.89792 40.75424, -73.89797 40.754..."
69,502.0,5.931981e+08,142669.724480,"MULTIPOLYGON (((-74.07347 40.57839, -74.07345 ..."


In [8]:
# Attach the population/obesity attributes to the district polygons.
# The inner join keeps only rows whose `boro_cd` has a matching `GEO_ID`;
# the result is then ordered by district code.
df_cd_pop = (
    df_cd
    .merge(
        df_cd_population_obese,
        how='inner',
        left_on='boro_cd',
        right_on='GEO_ID',
    )
    .sort_values('boro_cd')
)
df_cd_pop

Unnamed: 0,boro_cd,shape_area,shape_leng,geometry,GEO_ID,GEO_LABEL,GEO_DISPLAY_NAME,total_population_cd,obesity_cd
25,101.0,42686590.0,73762.393219,"MULTIPOLYGON (((-74.04388 40.69019, -74.04351 ...",101.0,Manhattan CD 001,Financial District,130217.345712,24
40,102.0,37689210.0,34130.595861,"POLYGON ((-73.99684 40.73736, -73.99362 40.736...",102.0,Manhattan CD 002,Greenwich Village and Soho,101000.735022,26
39,103.0,46879700.0,30468.3406,"POLYGON ((-73.98878 40.73397, -73.98718 40.733...",103.0,Manhattan CD 003,Lower East Side and Chinatown,103831.154791,32
41,104.0,49311790.0,67623.946684,"POLYGON ((-73.99394 40.77318, -73.99370 40.773...",104.0,Manhattan CD 004,Clinton and Chelsea,126139.483095,24
17,105.0,43790300.0,35288.3052,"POLYGON ((-73.97301 40.76428, -73.97141 40.763...",105.0,Manhattan CD 005,Midtown,193083.299602,21
53,106.0,38729090.0,42705.937484,"MULTIPOLYGON (((-73.96128 40.73016, -73.96128 ...",106.0,Manhattan CD 006,Stuyvesant Town and Turtle Bay,193731.204933,15
42,107.0,53152820.0,39863.701384,"POLYGON ((-73.95965 40.80116, -73.95848 40.800...",107.0,Manhattan CD 007,Upper West Side,103174.978364,28
44,108.0,55168800.0,53561.0969,"MULTIPOLYGON (((-73.94180 40.76905, -73.94257 ...",108.0,Manhattan CD 008,Upper East Side,164500.003324,28
43,109.0,41892180.0,34959.184938,"POLYGON ((-73.94014 40.83037, -73.93963 40.830...",109.0,Manhattan CD 009,Morningside Heights and Hamilton Heights,166931.655361,26
30,110.0,39084630.0,35825.2907,"POLYGON ((-73.93445 40.83598, -73.93456 40.835...",110.0,Manhattan CD 010,Central Harlem,88571.946738,41


In [9]:
# Print a summary of the merged frame (entry count, per-column non-null counts
# and dtypes) to check that the join produced no missing values.
df_cd_pop.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 59 entries, 25 to 48
Data columns (total 9 columns):
boro_cd                59 non-null float64
shape_area             59 non-null float64
shape_leng             59 non-null float64
geometry               59 non-null geometry
GEO_ID                 59 non-null float64
GEO_LABEL              59 non-null object
GEO_DISPLAY_NAME       59 non-null object
total_population_cd    59 non-null float64
obesity_cd             59 non-null int64
dtypes: float64(5), geometry(1), int64(1), object(2)
memory usage: 4.6+ KB


Export as GeoJSON

In [10]:
# Write the merged districts (geometry plus population/obesity columns)
# to disk using the GeoJSON driver.
df_cd_pop.to_file(
    "datasets/Population_Obesity_CD.geojson",
    driver='GeoJSON',
)
