In [296]:
import geopandas as gpd
import pandas as pd
import numpy as np
import urllib
import requests
import os
import io
import json
import pylab as pl
import shapely
import choroplethNYC as cp
#from fiona.crs import from_epsg

%pylab inline

import scipy as sp
from scipy import stats

Populating the interactive namespace from numpy and matplotlib


# Download Parks Inspection Data

In [270]:
#Loading csv of park inspections into dataframe 
inspection = pd.read_csv("https://data.cityofnewyork.us/api/views/yg3y-7juh/rows.csv?accessType=DOWNLOAD")

In [271]:
#looking through the Overall Celanliness column 
parksinspection = inspection

# Convert the "Overall condition" column to a numerical value.

In [272]:
parksinspection['Overall Condition'] = parksinspection['Overall Condition'].map({'A': 1, 'U': 0, 'N': np.nan})

In [273]:
parksinspection['Overall Condition'].dtype

dtype('float64')

# Group the dataframe by park park id and get the mean condition for each park.

In [274]:
parksinspection.head()

Unnamed: 0,Prop ID,AMPSDistrict,Inspection ID,Season,Round,Date,BeginInspection,EndInspection,Inspection Year,inspector,inspector2,Overall Condition,Cleanliness,Safety Condition,Structural Condition,VisitorCount,Closed?,Comments,InspectionType,inspAddedDate
0,Q066A,1,68214,Summer,1,05/24/2010 12:00:00 AM,12:30 PM,12:46 PM,2010,3,,1.0,A,,,,Under Construction.,"Site closed for bridge maintenance, exterior r...",PIP,2010-05-24 14:40:00
1,QZ284,4,68220,Summer,1,05/24/2010 12:00:00 AM,11:45 AM,11:55 AM,2010,15,,1.0,A,,,,,,PIP,2010-05-24 14:56:00
2,Q121A,12,68227,Summer,1,05/24/2010 12:00:00 AM,11:05 AM,11:35 AM,2010,18,,1.0,A,,,,,,PIP,2010-05-24 15:23:00
3,Q301,12,68228,Summer,1,05/24/2010 12:00:00 AM,10:20 AM,10:54 AM,2010,18,,0.0,U,,,,,,PIP,2010-05-24 15:37:00
4,M196,3,68234,Summer,1,05/25/2010 12:00:00 AM,09:25 AM,09:37 AM,2010,6,,1.0,A,,,,Partial Constr./Rest of Site Rated.,Most of park closed for school repair.,PIP,2010-05-25 12:07:00


In [275]:
parksinspection['Overall Condition'] = parksinspection['Overall Condition'].dropna(axis=0)

In [276]:
parksinspection.head()

Unnamed: 0,Prop ID,AMPSDistrict,Inspection ID,Season,Round,Date,BeginInspection,EndInspection,Inspection Year,inspector,inspector2,Overall Condition,Cleanliness,Safety Condition,Structural Condition,VisitorCount,Closed?,Comments,InspectionType,inspAddedDate
0,Q066A,1,68214,Summer,1,05/24/2010 12:00:00 AM,12:30 PM,12:46 PM,2010,3,,1.0,A,,,,Under Construction.,"Site closed for bridge maintenance, exterior r...",PIP,2010-05-24 14:40:00
1,QZ284,4,68220,Summer,1,05/24/2010 12:00:00 AM,11:45 AM,11:55 AM,2010,15,,1.0,A,,,,,,PIP,2010-05-24 14:56:00
2,Q121A,12,68227,Summer,1,05/24/2010 12:00:00 AM,11:05 AM,11:35 AM,2010,18,,1.0,A,,,,,,PIP,2010-05-24 15:23:00
3,Q301,12,68228,Summer,1,05/24/2010 12:00:00 AM,10:20 AM,10:54 AM,2010,18,,0.0,U,,,,,,PIP,2010-05-24 15:37:00
4,M196,3,68234,Summer,1,05/25/2010 12:00:00 AM,09:25 AM,09:37 AM,2010,6,,1.0,A,,,,Partial Constr./Rest of Site Rated.,Most of park closed for school repair.,PIP,2010-05-25 12:07:00


In [277]:
parksinspection = parksinspection.groupby('Prop ID', as_index=False)['Overall Condition'].mean()

In [278]:
parksinspection.head()

Unnamed: 0,Prop ID,Overall Condition
0,B001,0.897436
1,B002,0.761905
2,B003,1.0
3,B006,1.0
4,B007,0.314286


# 2. Download parks shapefile for the NYC Open Data data.

In [279]:
#checking environmental variable is set properly
os.getenv("PUIDATA")

'/nfshome/ram844/PUIdata'

In [280]:
#downloading the zipped package containing the parks inspection shapefile to my PUIdata folder
url = "https://data.cityofnewyork.us/api/geospatial/k2ya-ucmv?method=export&format=Shapefile"
urllib.request.urlretrieve(url, "parkfile.gz")
!unzip -d $PUIDATA parkfile.gz

Archive:  parkfile.gz
  inflating: /nfshome/ram844/PUIdata/geo_export_13c21d79-254c-42b3-88c7-327a22ac7bee.dbf  
  inflating: /nfshome/ram844/PUIdata/geo_export_13c21d79-254c-42b3-88c7-327a22ac7bee.shp  
  inflating: /nfshome/ram844/PUIdata/geo_export_13c21d79-254c-42b3-88c7-327a22ac7bee.shx  
  inflating: /nfshome/ram844/PUIdata/geo_export_13c21d79-254c-42b3-88c7-327a22ac7bee.prj  


In [281]:
#putting parks inspection shapefile into geopandas dataframe
geoparksinspection = gpd.GeoDataFrame.from_file(os.getenv("PUIDATA") + "/" + "geo_export_084a16e9-3dbb-4516-a6fe-91c6184a1641.shp")
geoparksinspection

Unnamed: 0,acquisitio,acres,address,borough,class,commission,communityb,councildis,department,eapply,...,precinct,retired,signname,subcategor,typecatego,url,us_congres,waterfront,zipcode,geometry
0,19440801000000.00000,0.991,,M,PLGD,20100106000000.00000,111,9,M-11,Abraham Lincoln Playground,...,25,False,Abraham Lincoln Playground,Neighborhood Plgd,Community Park,http://www.nycgovparks.org/parks/M193/,13,No,10037,POLYGON ((-73.93682693707335 40.81184552631012...
1,,0.030,,Q,PARK,20090423000000.00000,413,23,Q-13,Father Reilly Square,...,105,False,Father Reilly Square,Sitting Area/Triangle/Mall,Triangle/Plaza,http://www.nycgovparks.org/parks/Q159/,5,No,11428,POLYGON ((-73.73628053979716 40.72267578865961...
2,19360824000000.00000,0.911,2300 Ryer Avenue,X,PARK,20100106000000.00000,205,15,X-05,Slattery Playground,...,46,False,Slattery Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/X085/,15,No,10458,POLYGON ((-73.89825077067158 40.85693358482933...
3,19520206000000.00000,0.189,349 RODNEY STREET,B,PARK,20100106000000.00000,301,34,B-01,Rodney Playground Center,...,90,False,Rodney Playground Center,Sitting Area/Triangle/Mall,Neighborhood Park,http://www.nycgovparks.org/parks/B223PD/,7,No,11211,POLYGON ((-73.95533291494969 40.71042213200856...
4,19240403000000.00000,0.554,124 11 STREET,B,PARK,20110712000000.00000,306,39,B-06,Ennis Playground,...,78,False,Ennis Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/B095/,7,No,11215,POLYGON ((-73.99424820391394 40.67057586455565...
5,19380606000000.00000,0.036,,B,PARK,20100106000000.00000,301,33,B-01,Pvt. Sonsire Triangle,...,94,False,Pvt. Sonsire Triangle,Sitting Area/Triangle/Mall,Triangle/Plaza,http://www.nycgovparks.org/parks/B196/,12,No,11211,POLYGON ((-73.95241848162225 40.71828618172859...
6,20030304000000.00000,0.124,,R,PARK,20100106000000.00000,501,49,R-01,Richmond Terrace Cemetery,...,120,False,Richmond Terrace Cemetery,Cemetery,Cemetery,http://www.nycgovparks.org/parks/R154/,11,No,10310,POLYGON ((-74.12099740656076 40.63948244108408...
7,19291120000000.00000,920.426,,R,PARK,20100106000000.00000,"502, 503",5051,R-02,Freshkills Park,...,122,False,Freshkills Park,Flagship Park,Undeveloped,http://www.nycgovparks.org/parks/R017/,11,Yes,"10312, 10314",(POLYGON ((-74.16735131457774 40.5895298406544...
8,19550127000000.00000,1.210,9920 SEAVIEW AVENUE,B,PARK,20100106000000.00000,318,46,B-18,Bayview Playground (PS 272),...,69,False,Bayview Playground,JOP,Jointly Operated Playground,http://www.nycgovparks.org/parks/B274/,8,No,11236,POLYGON ((-73.88605616693891 40.63518371430879...
9,19340626000000.00000,0.567,358 SCHERMERHORN STREET,B,PARK,20100106000000.00000,302,33,B-02,Sixteen Sycamores Playground,...,84,False,Sixteen Sycamores Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/B137/,8,No,11217,"POLYGON ((-73.9811229077025 40.68694745390135,..."


# Extract the Brooklyn park properties from this file 

In [282]:
bkParks = geoparksinspection[geoparksinspection["borough"] == "B"] # reduces to only Brooklyn portion of the data frame
bkParks

Unnamed: 0,acquisitio,acres,address,borough,class,commission,communityb,councildis,department,eapply,...,precinct,retired,signname,subcategor,typecatego,url,us_congres,waterfront,zipcode,geometry
3,19520206000000.00000,0.189,349 RODNEY STREET,B,PARK,20100106000000.00000,301,34,B-01,Rodney Playground Center,...,90,False,Rodney Playground Center,Sitting Area/Triangle/Mall,Neighborhood Park,http://www.nycgovparks.org/parks/B223PD/,7,No,11211,POLYGON ((-73.95533291494969 40.71042213200856...
4,19240403000000.00000,0.554,124 11 STREET,B,PARK,20110712000000.00000,306,39,B-06,Ennis Playground,...,78,False,Ennis Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/B095/,7,No,11215,POLYGON ((-73.99424820391394 40.67057586455565...
5,19380606000000.00000,0.036,,B,PARK,20100106000000.00000,301,33,B-01,Pvt. Sonsire Triangle,...,94,False,Pvt. Sonsire Triangle,Sitting Area/Triangle/Mall,Triangle/Plaza,http://www.nycgovparks.org/parks/B196/,12,No,11211,POLYGON ((-73.95241848162225 40.71828618172859...
8,19550127000000.00000,1.210,9920 SEAVIEW AVENUE,B,PARK,20100106000000.00000,318,46,B-18,Bayview Playground (PS 272),...,69,False,Bayview Playground,JOP,Jointly Operated Playground,http://www.nycgovparks.org/parks/B274/,8,No,11236,POLYGON ((-73.88605616693891 40.63518371430879...
9,19340626000000.00000,0.567,358 SCHERMERHORN STREET,B,PARK,20100106000000.00000,302,33,B-02,Sixteen Sycamores Playground,...,84,False,Sixteen Sycamores Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/B137/,8,No,11217,"POLYGON ((-73.9811229077025 40.68694745390135,..."
10,19350809000000.00000,1.240,,B,PARK,20100106000000.00000,304,37,B-04,Heckscher Playground,...,83,False,Heckscher Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/B139/,7,No,11221,"POLYGON ((-73.91761203826853 40.6953723016629,..."
11,19420209000000.00000,1.171,,B,PARK,20100106000000.00000,302,33,B-02,Steuben Playground,...,88,False,Steuben Playground,Neighborhood Plgd,Neighborhood Park,http://www.nycgovparks.org/parks/B221/,7,No,11205,POLYGON ((-73.96344411176511 40.69695233754151...
17,19400606000000.00000,0.197,4814 KINGS HIGHWAY,B,PARK,20100106000000.00000,318,45,B-18,Sunners Playground,...,63,False,Sunners Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/B206/,8,No,11234,POLYGON ((-73.92943855562463 40.63239670833995...
27,19611026000000.00000,1.616,10002 GLENWOOD ROAD,B,PARK,20100106000000.00000,318,46,B-18,100% Playground,...,69,False,100% Playground,JOP,Jointly Operated Playground,http://www.nycgovparks.org/parks/B342/,8,No,11236,POLYGON ((-73.89939837273964 40.64625496420079...
32,19500309000000.00000,1.225,126 BRIGHTWATER COURT,B,PARK,20100106000000.00000,313,48,B-13,Brighton Playground,...,60,False,Brighton Playground,Neighborhood Plgd,Playground,http://www.nycgovparks.org/parks/B169A/,8,Yes,11235,POLYGON ((-73.96593741470511 40.57491015635568...


In [283]:
bkParks.department(['B-', np.nan]).str.replace('B0', regex=True)

TypeError: 'Series' object is not callable

In [284]:
bkParks.shape

(608, 36)

In [285]:
#ax = bkParks.plot(column="condition", k=3, cmap="RdBu", legend=True, figsize=(10,10) )
#ax.set_ylabel("latitude", fontsize=20)
#ax.set_xlabel("longitude", fontsize=20);

# 3 Download unemployment rate information from ACS 2015 (5 year survey) for Brooklyn at the PUMA geographical aggregation level.

In [None]:
#url = "https://api.census.gov/data/2015/acs/acs1/variables.json"
#resp = requests.request('GET', url)
#aff1y = json.loads(resp.text)

In [286]:
#acs15_final.csv collected using wget
pumaUnemployment = pd.read_csv(os.getenv("PUIDATA") + "/acs15_final.csv")

In [287]:
pumaUnemployment.head()

Unnamed: 0.1,Unnamed: 0,unemployedF,total,public use microdata area
0,0,0.118098,1304,100
1,1,0.120651,1782,200
2,2,0.086304,1066,300
3,3,0.117403,724,401
4,4,0.083603,2476,402


# 4. Obtain the NYC shapefile for PUMA

In [288]:
#downloading the zipped package containing the NYC shapefile for PUMA and unzipping to my PUIdata folder
url2 = "https://data.cityofnewyork.us/api/geospatial/cwiz-gcty?method=export&format=Shapefile"
urllib.request.urlretrieve(url2, "pumafile.gz")
!unzip -d $PUIDATA pumafile.gz


Archive:  pumafile.gz
  inflating: /nfshome/ram844/PUIdata/geo_export_fbcad0c5-6f8d-4459-a361-9d2b942c5358.dbf  
  inflating: /nfshome/ram844/PUIdata/geo_export_fbcad0c5-6f8d-4459-a361-9d2b942c5358.shp  
  inflating: /nfshome/ram844/PUIdata/geo_export_fbcad0c5-6f8d-4459-a361-9d2b942c5358.shx  
  inflating: /nfshome/ram844/PUIdata/geo_export_fbcad0c5-6f8d-4459-a361-9d2b942c5358.prj  


In [289]:
#putting puma shapefile into geopandas dataframe

pumashp = gpd.GeoDataFrame.from_file(os.getenv("PUIDATA") + "/" + 
                                     "geo_export_88087a8c-7575-4cbe-bb64-41eb51e0f4a0.shp")

In [290]:
pumashp.head()

Unnamed: 0,puma,shape_area,shape_leng,geometry
0,3701,97928520.0,53227.144461,POLYGON ((-73.89641133483133 40.90450452082026...
1,3702,188986000.0,106050.002302,"POLYGON ((-73.8442314689986 40.86942035096838,..."
2,3703,267013700.0,304071.257466,(POLYGON ((-73.78833349834532 40.8346671297593...
3,3704,106212900.0,47970.901277,"POLYGON ((-73.84792614069238 40.8713422330779,..."
4,4015,81054380.0,50007.415493,POLYGON ((-73.95374853778468 40.63858633758547...


In [None]:
# combine lat and lon to one column
pumashp['lonlat'] = list(zip(linkNYC.longitude, linkNYC.latitude))
# Create Point Geometry for based on lonlat column
linkNYC['geometry'] = linkNYC[['lonlat']].applymap(lambda x:shapely.geometry.Point(x))

In [299]:
pumashp.plot

<bound method GeoDataFrame.plot of     puma    shape_area     shape_leng  \
0   3701  9.792852e+07   53227.144461   
1   3702  1.889860e+08  106050.002302   
2   3703  2.670137e+08  304071.257466   
3   3704  1.062129e+08   47970.901277   
4   4015  8.105438e+07   50007.415493   
5   4016  1.207135e+08  109477.751817   
6   3705  1.224951e+08   68657.316149   
7   3706  4.388687e+07   51799.404183   
8   3707  4.228113e+07   37347.925798   
9   4006  6.184965e+07   42555.996557   
10  3708  5.589695e+07   34852.437966   
11  3709  1.241177e+08   73288.964465   
12  3710  1.377966e+08   91086.012338   
13  3801  8.124958e+07   64103.138480   
14  4011  4.762886e+07   34965.040151   
15  4012  1.123044e+08   97261.951304   
16  3802  4.689908e+07   37925.566208   
17  4014  8.768042e+07   51089.073159   
18  3803  3.984279e+07   38064.749400   
19  3804  6.461212e+07   62599.500502   
20  3805  5.516881e+07   53561.096534   
21  3806  8.849786e+07   47538.426032   
22  3807  8.560261e+07

# 5 Download the file assigning each park to a Puma

In [301]:
parkByPuma = pd.read_csv("https://serv.cusp.nyu.edu/~fbianco/PUIdata/parkToPuma.csv")

In [302]:
parkByPuma.head()

Unnamed: 0,puma,condition,gispropnum
0,4001,0.633277,52
1,4002,0.456086,17
2,4003,0.384951,23
3,4004,0.648573,49
4,4005,0.718519,29


# 6 Data Aggregation

merge the dataframes and geodataframes (pumas, park inspections, unemployment), group them so that each PUMA has

the unemployed fraction
the average park condition
the number of parks per PUMA
the shape of the area (PUMA)
Your deliverable for this task is a dataframe like the one below

You can find the solution here - if you use this solution you will not get the 10 points for this deliverable (but you can go back later to this! and if you do get the solution later you will get the points!) You will stil have to secure the PUMA shape file if you want to make the plots and merge it with this file



In [306]:
bkmerged = pumashp.merge(parkByPuma, left_on='puma', right_on='puma', how='outer')

In [307]:
bkmerged.head()

Unnamed: 0,puma,shape_area,shape_leng,geometry,condition,gispropnum
0,3701,97928520.0,53227.144461,POLYGON ((-73.89641133483133 40.90450452082026...,,
1,3702,188986000.0,106050.002302,"POLYGON ((-73.8442314689986 40.86942035096838,...",,
2,3703,267013700.0,304071.257466,(POLYGON ((-73.78833349834532 40.8346671297593...,,
3,3704,106212900.0,47970.901277,"POLYGON ((-73.84792614069238 40.8713422330779,...",,
4,4015,81054380.0,50007.415493,POLYGON ((-73.95374853778468 40.63858633758547...,,
