# Clean Street Quality Data and Merge to Citibike Data

In [1]:
# Import the street assessment shapefile into a geopandas dataframe and remove streets with no rating
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Load every street segment from the assessment shapefile. All attribute
# columns are kept here; selecting [['Rating_B', 'geometry']] is enough if only
# the rating and the geometry are needed later on.
streets_raw = gpd.read_file('../data/external/StreetAssessmentRating/StreetAssessmentRating.shp')

# A Rating_B of 0 marks a street with no rating, so those rows are dropped.
# The remaining segments are reprojected to EPSG:4326 (lon/lat). The `epsg=`
# keyword is used instead of the {'init': 'epsg:4326'} dict, which is a
# deprecated way of spelling a CRS.
st_quality = streets_raw[streets_raw.Rating_B != 0].to_crs(epsg=4326)
st_quality.head()

Unnamed: 0,Length,RatingDate,RatingFY_S,RatingWord,Rating_B,SegmentID,Shape_len,UsageClass,Width,geometry
5,465.0,2008-06-12,2008,GOOD,8,15,534.237775,,30.0,LINESTRING (-74.25505718173595 40.505211202870...
6,151.0,2014-10-17,2015,GOOD,8,16,134.372616,,30.0,LINESTRING (-74.25422295126857 40.506165510159...
7,211.0,2010-05-08,2010,GOOD,8,17,200.084982,,30.0,LINESTRING (-74.25317481600673 40.504767963667...
8,331.0,2014-10-17,2015,GOOD,7,20,317.498031,L,18.58,LINESTRING (-74.2530943012904 40.5062970118639...
9,213.0,2014-10-17,2015,FAIR,6,25,186.271308,,30.0,LINESTRING (-74.25493722035228 40.507701030695...


In [6]:
# Import citibike station csv into geopandas dataframe
from shapely.geometry import Point

# Radius of the area drawn around each station. The station coordinates are
# raw longitude/latitude values, so this radius is expressed in degrees
# (0.0005 degrees is roughly 55 m north-south, somewhat less east-west).
STATION_BUFFER_DEG = 0.0005

stations = pd.read_csv('../data/external/citibike_station_info.csv')

# One point per station, in (x, y) = (longitude, latitude) order.
station_points = gpd.GeoSeries(
    [Point(lon, lat) for lon, lat in zip(stations.Longitude, stations.Latitude)]
)
# Grow each point into a small polygon so that street segments passing close
# to a station intersect it.
geometry = station_points.buffer(STATION_BUFFER_DEG)

# Declare the CRS when the frame is built rather than assigning it afterwards
# with the deprecated {'init': 'epsg:4326'} dict.
geo_stations = gpd.GeoDataFrame(stations, geometry=geometry, crs="EPSG:4326")
geo_stations.head()

664

In [7]:
# Merge street quality data with citibike stations using Geopandas Spatial Merge
# Each street segment is paired with every station polygon it intersects.
# 'intersects' is sjoin's default test; the keyword that names it was renamed
# from `op` to `predicate` between geopandas releases, so it is left implicit
# to keep this cell working on either side of that rename.
stations_st_quality = (
    gpd.sjoin(st_quality, geo_stations, how="inner")
    # index_right only records the row label of the matched station.
    .drop(columns="index_right")
)
stations_st_quality.head(10)

Unnamed: 0,Rating_B,geometry,Station_id,Station_Name,Location,Latitude,Longitude
13151,6,LINESTRING (-74.0160354085487 40.6746801873317...,3353,Reed St & Van Brunt St,Reed St & Van Brunt St,40.674784,-74.016128
13152,6,LINESTRING (-74.0160354085487 40.6746801873317...,3353,Reed St & Van Brunt St,Reed St & Van Brunt St,40.674784,-74.016128
13156,6,LINESTRING (-74.01542626954938 40.675199033079...,3353,Reed St & Van Brunt St,Reed St & Van Brunt St,40.674784,-74.016128
13157,8,LINESTRING (-74.01540147240183 40.677098420376...,3348,Coffey St & Conover St,Coffey St & Conover St,40.677236,-74.015665
13158,8,LINESTRING (-74.01540147240183 40.677098420376...,3348,Coffey St & Conover St,Coffey St & Conover St,40.677236,-74.015665
16090,7,LINESTRING (-74.0141791276173 40.6762641661040...,3348,Coffey St & Conover St,Coffey St & Conover St,40.677236,-74.015665
16102,8,LINESTRING (-74.01477788784943 40.677628243565...,3348,Coffey St & Conover St,Coffey St & Conover St,40.677236,-74.015665
13235,8,LINESTRING (-74.0164538909555 40.7050921783136...,2008,Little West St & 1 Pl,Little West St & 1 Pl,40.705693,-74.016777
13236,7,LINESTRING (-74.01631333471165 40.705525871109...,2008,Little West St & 1 Pl,Little West St & 1 Pl,40.705693,-74.016777
13237,8,LINESTRING (-74.01631333471165 40.705525871109...,2008,Little West St & 1 Pl,Little West St & 1 Pl,40.705693,-74.016777


In [9]:
# Find average of street quality ratings for each station
# Columns that identify a station; one output row is produced per combination.
station_keys = ['Station_id', 'Station_Name', 'Location', 'Latitude', 'Longitude']

# Average only Rating_B. Calling .mean() on the whole frame would also try to
# average the geometry and any text/date columns, which recent pandas versions
# reject instead of silently dropping, and would average unrelated numeric
# street attributes as well.
stations_st_quality1 = (
    stations_st_quality
    .groupby(station_keys)['Rating_B']
    .mean()
    .reset_index()
)
stations_st_quality1.head()

Unnamed: 0,Station_id,Station_Name,Location,Latitude,Longitude,Rating_B
0,72,W 52 St & 11 Ave,W 52 St & 11 Ave,40.767272,-73.993929,8.0
1,79,Franklin St & W Broadway,Franklin St & W Broadway,40.719116,-74.006667,8.571429
2,82,St James Pl & Pearl St,St James Pl & Pearl St,40.711174,-74.000165,7.333333
3,83,Atlantic Ave & Fort Greene Pl,Atlantic Ave & Fort Greene Pl,40.683826,-73.976323,7.5
4,116,W 17 St & 8 Ave,W 17 St & 8 Ave,40.741776,-74.001497,8.5
