# Clean Street Quality Data and Merge to Citibike Data

In [18]:
# Import geojson file into geopandas dataframe and remove streets with no rating
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

st_quality = gpd.read_file('../data/external/street-assessment.geojson')
st_quality = st_quality[['Rating_B', 'geometry']]
st_quality = st_quality[st_quality.Rating_B != 0 ]
st_quality.head()

Unnamed: 0,Rating_B,geometry
5,8,"LINESTRING (913321 123442, 913844 123551)"
6,8,"LINESTRING (913554 123789, 913588 123659)"
7,8,"LINESTRING (913844 123279, 913647 123244)"
8,7,"LINESTRING (913868 123836, 913554 123789)"
9,6,"LINESTRING (913357 124349, 913401 124530)"


In [21]:
st_quality = st_quality.to_crs(epsg=4326)

In [22]:
st_quality.head()

Unnamed: 0,Rating_B,geometry
5,8,"LINESTRING (913321 123442, 913844 123551)"
6,8,"LINESTRING (913554 123789, 913588 123659)"
7,8,"LINESTRING (913844 123279, 913647 123244)"
8,7,"LINESTRING (913868 123836, 913554 123789)"
9,6,"LINESTRING (913357 124349, 913401 124530)"


In [13]:
# Import citibike station csv into geopandas dataframe
from shapely.geometry import Point

stations = pd.read_csv('../data/processed/stations.csv')

geometry = gpd.GeoSeries([Point(xy) for xy in zip(stations.Longitude, stations.Latitude)])
geometry = geometry.buffer(.0005)
geo_stations = gpd.GeoDataFrame(stations, geometry=geometry)
geo_stations.crs = {'init' :'epsg:4326'}
geo_stations.to_file('geo_stations.shp')
geo_stations.head()

Unnamed: 0,Station_id,Station_Name,Location,Latitude,Longitude,geometry
0,72,W 52 St & 11 Ave,W 52 St & 11 Ave,40.767272,-73.993929,"POLYGON ((-73.99342888 40.76727216, -73.993431..."
1,79,Franklin St & W Broadway,Franklin St & W Broadway,40.719116,-74.006667,"POLYGON ((-74.00616660999999 40.71911552, -74...."
2,82,St James Pl & Pearl St,St James Pl & Pearl St,40.711174,-74.000165,"POLYGON ((-73.99966544999999 40.71117416, -73...."
3,83,Atlantic Ave & Fort Greene Pl,Atlantic Ave & Fort Greene Pl,40.683826,-73.976323,"POLYGON ((-73.97582328 40.68382604, -73.975825..."
4,116,W 17 St & 8 Ave,W 17 St & 8 Ave,40.741776,-74.001497,"POLYGON ((-74.00099745999999 40.74177603, -74...."


In [4]:
# Merge street quality data with citibike stations using Geopandas Spatial Merge
stations_st_quality = gpd.sjoin(st_quality, geo_stations, how="inner", op='intersects')
stations_st_quality.drop('index_right',axis=1, inplace=True)
stations_st_quality.head(10)

ValueError: need at least one array to concatenate

In [9]:
# Find average of street quality ratings for each station
stations_st_quality1 = stations_st_quality.groupby(['Station_id', 'Station_Name', 
                                                   'Location', 'Latitude', 'Longitude']).mean()
stations_st_quality1.reset_index(inplace=True)
stations_st_quality1.head()

Unnamed: 0,Station_id,Station_Name,Location,Latitude,Longitude,Rating_B
0,72,W 52 St & 11 Ave,W 52 St & 11 Ave,40.767272,-73.993929,8.0
1,79,Franklin St & W Broadway,Franklin St & W Broadway,40.719116,-74.006667,8.571429
2,82,St James Pl & Pearl St,St James Pl & Pearl St,40.711174,-74.000165,7.333333
3,83,Atlantic Ave & Fort Greene Pl,Atlantic Ave & Fort Greene Pl,40.683826,-73.976323,7.5
4,116,W 17 St & 8 Ave,W 17 St & 8 Ave,40.741776,-74.001497,8.5
