# Example: CitiBike data

Adapted from Kelsey Jordahl
https://gist.github.com/kjordahl/5957573

In [1]:
import requests  
import json
from pyproj import Proj
from shapely.geometry import Point
import geopandas as gp
import pandas as pd

NYC borough boundaries downloaded from [Bytes of the Big Apple](http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip)

In [2]:
boros = gp.GeoDataFrame.from_file('nybb_15b/nybb.shp')
boros.crs

{'proj': 'lcc',
 'lat_1': 40.66666666666666,
 'lat_2': 41.03333333333333,
 'lat_0': 40.16666666666666,
 'lon_0': -74,
 'x_0': 300000,
 'y_0': 0,
 'datum': 'NAD83',
 'units': 'us-ft',
 'no_defs': True}

Load real time bike station data from [CitiBike](http://citibikenyc.com) json API:

In [3]:
# get the bike data and convert it to a ditcionary
endpoint_url = 'http://citibikenyc.com/stations/json'
response = requests.get(endpoint_url)
data = json.loads(response.text)

In [4]:
# convert the relevant part to a geodataframe
df = gp.GeoDataFrame(data['stationBeanList'])
df

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks
0,,6,26,,304,,2019-02-28 12:38:29 PM,40.704633,,-74.013617,,Broadway & Battery Pl,,Broadway & Battery Pl,1,In Service,False,33
1,,15,44,,359,,2019-02-28 12:34:54 PM,40.755103,,-73.974987,,E 47 St & Park Ave,,E 47 St & Park Ave,1,In Service,False,64
2,,12,33,,377,,2019-02-28 12:37:10 PM,40.722438,,-74.005664,,6 Ave & Canal St,,6 Ave & Canal St,1,In Service,False,45
3,,27,10,,402,,2019-02-28 12:37:07 PM,40.740343,,-73.989551,,Broadway & E 22 St,,Broadway & E 22 St,1,In Service,False,39
4,,8,10,,3255,,2019-02-28 12:37:52 PM,40.750585,,-73.994685,,8 Ave & W 31 St,,8 Ave & W 31 St,1,In Service,False,19
5,,18,21,,3443,,2019-02-28 12:37:04 PM,40.761330,,-73.979820,,W 52 St & 6 Ave,,W 52 St & 6 Ave,1,In Service,False,41
6,,32,23,,72,,2019-02-28 12:36:59 PM,40.767272,,-73.993929,,W 52 St & 11 Ave,,W 52 St & 11 Ave,1,In Service,False,55
7,,32,0,,79,,2019-02-28 12:36:26 PM,40.719116,,-74.006667,,Franklin St & W Broadway,,Franklin St & W Broadway,1,In Service,False,33
8,,23,3,,82,,2019-02-28 12:36:49 PM,40.711174,,-74.000165,,St James Pl & Pearl St,,St James Pl & Pearl St,1,In Service,False,27
9,,33,29,,83,,2019-02-28 12:36:17 PM,40.683826,,-73.976323,,Atlantic Ave & Fort Greene Pl,,Atlantic Ave & Fort Greene Pl,1,In Service,False,62


In [5]:
# there is one row for each bike station.  How many stations are there? 
len(df)

814

In [6]:
# in the file above, there are lon-lats, but no geometry field
# we need to set that up

s = gp.GeoSeries([Point(x, y) for x, y in zip(df['longitude'], df['latitude'])])
df['geometry'] = s
df.crs = {'init': 'epsg:4326', 'no_defs': True}
df.geometry.total_bounds
df.crs

{'init': 'epsg:4326', 'no_defs': True}

In [7]:
# make sure they are on the same CRS.  
# checking the bounds is a nice way of seeing this
df.to_crs(boros.crs, inplace=True)
df.geometry.total_bounds

array([ 961064.73659426,  178056.37414829, 1009803.53178432,
        235986.42166716])

In [8]:
# the geometry objects can do lots of cool stuff.  For example: 

manhattan = boros.geometry[3]
in_mn = df.geometry.within(manhattan)
print(sum(in_mn), 'stations in Manhattan')

415 stations in Manhattan


# Your turn

You can read about the range of operations available in geopandas here: 
    
http://geopandas.org/index.html

Your assignment is to: 

1. Calculate how many stations are in each borough
2. Calculate how many bikes are currently available in each borough
3. Read about and try at least two new spatial or geometric operations (beyond what I've covered here). 

### 1. Calculate how many stations are in each borough

In [9]:
# join the citibike data with boros using the within operation
citibikes_boros = gp.sjoin(df, boros, how="left", op='within')

In [10]:
#Note, 50 stations are not within any boro
sum(citibikes_boros.BoroName.isna())

50

In [11]:
c = citibikes_boros.BoroName.value_counts().to_frame(name='stations')
# Add boros with 0 stations
pd.merge(boros[['BoroName']],c, how='left', right_index=True, left_on= 'BoroName').fillna(0)

Unnamed: 0,BoroName,stations
0,Staten Island,0.0
1,Brooklyn,267.0
2,Queens,82.0
3,Manhattan,415.0
4,Bronx,0.0


### 2. Calculate how many bikes are currently available in each borough

In [12]:
# get sum of bikes
bikes = citibikes_boros.groupby('BoroName')['availableBikes'].sum()
bikes

BoroName
Brooklyn     3375
Manhattan    7003
Queens        401
Name: availableBikes, dtype: int64

In [13]:
# Add boros with 0 bikes
bikes = bikes.to_frame(name='bikes')
pd.merge(boros[['BoroName']],bikes, right_index=True, left_on= 'BoroName', how='left').fillna(0)

Unnamed: 0,BoroName,bikes
0,Staten Island,0.0
1,Brooklyn,3375.0
2,Queens,401.0
3,Manhattan,7003.0
4,Bronx,0.0


### 3. Read about and try at least two new spatial or geometric operations (beyond what I've covered here).

1. sjoin() - Used in exercise one
2. dissolve(): It dissolves all the geometries within a given group together into a single geometric feature
3. touches(): It checks if two objects has at least one point in common and the interior of one does not intersect with any part of the other.

In [14]:
try_touches = boros.BoroName.to_frame()
for index, row in boros.iterrows():
    try_touches[row.BoroName] = boros.geometry.touches(row.geometry)
    
try_touches

Unnamed: 0,BoroName,Staten Island,Brooklyn,Queens,Manhattan,Bronx
0,Staten Island,False,False,False,False,False
1,Brooklyn,False,False,True,True,False
2,Queens,False,True,False,True,True
3,Manhattan,False,True,True,False,True
4,Bronx,False,False,True,True,False
