In [1]:
import json
import os

import numpy as np
import pandas as pd
from keplergl import KeplerGl
from matplotlib import pyplot as plt
from pyproj import CRS

  from pkg_resources import resource_string


In [2]:
# Load the merged rides table (ride records plus weather and trip-duration columns).
# NOTE(review): this is an absolute local Windows path, so the cell only runs on a
# machine that has this exact directory layout.
df = pd.read_parquet(
    path=r'C:\Data\Citibike_NY_2022\merged\df_weather_duration.parquet'
)

In [3]:
# Preview the first rows to sanity-check the load and see the available columns.
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,date,AWND,PRCP,TAVG,_merge,trip_duration
337365,FB33E3D8F21E2941,electric_bike,2022-01-01 01:49:37.374,2022-01-01 01:57:50.346,Canal St & Rutgers St,5303.08,Norfolk St & Broome St,5374.01,40.714275,-73.9899,40.717227,-73.988021,casual,2022-01-01,28,193,116,both,8.2162
934147,755337295F178067,electric_bike,2022-01-01 03:21:09.754,2022-01-01 03:49:33.047,Lewis Ave & Madison St,4425.02,Columbia St & Degraw St,4422.04,40.686312,-73.935775,40.68593,-74.002424,member,2022-01-01,28,193,116,both,28.388217
227118,C62CA87E3A475ADD,classic_bike,2022-01-01 08:38:18.156,2022-01-01 08:47:54.213,Carlton Ave & Park Ave,4732.04,Emerson Pl & Myrtle Ave,4683.02,40.695807,-73.973556,40.693631,-73.962236,casual,2022-01-01,28,193,116,both,9.60095
913138,CD7A2098AFCD5514,classic_bike,2022-01-01 10:33:58.529,2022-01-01 10:47:05.197,W 100 St & Broadway,7580.01,W 67 St & Broadway,7116.04,40.797372,-73.970412,40.774925,-73.982666,casual,2022-01-01,28,193,116,both,13.111133
127328,3F9E0C51F49F78A3,electric_bike,2022-01-01 20:05:19.592,2022-01-01 20:12:00.661,W 18 St & 6 Ave,6064.08,W 42 St & 6 Ave,6517.08,40.739713,-73.994564,40.75492,-73.98455,member,2022-01-01,28,193,116,both,6.684483


In [4]:
# (rows, columns) of the full rides table.
df.shape

(29596596, 19)

In [7]:
# Aggregate rides into one row per (start station, end station) pair, with the
# number of rides in `value`, ordered from the busiest pair to the quietest.
df_trips = (
    df.groupby(by=['start_station_name', 'end_station_name'])
    .size()
    .reset_index(name='value')
    .sort_values('value', ascending=False)  # already returns a new frame
)


In [8]:
# Show the top of the aggregated frame (it is sorted by trip count, descending).
df_trips.head(20)

Unnamed: 0,start_station_name,end_station_name,value
292912,Central Park S & 6 Ave,Central Park S & 6 Ave,11579
146804,7 Ave & Central Park South,7 Ave & Central Park South,8097
777076,Roosevelt Island Tramway,Roosevelt Island Tramway,7962
544522,Grand Army Plaza & Central Park S,Grand Army Plaza & Central Park S,6869
795170,Soissons Landing,Soissons Landing,6679
890893,W 21 St & 6 Ave,9 Ave & W 22 St,6338
118619,5 Ave & E 72 St,5 Ave & E 72 St,5891
6506,1 Ave & E 62 St,1 Ave & E 68 St,5823
1006462,Yankee Ferry Terminal,Yankee Ferry Terminal,5249
21285,12 Ave & W 40 St,12 Ave & W 40 St,5243


In [9]:
# Sanity check: the aggregation must account for every ride.
# groupby() drops rows whose key is NaN by default, so a mismatch here would mean
# some rides have a missing start or end station name and were silently left out.
total_trips = df_trips['value'].sum()
print(total_trips)
assert total_trips == len(df), (
    f"aggregated trips ({total_trips}) != rides in df ({len(df)})"
)

29596596


## Merging station coordinates onto the aggregated df

In [13]:
# Lookup table of start stations: one row per station name, keeping the first
# latitude/longitude pair encountered for that name.
start_locs = (
    df.loc[:, ['start_station_name', 'start_lat', 'start_lng']]
    .drop_duplicates(subset=['start_station_name'], keep='first')
)


In [14]:
# Lookup table of end stations: one row per station name, keeping the first
# latitude/longitude pair encountered for that name.
end_locs = (
    df.loc[:, ['end_station_name', 'end_lat', 'end_lng']]
    .drop_duplicates(subset=['end_station_name'], keep='first')
)

In [17]:
# Attach start-station coordinates to every station pair.
# - drop(..., errors='ignore') removes coordinates left behind by a previous run of
#   this cell, so re-running it does not create start_lat_x / start_lat_y columns.
# - validate='many_to_one' makes pandas raise a MergeError if start_locs ever holds
#   more than one row per station name, which would silently duplicate trip rows.
df_trips = (
    df_trips
    .drop(columns=['start_lat', 'start_lng'], errors='ignore')
    .merge(start_locs, on='start_station_name', how='left', validate='many_to_one')
)
df_trips.head()

Unnamed: 0,start_station_name,end_station_name,value,start_lat,start_lng
0,Central Park S & 6 Ave,Central Park S & 6 Ave,11579,40.765909,-73.976342
1,7 Ave & Central Park South,7 Ave & Central Park South,8097,40.766741,-73.979069
2,Roosevelt Island Tramway,Roosevelt Island Tramway,7962,40.757284,-73.9536
3,Grand Army Plaza & Central Park S,Grand Army Plaza & Central Park S,6869,40.764397,-73.973715
4,Soissons Landing,Soissons Landing,6679,40.692317,-74.014866


In [18]:
# Attach end-station coordinates to every station pair.
# - drop(..., errors='ignore') removes coordinates left behind by a previous run of
#   this cell, so re-running it does not create end_lat_x / end_lat_y columns.
# - validate='many_to_one' makes pandas raise a MergeError if end_locs ever holds
#   more than one row per station name, which would silently duplicate trip rows.
df_trips = (
    df_trips
    .drop(columns=['end_lat', 'end_lng'], errors='ignore')
    .merge(end_locs, how='left', on='end_station_name', validate='many_to_one')
)

In [19]:
# Confirm that both the start and end coordinate columns are now present.
df_trips.head()

Unnamed: 0,start_station_name,end_station_name,value,start_lat,start_lng,end_lat,end_lng
0,Central Park S & 6 Ave,Central Park S & 6 Ave,11579,40.765909,-73.976342,40.765909,-73.976342
1,7 Ave & Central Park South,7 Ave & Central Park South,8097,40.766741,-73.979069,40.766741,-73.979069
2,Roosevelt Island Tramway,Roosevelt Island Tramway,7962,40.757284,-73.9536,40.757284,-73.9536
3,Grand Army Plaza & Central Park S,Grand Army Plaza & Central Park S,6869,40.764397,-73.973715,40.764397,-73.973715
4,Soissons Landing,Soissons Landing,6679,40.692317,-74.014866,40.692317,-74.014866


## Plotting map with KeplerGL

In [20]:
# Create the KeplerGl map widget for the aggregated trips.
# The map styling is tuned interactively in the widget and later written to
# config.json. If that file already exists, load it so a fresh
# "Restart & Run All" reproduces the tuned map instead of kepler.gl defaults.
# With no saved file the widget gets an empty config, i.e. the default behaviour.
map_config = {}
if os.path.exists("config.json"):
    with open("config.json") as infile:
        map_config = json.load(infile)

m = KeplerGl(height=700, data={"data_1": df_trips}, config=map_config)
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'data_1':                         start_station_name                   end_station_name  \
0   …

Using an orange-to-blue palette because it is colourblind-friendly, but also because those are the Knicks' colours.
Filtered out routes with fewer than 900 trips, since there were far too many lines on the map.
Red/dark-orange dots represent stations with a higher number of trips starting there.
We can see that along the west side of Lower Manhattan there are a number of popular routes, as well as a large number of short routes between different locations between 30th and 50th Street. There are also a number of popular routes in Brooklyn that end around the Williamsburg Bridge.

In [30]:
# Capture the map widget's current configuration so it can be written to disk
# and passed to the HTML export below.
# NOTE(review): presumably this includes the colours and filter chosen
# interactively in the widget — confirm the map was rendered before running this.
config = m.config

In [31]:
# Persist the map configuration so the styling can be re-applied later.
# `json` is imported in the notebook's top import cell.
# NOTE(review): an empty config most likely means the widget has not synced any
# state yet (e.g. the notebook was executed without rendering the map). Skip the
# write in that case so a previously saved config.json is not replaced by "{}".
if config:
    with open("config.json", "w") as outfile:
        json.dump(config, outfile)
else:
    print("Map config is empty; leaving config.json untouched.")

In [32]:
# Export the configured map as a standalone HTML file (read_only=False keeps the
# kepler.gl side panel editable in the exported page).
# Create the target folder first so the export does not fail on a fresh checkout
# where ../visualisations does not exist yet.
html_path = '../visualisations/NY_trips.html'
os.makedirs(os.path.dirname(html_path), exist_ok=True)
m.save_to_html(file_name=html_path, read_only=False, config=config)

Map saved to ../visualisations/NY_trips.html!
