In [1]:
import warnings
# Silence every warning category for the rest of the session so that library
# deprecation notices do not clutter the rendered cell outputs.
warnings.simplefilter("ignore")

In [2]:
import matplotlib.pyplot as plt
import mplleaflet
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import requests
import json

### BUS STOPS

In [3]:
# Fetch every stop (where=1=1) from layer 0 of the Transit_Stops_and_Routes
# feature service as JSON, without geometry (lat/lon come as attributes).
bus_stops_req = requests.get(
    'https://services3.arcgis.com/rl7ACuZkiFsmDA2g/arcgis/rest/services/Transit_Stops_and_Routes/FeatureServer/0/query?where=1%3D1&outFields=stop_code,stop_name,stop_lat,stop_lon,stop_id,OBJECTID&returnGeometry=false&outSR=4326&f=json',
    timeout=30,  # never hang the kernel indefinitely on a stalled connection
)
# Surface HTTP failures here rather than as a confusing KeyError further down.
bus_stops_req.raise_for_status()
bus_stops = bus_stops_req.json()

# ArcGIS REST endpoints can report a failed query inside a 200 response body.
if 'error' in bus_stops:
    raise RuntimeError(f"ArcGIS stops query failed: {bus_stops['error']}")

In [4]:
# Peek at the first feature record to see how each stop is laid out in the payload.
bus_stops['features'][0]

{'attributes': {'stop_code': '1001',
  'stop_name': 'Kennedy Rd S n/of First Gulf Blvd',
  'stop_lat': '43.673256',
  'stop_lon': '-79.718468',
  'stop_id': '00000020',
  'OBJECTID': 1}}

In [5]:
# Flatten the list under 'features' into one row per stop; nested keys become
# dotted column names such as 'attributes.stop_lat'.
# pd.json_normalize is the public entry point; the pandas.io.json import path
# is deprecated and no longer available in recent pandas releases.
df_stops = pd.json_normalize(bus_stops, record_path=['features'])

# Convert lat and lon from object (string) to numeric types
df_stops['attributes.stop_lat'] = pd.to_numeric(df_stops['attributes.stop_lat'])
df_stops['attributes.stop_lon'] = pd.to_numeric(df_stops['attributes.stop_lon'])

df_stops.head()

Unnamed: 0,attributes.stop_code,attributes.stop_name,attributes.stop_lat,attributes.stop_lon,attributes.stop_id,attributes.OBJECTID
0,1001,Kennedy Rd S n/of First Gulf Blvd,43.673256,-79.718468,20,1
1,1002,Kennedy Rd S at Steeles Ave E,43.675159,-79.72171,30,2
2,1005,Rutherford Rd S n/of Steeles Ave E,43.681385,-79.718147,55,3
3,1006,Rutherford Rd S/of Bramsteele Rd,43.681969,-79.718979,60,4
4,1008,Rutherford Rd S n/of Bramsteele Rd,43.683807,-79.721458,70,5


In [6]:
# Summary statistics for the numeric columns of the stops table.
df_stops.describe()

Unnamed: 0,attributes.stop_lat,attributes.stop_lon,attributes.OBJECTID
count,2964.0,2964.0,2964.0
mean,43.711414,-79.741927,1482.5
std,0.042635,0.055206,855.777424
min,43.547852,-79.861496,1.0
25%,43.680982,-79.779841,741.75
50%,43.712059,-79.745064,1482.5
75%,43.742829,-79.705703,2223.25
max,43.819111,-79.496925,2964.0


### BUS ROUTES

In [7]:
# Fetch every route shape (where=1=1) from layer 1 of the
# Transit_Stops_and_Routes feature service as JSON, including path geometry.
bus_routes_req = requests.get(
    'https://services3.arcgis.com/rl7ACuZkiFsmDA2g/arcgis/rest/services/Transit_Stops_and_Routes/FeatureServer/1/query?where=1%3D1&outFields=OBJECTID,shape_id,route_id,route_short_name,route_long_name,Shape__Length&outSR=4326&f=json',
    timeout=30,  # never hang the kernel indefinitely on a stalled connection
)
# Surface HTTP failures here rather than as a confusing KeyError further down.
bus_routes_req.raise_for_status()
bus_routes = bus_routes_req.json()

# ArcGIS REST endpoints can report a failed query inside a 200 response body.
if 'error' in bus_routes:
    raise RuntimeError(f"ArcGIS routes query failed: {bus_routes['error']}")

In [8]:
# Peek at the first route feature: its attributes plus the 'geometry' -> 'paths' coordinate lists.
bus_routes['features'][0]

{'attributes': {'OBJECTID': 1,
  'shape_id': '100022',
  'route_id': '10-273',
  'route_short_name': '10',
  'route_long_name': 'South Industrial',
  'Shape__Length': 11139.6410548243},
 'geometry': {'paths': [[[-79.718595121, 43.673217515],
    [-79.719074759, 43.67358755],
    [-79.7196101009999, 43.6739050840001],
    [-79.720123702, 43.6741368350001],
    [-79.720464178, 43.67428471],
    [-79.721268104, 43.6746898800001],
    [-79.721796217, 43.674998319],
    [-79.722634674, 43.6756478710001],
    [-79.721917471, 43.67629894],
    [-79.720071891, 43.6779746240001],
    [-79.718331225, 43.679554212],
    [-79.717236734, 43.6805566730001],
    [-79.7182580159999, 43.6813028500001],
    [-79.7190691979999, 43.6818953970001],
    [-79.719567647, 43.6822593380001],
    [-79.721551023, 43.6837410740001],
    [-79.722733282, 43.68462407],
    [-79.725164029, 43.6862557040001],
    [-79.725463748, 43.686457133],
    [-79.726483399, 43.6871726040001],
    [-79.728461276, 43.6885190990001]

In [9]:
# One row per route feature; nested dicts flatten to dotted columns, so the
# path coordinates end up as nested lists in the 'geometry.paths' column.
# pd.json_normalize is the public entry point; the pandas.io.json import path
# is deprecated and no longer available in recent pandas releases.
df_routes = pd.json_normalize(bus_routes, record_path=['features'])


In [10]:
# Number of sub-paths that make up each route's geometry.
sub_routes_per_route = list(map(len, df_routes['geometry.paths']))
print(sub_routes_per_route[:5])

# Number of coordinate points in every individual sub-path, across all routes.
tot_rows = [
    n_points
    for route_paths in df_routes['geometry.paths']
    for n_points in map(len, route_paths)
]
print(tot_rows[:5])

[1, 2, 1, 1, 1]
[105, 4, 117, 82, 173]


In [11]:
def concat_sub_lists(row):
    """Flatten one route's list of sub-paths into a single table of points.

    Args:
        row: An iterable of sub-paths, each itself an iterable of coordinate
            points (in this notebook, one cell of
            ``df_routes['geometry.paths']``). A value that is already a
            DataFrame is accepted and returned unchanged.

    Returns:
        pandas.DataFrame with one row per point, in sub-path order. Columns
        carry the default integer labels assigned by the DataFrame
        constructor. Points of consecutive sub-paths are simply appended;
        no separator row is inserted between them.
    """
    # Re-running the cell that applies this function would otherwise iterate
    # over an already-converted DataFrame (yielding its integer column labels)
    # and fail with a TypeError; pass converted values straight through.
    if isinstance(row, pd.DataFrame):
        return row
    return pd.DataFrame([point for sub_path in row for point in sub_path])

# Replace each route's nested path lists with a single DataFrame of points.
ndf = df_routes['geometry.paths'].apply(concat_sub_lists)
df_routes['geometry.paths'] = ndf

In [12]:
# Show the flattened coordinate table stored for the route at index label 1.
df_routes['geometry.paths'][1]

Unnamed: 0,0,1
0,-79.715623,43.679370
1,-79.714084,43.678205
2,-79.713757,43.677917
3,-79.713501,43.677593
4,-79.718561,43.673192
...,...,...
116,-79.720249,43.719532
117,-79.720490,43.719333
118,-79.721071,43.718830
119,-79.720659,43.718583


In [30]:
# Display the routes table; each 'geometry.paths' cell now holds a DataFrame of points.
df_routes

Unnamed: 0,attributes.OBJECTID,attributes.shape_id,attributes.route_id,attributes.route_short_name,attributes.route_long_name,attributes.Shape__Length,geometry.paths
0,1,100022,10-273,10,South Industrial,11139.641055,0 1 0 -79.718595 43.6...
1,2,100023,10-273,10,South Industrial,12014.585063,0 1 0 -79.715623 43.6...
2,3,10157,1-273,1,Queen,15005.585198,0 1 0 -79.720791 43.718...
3,4,10181,1-273,1,Queen,24558.302194,0 1 0 -79.762969 43.6...
4,5,10188,1-273,1,Queen,13732.863721,0 1 0 -79.649620 43.766...
...,...,...,...,...,...,...,...
256,257,90029,9-273,9,Vodden,27041.802249,0 1 0 -79.724145 43.7...
257,258,90030,9-273,9,Vodden,24459.391111,0 1 0 -79.724145 43.7...
258,259,90031,9-273,9,Vodden,9858.114288,0 1 0 -79.720326 43.718...
259,260,920006,92-273,92,Bramalea GO Shuttle,11558.664321,0 1 0 -79.692624 43.7...


## Insights

In [29]:
# Lookup table from stop code to stop name.
bus_stop_map = (
    df_stops
    .set_index('attributes.stop_code')['attributes.stop_name']
    .to_dict()
)
# Spot-check the mapping with a known stop code.
bus_stop_map['1005']

'Rutherford Rd S n/of Steeles Ave E'

## Plotting

In [14]:
# Plot the stops
# Only the first MAX_STOPS_PLOTTED rows are drawn. NOTE(review): presumably a
# cap to keep the interactive map responsive - confirm, or set to None to
# plot every stop.
MAX_STOPS_PLOTTED = 1000

fig_stops, ax_stops = plt.subplots()

# Longitude on x, latitude on y, drawn as red point markers.
ax_stops.plot(
    df_stops['attributes.stop_lon'].iloc[:MAX_STOPS_PLOTTED],
    df_stops['attributes.stop_lat'].iloc[:MAX_STOPS_PLOTTED],
    'r.',
)

# Display Inline
mplleaflet.display(fig=fig_stops)


In [34]:
# Plot the routes
fig_routes, ax_routes = plt.subplots()

# Iterate over the per-route point tables directly instead of looking them up
# by integer label, which only works while df_routes keeps its default
# 0..n-1 index (it would break after filtering or re-indexing).
for route_points in df_routes['geometry.paths']:
    # Column 0 holds longitude (x) and column 1 latitude (y).
    ax_routes.plot(route_points[0], route_points[1], linewidth=2)

# Display Inline
mplleaflet.display(fig=fig_routes)

### Testing