In [1]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/future warnings raised by the
# libraries used below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [2]:
import json

import matplotlib.pyplot as plt
import mplleaflet
import numpy as np
import pandas as pd
import requests

# json_normalize has lived at the pandas top level since 1.0; the old
# pandas.io.json location is deprecated and not importable on pandas 2.x.
# Try the current location first and fall back for very old installs.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

### BUS STOPS

In [3]:
# Download all transit stops (attributes only, no geometry) as JSON.
# requests has no default timeout, so set one to avoid hanging the kernel.
bus_stops_req = requests.get(
    'https://services3.arcgis.com/rl7ACuZkiFsmDA2g/arcgis/rest/services/Transit_Stops_and_Routes/FeatureServer/0/query?where=1%3D1&outFields=stop_code,stop_name,stop_lat,stop_lon,stop_id,OBJECTID&returnGeometry=false&outSR=4326&f=json',
    timeout=30,
)
# Stop here on an HTTP error instead of trying to parse an error page.
bus_stops_req.raise_for_status()
bus_stops = bus_stops_req.json()

# Later cells index bus_stops['features']; fail with a readable message if the
# service answered with something else (e.g. an error document).
if 'features' not in bus_stops:
    raise ValueError(f"Bus stops response has no 'features' key: {bus_stops}")

In [4]:
# Inspect the first record under 'features' to see which attributes each stop carries
bus_stops['features'][0]

{'attributes': {'stop_code': '1001',
  'stop_name': 'Kennedy Rd S n/of First Gulf Blvd',
  'stop_lat': '43.673256',
  'stop_lon': '-79.718468',
  'stop_id': '00000020',
  'OBJECTID': 1}}

In [5]:
# Flatten the records under 'features' into one row per stop; nested keys
# become dotted column names such as 'attributes.stop_lat'.
df_stops = pd.json_normalize(bus_stops, ['features'])

# Convert lat and lon from object to numeric types
df_stops['attributes.stop_lat'] = pd.to_numeric(df_stops['attributes.stop_lat'])
df_stops['attributes.stop_lon'] = pd.to_numeric(df_stops['attributes.stop_lon'])

# Turn empty stop codes into NaN and drop those rows.
# Assign the results back instead of using inplace=True on a column selection:
# that form operates on a temporary object under pandas Copy-on-Write and would
# leave df_stops unchanged.
df_stops['attributes.stop_code'] = df_stops['attributes.stop_code'].replace('', np.nan)
df_stops = df_stops.dropna(subset=['attributes.stop_code'])

df_stops.head()

Unnamed: 0,attributes.stop_code,attributes.stop_name,attributes.stop_lat,attributes.stop_lon,attributes.stop_id,attributes.OBJECTID
0,1001,Kennedy Rd S n/of First Gulf Blvd,43.673256,-79.718468,20,1
1,1002,Kennedy Rd S at Steeles Ave E,43.675159,-79.72171,30,2
2,1005,Rutherford Rd S n/of Steeles Ave E,43.681385,-79.718147,55,3
3,1006,Rutherford Rd S/of Bramsteele Rd,43.681969,-79.718979,60,4
4,1008,Rutherford Rd S n/of Bramsteele Rd,43.683807,-79.721458,70,5


In [6]:
# Summary statistics for df_stops (sanity check on coordinate ranges and row count)
df_stops.describe()

Unnamed: 0,attributes.stop_lat,attributes.stop_lon,attributes.OBJECTID
count,1865.0,1865.0,1865.0
mean,43.709818,-79.735491,933.0
std,0.043916,0.058301,538.523444
min,43.591827,-79.861496,1.0
25%,43.677818,-79.777969,467.0
50%,43.709282,-79.739311,933.0
75%,43.74131,-79.695847,1399.0
max,43.819111,-79.52787,1865.0


### BUS ROUTES

In [7]:
# Download all transit routes, including their path geometry, as JSON.
# requests has no default timeout, so set one to avoid hanging the kernel.
bus_routes_req = requests.get(
    'https://services3.arcgis.com/rl7ACuZkiFsmDA2g/arcgis/rest/services/Transit_Stops_and_Routes/FeatureServer/1/query?where=1%3D1&outFields=OBJECTID,shape_id,route_id,route_short_name,route_long_name,Shape__Length&outSR=4326&f=json',
    timeout=30,
)
# Stop here on an HTTP error instead of trying to parse an error page.
bus_routes_req.raise_for_status()
bus_routes = bus_routes_req.json()

# Later cells index bus_routes['features']; fail with a readable message if the
# service answered with something else (e.g. an error document).
if 'features' not in bus_routes:
    raise ValueError(f"Bus routes response has no 'features' key: {bus_routes}")

In [8]:
# Inspect the 'paths' geometry of the second route record: a list of
# sub-routes, each itself a list of two-number coordinate pairs
bus_routes['features'][1]['geometry']['paths']

[[[-79.7156231791047, 43.6793698452164],
  [-79.7140836605824, 43.6782045412685],
  [-79.7137573238694, 43.677917243767],
  [-79.7135012513777, 43.6775929173563]],
 [[-79.718560933262, 43.6731919218436],
  [-79.7178100066018, 43.6726297521368],
  [-79.7157856478455, 43.6744285269198],
  [-79.7156275917276, 43.6745690053502],
  [-79.7154411956924, 43.6747001690151],
  [-79.7151122842183, 43.6748828535335],
  [-79.7147294343504, 43.6750352213363],
  [-79.7143967287224, 43.6751629323373],
  [-79.7140705192123, 43.675334841778],
  [-79.7137980945753, 43.6755442617592],
  [-79.7135212617016, 43.6758436893133],
  [-79.7132920728739, 43.6762354706773],
  [-79.7132094129543, 43.676562291582],
  [-79.7132970162398, 43.6771360353016],
  [-79.7135012513777, 43.6775929173563],
  [-79.7133043325967, 43.6771711216319],
  [-79.7132592019035, 43.6769608190639],
  [-79.712338827891, 43.6770711033299],
  [-79.7118438202018, 43.6771303504259],
  [-79.7109309774752, 43.6772362184579],
  [-79.7106480300922

In [9]:
# Flatten the records under 'features' into one row per route shape; nested
# keys become dotted column names such as 'geometry.paths'.
# The default 0..n-1 row index is kept on purpose (shape_id is not the index).
df_routes = pd.json_normalize(bus_routes, ['features'])


In [10]:
# Preview the first 12 route rows
df_routes.head(12)

Unnamed: 0,attributes.OBJECTID,attributes.shape_id,attributes.route_id,attributes.route_short_name,attributes.route_long_name,attributes.Shape__Length,geometry.paths
0,1,100022,10-280,10,South Industrial,11139.641055,"[[[-79.718595121, 43.673217515], [-79.71907475..."
1,2,100023,10-280,10,South Industrial,12014.585063,"[[[-79.7156231791047, 43.6793698452164], [-79...."
2,3,10157,1-280,1,Queen,15005.585198,"[[[-79.7207905069999, 43.7187270320001], [-79...."
3,4,10188,1-280,1,Queen,13732.863721,"[[[-79.6496195989999, 43.7669375460001], [-79...."
4,5,10196,1-280,1,Queen,38079.060051,"[[[-79.6496195820342, 43.7669375667098], [-79...."
5,6,10197,1-280,1,Queen,13588.848506,"[[[-79.822946052, 43.6746458870001], [-79.8230..."
6,7,10210,1-280,1,Queen,36943.784014,"[[[-79.6496195820342, 43.7669375667098], [-79...."
7,8,10211,1-280,1,Queen,23197.873134,"[[[-79.7618357938897, 43.6869748977028], [-79...."
8,9,110068,11-280,11,Steeles,22659.817991,"[[[-79.6049677459999, 43.7289499140001], [-79...."
9,10,110073,11-280,11,Steeles,37452.216512,"[[[-79.6049677459999, 43.7289499140001], [-79...."


In [11]:
paths_column = df_routes['geometry.paths']

# How many sub-routes each route row contains
sub_routes_per_route = list(map(len, paths_column))
print(sub_routes_per_route[:5])

# Length of every individual sub-route, flattened across all routes
tot_rows = []
for route in paths_column:
    tot_rows.extend(len(sub_route) for sub_route in route)
print(tot_rows[:5])



[1, 2, 1, 1, 2]
[105, 4, 117, 82, 76]


In [12]:
# Show the raw nested coordinate lists before converting them.
print(df_routes['geometry.paths'])

# Build a NEW Series in which every sub-route is a two-column DataFrame
# (column 0 / column 1 = first / second value of each coordinate pair).
# Series.copy() does not copy the nested Python lists, so assigning into
# rdf[i][j] would also overwrite the lists held by df_routes['geometry.paths'];
# creating fresh lists leaves df_routes untouched and makes this cell re-runnable.
rdf = df_routes['geometry.paths'].map(
    lambda sub_routes: [pd.DataFrame(sub_route) for sub_route in sub_routes]
)

0     [[[-79.718595121, 43.673217515], [-79.71907475...
1     [[[-79.7156231791047, 43.6793698452164], [-79....
2     [[[-79.7207905069999, 43.7187270320001], [-79....
3     [[[-79.6496195989999, 43.7669375460001], [-79....
4     [[[-79.6496195820342, 43.7669375667098], [-79....
                            ...                        
95    [[[-79.694915727, 43.637713552], [-79.69694108...
96    [[[-79.798128564, 43.7278887780001], [-79.7983...
97    [[[-79.824479748, 43.7333886930001], [-79.8249...
98    [[[-79.694915727, 43.637713552], [-79.69694108...
99    [[[-79.7218657829999, 43.6750522200001], [-79....
Name: geometry.paths, Length: 100, dtype: object


In [13]:
## This snippet flattens all lists for each row; however, this is not what we want, as it connects separate sub-routes together
# def concat_sub_lists(row):
#     return pd.DataFrame([inner for outer in row for inner in outer])
    
    
# ndf = df_routes['geometry.paths']
# ndf = ndf.apply(concat_sub_lists)
# df_routes['geometry.paths'] = ndf

In [14]:
# pd.DataFrame(df_routes['geometry.paths'][1])
# type(df_routes['geometry.paths'][1][0][0])
# yy = df_routes['geometry.paths'].str.split(", ",expand=True)

## Insights

In [15]:
# Lookup table from stop code to stop name
bus_stop_map = dict(
    zip(df_stops['attributes.stop_code'], df_stops['attributes.stop_name'])
)
bus_stop_map['1005']

'Rutherford Rd S n/of Steeles Ave E'

In [16]:
# filter = df_stops['attributes.stop_name'] == 'Kennedy Rd S n/of First Gulf Blvd'
# df_stops.loc['attributes.stop_name']
# df_stops.loc['Kennedy Rd S n/of First Gulf Blvd']
# df_stops[-20:]

## Plotting

In [17]:
# Plot the stops
# Scatter the first 1000 stops as red dots (x = stop_lon, y = stop_lat)
fig_stops, ax_stops = plt.subplots()

stops_subset = df_stops.iloc[:1000]
ax_stops.plot(
    stops_subset['attributes.stop_lon'],
    stops_subset['attributes.stop_lat'],
    'r.',
)

# Render the figure inline through mplleaflet
mplleaflet.display(fig=fig_stops)


In [18]:
# Plot the routes
# Draw every sub-route as its own line so separate segments are not joined
fig_routes, ax_routes = plt.subplots()

for route_idx, sub_route_count in enumerate(map(len, df_routes['geometry.paths'])):
    for sub_idx in range(sub_route_count):
        segment = rdf[route_idx][sub_idx]
        # column 0 is plotted on x, column 1 on y
        ax_routes.plot(segment[0], segment[1], linewidth=2)

# Render the figure inline through mplleaflet
mplleaflet.display(fig=fig_routes)

### Testing