# Example of creating a distance matrix using OpenTripPlanner

Steps:
1. create your dataframe
2. set up distance function
3. loop through dataframe calculating distances

In [None]:
## import Python packages ##
import time # to convert time as needed and report how long some functions take

# interacting with websites and web-APIs
import requests # easy way to interact with web sites and services
import json # read/write JavaScript Object Notation (JSON)

# data manipulation
import pandas as pd # easy data manipulation
import geopandas as gpd # geographic data manipulation
from shapely.geometry import Point, LineString # to create lines from a list of points

# pull data from database
from sqlalchemy import create_engine

# visualization
import matplotlib as mplib
import matplotlib.pyplot as plt # visualization package

# so images get plotted in the notebook
%matplotlib inline

In [None]:
# database connection: one SQLAlchemy engine shared by the read_sql / to_sql calls below
db_url = "postgresql://stuffed/appliedda"
conn = create_engine(db_url)

The query used below simply pulls lat/lon coordinates for the centroid of each Chicago neighborhood polygon. PostGIS has great documentation online, and all the `ST_...` functions are part of the PostGIS extension to PostgreSQL.

In [None]:
# get data, example using chicago neighborhood centroids as both origin and destination points
# the two queries differ only in the alias given to the neighborhood name,
# so both are rendered from one template
centroid_qry = """
SELECT neighborhood as {name_col}, 
    ST_Y(ST_Centroid(ST_Transform(geom, 4326))) as lat, 
    ST_X(ST_Centroid(ST_Transform(geom, 4326))) as lon
FROM chicago_nhoods
"""

origin_qry = centroid_qry.format(name_col='o_name')
dest_qry = centroid_qry.format(name_col='d_name')

# pull data from the database into one dataframe per role
origins = pd.read_sql(sql=origin_qry, con=conn)
destinations = pd.read_sql(sql=dest_qry, con=conn)

In [None]:
# root of the OTP routers API on the trip planner server
base_url = 'https://tripplanner.adrf.info/otp/routers/'

# Set up query URL for the feeds index
# NOTE(review): OTP index end points usually sit under a router id
# (routers/<routerId>/index/feeds) - confirm this URL returns what is expected
qry_url = base_url + 'index/feeds'

# OTP is a plain HTTP service, so a GET request from the requests package is enough
response = requests.get(qry_url)

In [None]:
# example query
# base URL of the OTP router to query: server address + "routers/" + router id.
# It has to be the complete URL (scheme and host included); a bare
# "chicago_20171023/" cannot be requested.
base_url = "https://tripplanner.adrf.info/otp/routers/chicago_20171023/"

# travel date; if no date & time is given OTP plans the trip for "now"
date = '2017-11-15'

# route from the first origin to the sixth destination
origin_lat = origins['lat'][0]
origin_lon = origins['lon'][0]
destination_lat = destinations['lat'][5]
destination_lon = destinations['lon'][5]

# the "plan" end point takes from/to as "lat,lon" pairs
qry_url = '{}plan?fromPlace={},{}&toPlace={},{}&date={}'.format(
    base_url, origin_lat, origin_lon, destination_lat, destination_lon, date)
print(qry_url)

In [None]:
# send the query and keep the raw body text of the reply
response = requests.get(qry_url).text
# the body is JSON; decode it into nested Python dicts/lists
plan = json.loads(response)

# Examine the response, which is a routing plan:
print(plan)

In [None]:
# the plan will return up to three trip options as a list;
# count how many came back for this query
itineraries = plan['plan']['itineraries']
len(itineraries)

In [None]:
# what 'keys' does each trip return? let's check the first trip plan returned:
first_trip = plan['plan']['itineraries'][0]
print(first_trip.keys())

In [None]:
# what is included in 'legs'? look at the first leg of the first trip
first_leg = plan['plan']['itineraries'][0]['legs'][0]
print(first_leg.keys())

In [None]:
# get total distance and time for a route
first_trip = plan['plan']['itineraries'][0]

# duration is reported directly in the trip summary info
duration = first_trip['duration']

# the summary only carries the walking distance in this example,
# so the total distance is accumulated over the trip's legs
distance = sum(leg['distance'] for leg in first_trip['legs'])

print('1st trip returned had')
print('duration (seconds): {:.1f}'.format(duration))
print('distance (meters): {:.1f}'.format(distance))

In [None]:
# which neighborhoods are these? (same rows used to build the example query)
print(origins.loc[0, 'o_name'])
print(destinations.loc[5, 'd_name'])

Checking on Google Maps, it should take less than an hour (53 minutes), so all in all pretty good. Note that if you do not add a date & time, OTP will run assuming _now_.

In [None]:
# now compare duration & distance of all trips returned (up to 3):
trip_durs = []
trip_dist = []

for trip in plan['plan']['itineraries']:
    # add duration (seconds) to list
    trip_durs.append(trip['duration'])
    # total distance is the sum over THIS trip's legs; summing the first
    # itinerary's legs instead would report one distance for every trip
    trip_dist.append(sum(leg['distance'] for leg in trip['legs']))

# how do trips compare?
print(trip_durs)
print(trip_dist)

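All trips are the same distance; from Googling, it looks like there are a few different bus options that are slightly different but give essentially the same result. (Caveat: identical distances can also be an artifact of summing the same itinerary's legs for every trip, so make sure each trip's distance is summed over its own `trip['legs']`.)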

In [None]:
# get position of fastest trip (first one wins on ties)
# plain list lookup instead of the `pd.np` alias, which newer pandas no longer provides
trip_durs.index(min(trip_durs))

In [None]:
# so let's do this for a sample of origin-destination pairs
# what data do we want to collect (one entry per pair, kept in step across lists):
o_name = []
d_name = []
durations = []
distances = []
queries = []

start_time = time.time()

# do for just first 5 origins and last 5 destinations
# (.iloc slices by position; on the default 0..n-1 index .loc[:5] would take
#  6 rows because label slices are inclusive, and .loc[-5:] would take every row)
for oid, origin in origins.iloc[:5].iterrows():
    for did, destin in destinations.iloc[-5:].iterrows():
        # set up this query (no date given, so OTP plans for "now")
        qry_url = '{}plan?fromPlace={},{}&toPlace={},{}'.format(
            base_url, origin['lat'], origin['lon'], destin['lat'], destin['lon'])
        # get plan
        response = requests.get(qry_url).text
        plan = json.loads(response)

        # the pair is recorded whether or not a route was found
        o_name.append(origin['o_name'])
        d_name.append(destin['d_name'])

        # a reply without a 'plan' (or with no itineraries) means no route
        itineraries = plan.get('plan', {}).get('itineraries', [])
        if itineraries:
            # (duration, distance) per trip; distance is summed over each
            # trip's own legs
            trips = [(trip['duration'],
                      sum(leg['distance'] for leg in trip['legs']))
                     for trip in itineraries]
            # keep the fastest trip (first one wins on ties)
            best_duration, best_distance = min(trips, key=lambda t: t[0])

            durations.append(best_duration)
            distances.append(best_distance)
            queries.append(qry_url)
        else:  # no plan returned for pair
            durations.append(None)
            distances.append(None)
            queries.append(None)

# create dataframe from outputs
orig_dest_df = pd.DataFrame({'origin': o_name, 'destin': d_name,
                             'duration_seconds': durations,
                             'distance_meters': distances, 'query': queries})
end_time = time.time()
print('analysis took {:.3f}'.format(end_time - start_time))
orig_dest_df.head()

In [None]:
# one row per (new_zip, location) pair with its origin/destination coordinates;
# dist_meters is the PostGIS ST_Distance between the two points after both are
# transformed from SRID 4326 to SRID 2163 (MAX collapses duplicates in a group)
qry = """
SELECT a.new_zip, a.location, b.o_lon, b.o_lat, b.d_lon, b.d_lat,
max(ST_Distance(ST_Transform(ST_SetSRID(ST_Point(b.o_lon, b.o_lat), 4326), 2163),
ST_Transform(ST_SetSRID(ST_Point(b.d_lon, b.d_lat), 4326), 2163))) dist_meters
FROM ada_class3_uw_2.midd1 AS a
JOIN ada_class3_uw_2.o_d_latlons b
ON a.new_zip = b.new_zip::int AND a.location = b.location
GROUP BY a.new_zip, a.location, b.o_lon, b.o_lat, b.d_lon, b.d_lat
"""

# run the query and preview the first rows of the result
o_d_table = pd.read_sql(sql=qry, con=conn)
o_d_table.head(n=5)

In [None]:
# (rows, columns) of the origin-destination table = number of routing queries the loop below will send
o_d_table.shape

## distance matrix loop

In [None]:

# base URL of the OTP router queried by the distance matrix loop below;
# the "plan" end point is appended to it for every origin-destination pair
base_url = "https://tripplanner.adrf.info/otp/routers/seattle_20171023/"

In [None]:
# so let's do this for all origin-destination pairs
# what data do we want to collect (one entry per pair, kept in step across lists):
o_name = []
d_name = []
durations = []
distances = []
queries = []

start_time = time.time()

for did, o_d in o_d_table.iterrows():
    # set up this query (no date given, so OTP plans for "now")
    qry_url = '{}plan?fromPlace={},{}&toPlace={},{}'.format(
        base_url, o_d['o_lat'], o_d['o_lon'], o_d['d_lat'], o_d['d_lon'])
    # get plan
    response = requests.get(qry_url).text
    plan = json.loads(response)

    # the pair is recorded whether or not a route was found
    o_name.append(o_d['new_zip'])
    d_name.append(o_d['location'])

    # a reply without a 'plan' (or with no itineraries) means no route
    itineraries = plan.get('plan', {}).get('itineraries', [])
    if itineraries:
        # (duration, distance) per trip; distance is summed over each trip's legs
        trips = [(trip['duration'],
                  sum(leg['distance'] for leg in trip['legs']))
                 for trip in itineraries]
        # keep the fastest trip (first one wins on ties); the index of the
        # fastest trip is computed here rather than reused from an earlier cell
        # (to speed up the analysis, trips[0] could be taken instead)
        best_duration, best_distance = min(trips, key=lambda t: t[0])

        # update outputs
        durations.append(best_duration)
        distances.append(best_distance)
        queries.append(qry_url)
    else:  # no plan returned for pair
        # update outputs
        durations.append(None)
        distances.append(None)
        queries.append(None)

# create dataframe from outputs
orig_dest_df = pd.DataFrame({'origin': o_name, 'destin': d_name,
                             'duration_seconds': durations,
                             'distance_meters': distances, 'query': queries})
end_time = time.time()
print('analysis took {:.3f}'.format(end_time - start_time))
orig_dest_df.head()


In [None]:
# write the travel-time table to the class schema, leaving out the dataframe index
orig_dest_df.to_sql(name='o_d_times', con=conn, schema='ada_class3_uw_2', index=False)

In [None]:
# (rows, columns) of the results table; one row is appended per origin-destination pair
orig_dest_df.shape

In [None]:

# scratch check of label-based slicing: .loc[:5] includes the end label, so on
# the default integer index from read_sql this prints 0 through 5 (six rows)
for did, o_d in o_d_table.loc[:5,:].iterrows():
    print(did)

In [None]:
# last five rows selected by position (all columns)
o_d_table.tail(5)