Notebook that uses Schedule_Functions.py functions to calculate relevant schedule metrics.
#### Matthew Samach

In [1]:
import pandas as pd
import numpy as np
import Schedule_Functions as sf

First, read in the routes relevant to our study.

In [2]:
# routes_df is the base table: every metric computed below is merged onto it
# and the result is written out at the end of each section.
routes_df = pd.read_csv("../Data/System_Routes.csv")
routes = routes_df.Route

service_table = pd.read_csv("../Data/PAAC_Service_Table.csv")
# Route numbers are compared against routes_df.Route in the merge below, so make
# them strings (presumably Route holds string labels - TODO confirm against the CSV).
service_table['route_no'] = service_table.route_no.map(str)

# Inner join: keep only the service rows whose route appears in the route list.
schedules = pd.merge(routes_df, service_table, left_on = "Route", right_on = "route_no")

  interactivity=interactivity, compiler=compiler, result=result)


Divide the schedule into weekday/Saturday/Sunday and inbound/outbound subsets, then split each day-and-direction subset into peak and off-peak dataframes.

In [3]:
# Boolean masks reused by every slice below.
runs_wkday = schedules.wkdy_service == "Yes"
runs_sat = schedules.sat_service == "Yes"
runs_sun = schedules.sun_service == "Yes"
is_inbound = schedules.direction_id == "Inbound"
is_outbound = schedules.direction_id == "Outbound"

# Service-day subsets, both directions together.
wkday = schedules[runs_wkday]
sat = schedules[runs_sat]
sun = schedules[runs_sun]

# Service-day subsets restricted to inbound rows.
inbound_wkday = schedules[is_inbound & runs_wkday]
inbound_sat = schedules[is_inbound & runs_sat]
inbound_sun = schedules[is_inbound & runs_sun]

# Service-day subsets restricted to outbound rows.
outbound_wkday = schedules[is_outbound & runs_wkday]
outbound_sat = schedules[is_outbound & runs_sat]
outbound_sun = schedules[is_outbound & runs_sun]

# sf.peakHours returns a pair, unpacked here as (peak, off-peak) for each subset.
inbound_wkday_pk, inbound_wkday_offpk = sf.peakHours(inbound_wkday)
inbound_sat_pk, inbound_sat_offpk = sf.peakHours(inbound_sat)
inbound_sun_pk, inbound_sun_offpk = sf.peakHours(inbound_sun)

outbound_wkday_pk, outbound_wkday_offpk = sf.peakHours(outbound_wkday)
outbound_sat_pk, outbound_sat_offpk = sf.peakHours(outbound_sat)
outbound_sun_pk, outbound_sun_offpk = sf.peakHours(outbound_sun)

### Weekday

Trips inbound and outbound

In [4]:
# Weekday trip counts per route, one table per direction.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound
ibcount_df = pd.DataFrame(
    [(r, sf.countTrips(inbound_wkday[inbound_wkday.route_no == r])) for r in routes],
    columns=["Route", "inbound_trips"],
)

# Outbound
obcount_df = pd.DataFrame(
    [(r, sf.countTrips(outbound_wkday[outbound_wkday.route_no == r])) for r in routes],
    columns=["Route", "outbound_trips"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibcount_df)
routes_df = pd.merge(routes_df, obcount_df)

Start time inbound and outbound

In [5]:
# Weekday start time per route, one table per direction.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound
ibstart_df = pd.DataFrame(
    [(r, sf.startTime(inbound_wkday[inbound_wkday.route_no == r])) for r in routes],
    columns=["Route", "inbound_start"],
)

# Outbound
obstart_df = pd.DataFrame(
    [(r, sf.startTime(outbound_wkday[outbound_wkday.route_no == r])) for r in routes],
    columns=["Route", "outbound_start"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibstart_df)
routes_df = pd.merge(routes_df, obstart_df)

Average headway: peak/nonpeak and inbound/outbound

In [6]:
# Weekday average headway per route for each direction / period combination.
# Each table is built from a list of (route, headway) rows in one step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound, peak
ibpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(inbound_wkday[inbound_wkday.route_no == r], inbound_wkday_pk))
     for r in routes],
    columns=["Route", "inbound_pk_headway"],
)

# Inbound, offpeak
ibop_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(inbound_wkday[inbound_wkday.route_no == r], inbound_wkday_offpk))
     for r in routes],
    columns=["Route", "inbound_offpk_headway"],
)

# Outbound, peak
obpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(outbound_wkday[outbound_wkday.route_no == r], outbound_wkday_pk))
     for r in routes],
    columns=["Route", "outbound_pk_headway"],
)

# Outbound, offpeak
oboffpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(outbound_wkday[outbound_wkday.route_no == r], outbound_wkday_offpk))
     for r in routes],
    columns=["Route", "outbound_offpk_headway"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibpk_hw_df)
routes_df = pd.merge(routes_df, ibop_hw_df)
routes_df = pd.merge(routes_df, obpk_hw_df)
routes_df = pd.merge(routes_df, oboffpk_hw_df)

Peak vehicles

In [7]:
# Weekday peak-vehicle figure per route, computed from both directions together.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.
pv_df = pd.DataFrame(
    [(r, sf.peak_vehicle(wkday[wkday.route_no == r])) for r in routes],
    columns=["Route", "PV"],
)

# No `on=` is given, so the merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, pv_df)

Average trip time: peak/nonpeak and inbound/outbound

In [8]:
# Weekday trip time per route for each direction / period combination.
# For every route, its rows are joined to the peak (or off-peak) table on
# trip_id == Trip and the joined rows are handed to sf.timeRange.
# Rows are gathered in a list and converted to a frame once: DataFrame.append
# was removed in pandas 2.0, and appending in a loop copies the frame each pass.

# Inbound, peak
ibpk_rows = []
for r in routes:
    period_trips = pd.merge(inbound_wkday[inbound_wkday.route_no == r], inbound_wkday_pk,
                            left_on="trip_id", right_on="Trip")
    ibpk_rows.append((r, sf.timeRange(period_trips)))
ibpk_time_df = pd.DataFrame(ibpk_rows, columns=["Route", "inbound_pk_time"])

# Inbound, offpeak
iboffpk_rows = []
for r in routes:
    period_trips = pd.merge(inbound_wkday[inbound_wkday.route_no == r], inbound_wkday_offpk,
                            left_on="trip_id", right_on="Trip")
    iboffpk_rows.append((r, sf.timeRange(period_trips)))
iboffpk_time_df = pd.DataFrame(iboffpk_rows, columns=["Route", "inbound_offpk_time"])

# Outbound, peak
obpk_rows = []
for r in routes:
    period_trips = pd.merge(outbound_wkday[outbound_wkday.route_no == r], outbound_wkday_pk,
                            left_on="trip_id", right_on="Trip")
    obpk_rows.append((r, sf.timeRange(period_trips)))
obpk_time_df = pd.DataFrame(obpk_rows, columns=["Route", "outbound_pk_time"])

# Outbound, offpeak
oboffpk_rows = []
for r in routes:
    period_trips = pd.merge(outbound_wkday[outbound_wkday.route_no == r], outbound_wkday_offpk,
                            left_on="trip_id", right_on="Trip")
    oboffpk_rows.append((r, sf.timeRange(period_trips)))
oboffpk_time_df = pd.DataFrame(oboffpk_rows, columns=["Route", "outbound_offpk_time"])

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibpk_time_df)
routes_df = pd.merge(routes_df, iboffpk_time_df)
routes_df = pd.merge(routes_df, obpk_time_df)
routes_df = pd.merge(routes_df, oboffpk_time_df)

In [9]:
# Write the weekday metrics table to CSV, leaving out the DataFrame index.
routes_df.to_csv("../Data/weekday_service_metrics.csv", index=False)

### Saturday

In [10]:
# Re-read the route list so routes_df no longer carries the weekday columns merged in above.
# `routes` (taken from the first read of this file) is reused as-is by the cells below.
routes_df = pd.read_csv("../Data/System_Routes.csv")

Trips inbound and outbound

In [11]:
# Saturday trip counts per route, one table per direction.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound
ibcount_df = pd.DataFrame(
    [(r, sf.countTrips(inbound_sat[inbound_sat.route_no == r])) for r in routes],
    columns=["Route", "inbound_trips"],
)

# Outbound
obcount_df = pd.DataFrame(
    [(r, sf.countTrips(outbound_sat[outbound_sat.route_no == r])) for r in routes],
    columns=["Route", "outbound_trips"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibcount_df)
routes_df = pd.merge(routes_df, obcount_df)

Start time inbound and outbound

In [12]:
# Saturday start time per route, one table per direction.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound
ibstart_df = pd.DataFrame(
    [(r, sf.startTime(inbound_sat[inbound_sat.route_no == r])) for r in routes],
    columns=["Route", "inbound_start"],
)

# Outbound
obstart_df = pd.DataFrame(
    [(r, sf.startTime(outbound_sat[outbound_sat.route_no == r])) for r in routes],
    columns=["Route", "outbound_start"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibstart_df)
routes_df = pd.merge(routes_df, obstart_df)

Average headway: peak/nonpeak and inbound/outbound

In [13]:
# Saturday average headway per route for each direction / period combination.
# Each table is built from a list of (route, headway) rows in one step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound, peak
ibpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(inbound_sat[inbound_sat.route_no == r], inbound_sat_pk))
     for r in routes],
    columns=["Route", "inbound_pk_headway"],
)

# Inbound, offpeak
ibop_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(inbound_sat[inbound_sat.route_no == r], inbound_sat_offpk))
     for r in routes],
    columns=["Route", "inbound_offpk_headway"],
)

# Outbound, peak
obpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(outbound_sat[outbound_sat.route_no == r], outbound_sat_pk))
     for r in routes],
    columns=["Route", "outbound_pk_headway"],
)

# Outbound, offpeak
oboffpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(outbound_sat[outbound_sat.route_no == r], outbound_sat_offpk))
     for r in routes],
    columns=["Route", "outbound_offpk_headway"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibpk_hw_df)
routes_df = pd.merge(routes_df, ibop_hw_df)
routes_df = pd.merge(routes_df, obpk_hw_df)
routes_df = pd.merge(routes_df, oboffpk_hw_df)

Peak vehicles

In [14]:
# Saturday peak-vehicle figure per route, computed from both directions together.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.
pv_df = pd.DataFrame(
    [(r, sf.peak_vehicle(sat[sat.route_no == r])) for r in routes],
    columns=["Route", "PV"],
)

# No `on=` is given, so the merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, pv_df)

Average trip time: peak/nonpeak and inbound/outbound

In [15]:
# Saturday trip time per route for each direction / period combination.
# For every route, its rows are joined to the peak (or off-peak) table on
# trip_id == Trip and the joined rows are handed to sf.timeRange.
# Rows are gathered in a list and converted to a frame once: DataFrame.append
# was removed in pandas 2.0, and appending in a loop copies the frame each pass.
#
# NOTE(review): the saved run of this notebook shows
# "TypeError: strptime() argument 1 must be str, not float" right after this cell.
# Presumably sf.timeRange received a missing (NaN) time value for some Saturday
# route - confirm in Schedule_Functions.py; nothing in this cell guards against it.

# Inbound, peak
ibpk_rows = []
for r in routes:
    period_trips = pd.merge(inbound_sat[inbound_sat.route_no == r], inbound_sat_pk,
                            left_on="trip_id", right_on="Trip")
    ibpk_rows.append((r, sf.timeRange(period_trips)))
ibpk_time_df = pd.DataFrame(ibpk_rows, columns=["Route", "inbound_pk_time"])

# Inbound, offpeak
iboffpk_rows = []
for r in routes:
    period_trips = pd.merge(inbound_sat[inbound_sat.route_no == r], inbound_sat_offpk,
                            left_on="trip_id", right_on="Trip")
    iboffpk_rows.append((r, sf.timeRange(period_trips)))
iboffpk_time_df = pd.DataFrame(iboffpk_rows, columns=["Route", "inbound_offpk_time"])

# Outbound, peak
obpk_rows = []
for r in routes:
    period_trips = pd.merge(outbound_sat[outbound_sat.route_no == r], outbound_sat_pk,
                            left_on="trip_id", right_on="Trip")
    obpk_rows.append((r, sf.timeRange(period_trips)))
obpk_time_df = pd.DataFrame(obpk_rows, columns=["Route", "outbound_pk_time"])

# Outbound, offpeak
oboffpk_rows = []
for r in routes:
    period_trips = pd.merge(outbound_sat[outbound_sat.route_no == r], outbound_sat_offpk,
                            left_on="trip_id", right_on="Trip")
    oboffpk_rows.append((r, sf.timeRange(period_trips)))
oboffpk_time_df = pd.DataFrame(oboffpk_rows, columns=["Route", "outbound_offpk_time"])

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibpk_time_df)
routes_df = pd.merge(routes_df, iboffpk_time_df)
routes_df = pd.merge(routes_df, obpk_time_df)
routes_df = pd.merge(routes_df, oboffpk_time_df)

TypeError: strptime() argument 1 must be str, not float

In [None]:
# Write the Saturday metrics table to CSV, leaving out the DataFrame index.
routes_df.to_csv("../Data/saturday_service_metrics.csv", index=False)

### Sunday

In [None]:
# Re-read the route list so routes_df no longer carries the Saturday columns merged in above.
# `routes` (taken from the first read of this file) is reused as-is by the cells below.
routes_df = pd.read_csv("../Data/System_Routes.csv")

Trips inbound and outbound

In [None]:
# Sunday trip counts per route, one table per direction.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound
ibcount_df = pd.DataFrame(
    [(r, sf.countTrips(inbound_sun[inbound_sun.route_no == r])) for r in routes],
    columns=["Route", "inbound_trips"],
)

# Outbound
obcount_df = pd.DataFrame(
    [(r, sf.countTrips(outbound_sun[outbound_sun.route_no == r])) for r in routes],
    columns=["Route", "outbound_trips"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibcount_df)
routes_df = pd.merge(routes_df, obcount_df)

Start time inbound and outbound

In [None]:
# Sunday start time per route, one table per direction.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound
ibstart_df = pd.DataFrame(
    [(r, sf.startTime(inbound_sun[inbound_sun.route_no == r])) for r in routes],
    columns=["Route", "inbound_start"],
)

# Outbound
obstart_df = pd.DataFrame(
    [(r, sf.startTime(outbound_sun[outbound_sun.route_no == r])) for r in routes],
    columns=["Route", "outbound_start"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibstart_df)
routes_df = pd.merge(routes_df, obstart_df)

Average headway: peak/nonpeak and inbound/outbound

In [None]:
# Sunday average headway per route for each direction / period combination.
# Each table is built from a list of (route, headway) rows in one step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.

# Inbound, peak
ibpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(inbound_sun[inbound_sun.route_no == r], inbound_sun_pk))
     for r in routes],
    columns=["Route", "inbound_pk_headway"],
)

# Inbound, offpeak
ibop_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(inbound_sun[inbound_sun.route_no == r], inbound_sun_offpk))
     for r in routes],
    columns=["Route", "inbound_offpk_headway"],
)

# Outbound, peak
obpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(outbound_sun[outbound_sun.route_no == r], outbound_sun_pk))
     for r in routes],
    columns=["Route", "outbound_pk_headway"],
)

# Outbound, offpeak
oboffpk_hw_df = pd.DataFrame(
    [(r, sf.avgHeadway(outbound_sun[outbound_sun.route_no == r], outbound_sun_offpk))
     for r in routes],
    columns=["Route", "outbound_offpk_headway"],
)

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibpk_hw_df)
routes_df = pd.merge(routes_df, ibop_hw_df)
routes_df = pd.merge(routes_df, obpk_hw_df)
routes_df = pd.merge(routes_df, oboffpk_hw_df)

Peak vehicles

In [None]:
# Sunday peak-vehicle figure per route, computed from both directions together.
# Rows are collected first and turned into a frame in a single step:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copies the whole frame on every pass.
pv_df = pd.DataFrame(
    [(r, sf.peak_vehicle(sun[sun.route_no == r])) for r in routes],
    columns=["Route", "PV"],
)

# No `on=` is given, so the merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, pv_df)

Average trip time: peak/nonpeak and inbound/outbound

In [None]:
# Sunday trip time per route for each direction / period combination.
# For every route, its rows are joined to the peak (or off-peak) table on
# trip_id == Trip and the joined rows are handed to sf.timeRange.
# Rows are gathered in a list and converted to a frame once: DataFrame.append
# was removed in pandas 2.0, and appending in a loop copies the frame each pass.

# Inbound, peak
ibpk_rows = []
for r in routes:
    period_trips = pd.merge(inbound_sun[inbound_sun.route_no == r], inbound_sun_pk,
                            left_on="trip_id", right_on="Trip")
    ibpk_rows.append((r, sf.timeRange(period_trips)))
ibpk_time_df = pd.DataFrame(ibpk_rows, columns=["Route", "inbound_pk_time"])

# Inbound, offpeak
iboffpk_rows = []
for r in routes:
    period_trips = pd.merge(inbound_sun[inbound_sun.route_no == r], inbound_sun_offpk,
                            left_on="trip_id", right_on="Trip")
    iboffpk_rows.append((r, sf.timeRange(period_trips)))
iboffpk_time_df = pd.DataFrame(iboffpk_rows, columns=["Route", "inbound_offpk_time"])

# Outbound, peak
obpk_rows = []
for r in routes:
    period_trips = pd.merge(outbound_sun[outbound_sun.route_no == r], outbound_sun_pk,
                            left_on="trip_id", right_on="Trip")
    obpk_rows.append((r, sf.timeRange(period_trips)))
obpk_time_df = pd.DataFrame(obpk_rows, columns=["Route", "outbound_pk_time"])

# Outbound, offpeak
oboffpk_rows = []
for r in routes:
    period_trips = pd.merge(outbound_sun[outbound_sun.route_no == r], outbound_sun_offpk,
                            left_on="trip_id", right_on="Trip")
    oboffpk_rows.append((r, sf.timeRange(period_trips)))
oboffpk_time_df = pd.DataFrame(oboffpk_rows, columns=["Route", "outbound_offpk_time"])

# No `on=` is given, so each merge joins on the columns the two frames share.
routes_df = pd.merge(routes_df, ibpk_time_df)
routes_df = pd.merge(routes_df, iboffpk_time_df)
routes_df = pd.merge(routes_df, obpk_time_df)
routes_df = pd.merge(routes_df, oboffpk_time_df)

In [None]:
# Write the Sunday metrics table to CSV, leaving out the DataFrame index.
routes_df.to_csv("../Data/sunday_service_metrics.csv", index=False)