# Write more functions for GTFS classes

Sunday, March 24, 2019

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

import os
import gtfs_transformer as gt

from shapely.geometry import Point
from shapely.ops import nearest_points
from geopy.distance import great_circle
import matplotlib.pyplot as plt
from tqdm import tqdm as tqdm_base, tqdm_notebook as tqdm

tqdm_base.pandas()

In [131]:
import copy

## Create GTFS objects for July 2018 and August 2018 by reading from directory

### To Do:

I'd like to add the ability to initialize the GTFS object directly from the `.zip` file downloaded from King County Metro, rather than having to unzip the file first. I don't think you can do this directly using [`pandas.read_csv`](http://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html), because it can only read a `.zip` archive that contains a single data file, and this `.zip` file contains multiple `.csv` files. However, it should be possible using the Python [`zipfile`](https://pymotw.com/2/zipfile/) module: open each member of the archive with `ZipFile.open` and pass the resulting file object to `read_csv`.

In [117]:
# Build one StaticGTFS object per unzipped feed directory (July and August 2018).
# Each object is initialized with the tables found in the given directory, and the
# feed date is inferred from the directory name.
gtfs_jul18 = gt.StaticGTFS('../data/source/gtfs_20180718')
gtfs_aug18 = gt.StaticGTFS('../data/source/gtfs_20180815')

In [10]:
gtfs_aug18.table_names

['agency',
 'block',
 'block_trip',
 'calendar',
 'calendar_dates',
 'fare_attributes',
 'fare_rules',
 'routes',
 'shapes',
 'stop_times',
 'stops',
 'trips']

## Get route id's using `route_ids_from_names` function

In [13]:
# This creates a mapping for all routes in the routes table
route_ids = gtfs_aug18.route_ids_from_names()
route_ids.head(10)

Unnamed: 0_level_0,route_id
route_short_name,Unnamed: 1_level_1
1,100001
10,100002
101,100003
105,100004
106,100005
107,100006
11,100009
111,100011
113,100012
114,100013


In [8]:
route_ids.loc['D Line']

route_id    102581
Name: D Line, dtype: int64

In [9]:
route_ids.loc[['10', 'D Line']]

Unnamed: 0_level_0,route_id
route_short_name,Unnamed: 1_level_1
10,100002
D Line,102581


In [12]:
# Can get same table by passing specific lines to function
gtfs_aug18.route_ids_from_names(10, 'D Line')

Unnamed: 0_level_0,route_id
route_short_name,Unnamed: 1_level_1
10,100002
D Line,102581


In [137]:
route_ids.at['10', 'route_id']

100002

In [139]:
# # This doesn't work: `.at` accesses a single scalar value, so it takes exactly one
# # row label and one column label, not a list of row labels.
# route_ids.at[['10', 'D Line'], 'route_id']
# # To select several rows at once, use `.loc` instead:
# route_ids.loc[['10', 'D Line'], 'route_id']

In [140]:
gtfs_aug18.route_ids_from_names(10, 'D Line').values

array([[100002],
       [102581]])

In [142]:
route_ids.head().to_dict()

{'route_id': {'1': 100001,
  '10': 100002,
  '101': 100003,
  '105': 100004,
  '106': 100005}}

In [144]:
route_ids.to_dict()['route_id']['10']

100002

In [21]:
route_ids.shape

(221, 1)

In [113]:
gtfs_aug18.routes.head(3)

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color
0,100001,KCM,1,,Kinnear - Downtown Seattle,3,http://metro.kingcounty.gov/schedules/001/n0.html,,
1,100002,KCM,10,,Capitol Hill - Downtown Seattle,3,http://metro.kingcounty.gov/schedules/010/n0.html,,
2,100003,KCM,101,,Renton Transit Center - Downtown Seattle,3,http://metro.kingcounty.gov/schedules/101/n0.html,,


## We want to display trip headsign and `direction_id` for a route to know which direction is which

However, there may be more than one headsign for a given direction...

In [11]:
gtfs_aug18.trips.head()

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,peak_flag,fare_id
0,100227,19845,34734386,Downtown Seattle,LOCAL,1,5045281,20047002,0,101.0
1,100227,19845,34734387,Downtown Seattle,LOCAL,1,5045281,20047002,0,101.0
2,100227,19845,34734388,Downtown Seattle,LOCAL,1,5045281,20047002,0,101.0
3,100227,19845,34734389,Downtown Seattle,LOCAL,1,5045281,20047002,0,101.0
4,100227,19845,34734390,Downtown Seattle,LOCAL,1,5045281,20047002,0,101.0


In [14]:
gtfs_aug18.trips.shape

(59857, 10)

In [150]:
gtfs_aug18.trips['trip_short_name'].unique()

array(['LOCAL', 'EXPRESS'], dtype=object)

In [151]:
# Less than 10% of trips are EXPRESS
gtfs_aug18.trips[gtfs_aug18.trips['trip_short_name']=='EXPRESS'].shape

(5195, 10)

### Try out different ways of aggregating the `trips` data

In [20]:
gtfs_aug18.trips.groupby(by=['route_id', 'trip_headsign']).agg({'direction_id': 'mean'}).shape

(542, 1)

In [26]:
gtfs_aug18.trips.groupby(by=['route_id', 'direction_id']).agg({'trip_headsign': 'max'}).shape

(438, 1)

In [28]:
gtfs_aug18.trips.groupby(by=['route_id', 'trip_headsign']).agg({'direction_id': 'max', 'shape_id': 'max'}).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,direction_id,shape_id
route_id,trip_headsign,Unnamed: 2_level_1,Unnamed: 3_level_1
100001,Downtown Seattle,1,20001010
100001,Kinnear Seattle Center W,0,11001012
100002,Atlantic Base Via Broadway,1,20010004
100002,Capitol Hill Via 15th Ave E,0,11010010
100002,Downtown Seattle,1,20010007
100003,Downtown Seattle,1,10101008
100003,S Renton P&R,0,21101007
100004,Renton Highlands,0,11105002
100004,Renton Renton Tech Coll,1,20105003
100005,International District Chinatown Station,1,10106037


In [24]:
gtfs_aug18.routes.shape

(221, 9)

In [25]:
gtfs_aug18.trips.direction_id.unique()

array([1, 0])

In [33]:
gtfs_aug18.trips.groupby(
    by=['route_id', 'trip_headsign', 'direction_id', 'shape_id']
).agg({'trip_id': ['count', 'min', 'max']}).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,trip_id,trip_id,trip_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,count,min,max
route_id,trip_headsign,direction_id,shape_id,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
100001,Downtown Seattle,1,20001009,8,34736146,39483896
100001,Downtown Seattle,1,20001010,274,34735996,39653128
100001,Kinnear Seattle Center W,0,11001011,270,34735973,39653018
100001,Kinnear Seattle Center W,0,11001012,12,34736010,39483760
100002,Atlantic Base Via Broadway,1,20010004,10,34740661,39476320
100002,Capitol Hill Via 15th Ave E,0,11010005,8,34735719,39483469
100002,Capitol Hill Via 15th Ave E,0,11010006,470,38426627,39483498
100002,Capitol Hill Via 15th Ave E,0,11010010,8,38426760,39476321
100002,Downtown Seattle,1,20010002,470,34735660,39483499
100002,Downtown Seattle,1,20010007,2,34740670,39476197


In [30]:
gtfs_aug18.trips.groupby(
    by=['route_id', 'trip_headsign', 'shape_id', 'direction_id']
).agg({'trip_id': 'count'}).shape

(914, 1)

In [92]:
gtfs_aug18.trips.merge(gtfs_aug18.routes[['route_id', 'route_short_name', 'route_desc']], on='route_id').groupby(
    by=['route_id', 'route_short_name', 'route_desc', 'trip_headsign', 'direction_id', 'shape_id']
).agg({'trip_id': ['count', 'min', 'max']}).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,trip_id,trip_id,trip_id
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,count,min,max
route_id,route_short_name,route_desc,trip_headsign,direction_id,shape_id,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
100001,1,Kinnear - Downtown Seattle,Downtown Seattle,1,20001009,8,34736146,39483896
100001,1,Kinnear - Downtown Seattle,Downtown Seattle,1,20001010,274,34735996,39653128
100001,1,Kinnear - Downtown Seattle,Kinnear Seattle Center W,0,11001011,270,34735973,39653018
100001,1,Kinnear - Downtown Seattle,Kinnear Seattle Center W,0,11001012,12,34736010,39483760
100002,10,Capitol Hill - Downtown Seattle,Atlantic Base Via Broadway,1,20010004,10,34740661,39476320
100002,10,Capitol Hill - Downtown Seattle,Capitol Hill Via 15th Ave E,0,11010005,8,34735719,39483469
100002,10,Capitol Hill - Downtown Seattle,Capitol Hill Via 15th Ave E,0,11010006,470,38426627,39483498
100002,10,Capitol Hill - Downtown Seattle,Capitol Hill Via 15th Ave E,0,11010010,8,38426760,39476321
100002,10,Capitol Hill - Downtown Seattle,Downtown Seattle,1,20010002,470,34735660,39483499
100002,10,Capitol Hill - Downtown Seattle,Downtown Seattle,1,20010007,2,34740670,39476197


### Ok, I think the following dataframe looks good for the final version

In [111]:
# Summarize trips per (route_short_name, direction_id): attach the route name and
# description to every trip, then aggregate to the distinct headsigns, the number of
# distinct shapes, and the number of trips in each group.
(
    gtfs_aug18.trips
    .merge(gtfs_aug18.routes[['route_id', 'route_short_name', 'route_desc']], on='route_id')
    .groupby(by=['route_short_name', 'direction_id'])
    .agg({
        'route_desc': 'max',
        'trip_headsign': lambda x: x.unique(),
        'shape_id': lambda x: len(x.unique()),
        'trip_id': 'count',
    })
    .rename(columns={'shape_id': 'shape_count', 'trip_id': 'trip_count'})
    .head(20)#.loc[('10',0), 'trip_headsign']
)
# .reset_index().set_index('route_short_name').head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,route_desc,trip_headsign,shape_count,trip_count
route_short_name,direction_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,Kinnear - Downtown Seattle,Kinnear Seattle Center W,2,282
1,1,Kinnear - Downtown Seattle,Downtown Seattle,2,282
10,0,Capitol Hill - Downtown Seattle,Capitol Hill Via 15th Ave E,3,486
10,1,Capitol Hill - Downtown Seattle,"[Downtown Seattle , Atlantic Base Via Broadway]",3,482
101,0,Renton Transit Center - Downtown Seattle,S Renton P&R,2,242
101,1,Renton Transit Center - Downtown Seattle,Downtown Seattle,2,228
102,0,Fairwood - Downtown Seattle,"[Fairwood S Renton P&R, S Renton P&R ]",4,20
102,1,Fairwood - Downtown Seattle,"[Downtown Seattle S Renton P&R, Downtown Seatt...",4,24
105,0,Renton Highlands - Renton Transit Center,Renton Highlands,1,168
105,1,Renton Highlands - Renton Transit Center,Renton Renton Tech Coll,1,174


In [110]:
# Hmm, using lambda x: x.unique() in .agg() returns a string if there's one object,
# and a numpy array of strings if there are more... Seems like that could be either useful or annoying...
# (Result of running the above cell with .loc[('10',0), 'trip_headsign'] uncommented)
# NOTE: `_` is IPython's "previous output" variable, so this cell only gives the `str`
# result shown when it is run immediately after that modified cell; it will not
# reproduce on a top-to-bottom re-run of the notebook as saved.
type(_)

str

## Write a function to return a summary dataframe like the one above, optionally passing in specific route names

Note that the `Redmond LOOP` line only has one direction. The dataframe returned for all routes has 438 rows, which is 4 fewer than twice the number of routes (2 × 221 = 442), presumably because some routes, like the `Redmond LOOP`, only have one direction.

In [210]:
gtfs_aug18.trips_by_route_and_direction(10, 7, 'C Line', 'E Line', 8, 'Redmond LOOP', 41)

Unnamed: 0_level_0,Unnamed: 1_level_0,route_desc,trip_headsign,shape_count,trip_count,block_count,trip_short_name,peak_flag,fare_id
route_short_name,direction_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10,0,Capitol Hill - Downtown Seattle,Capitol Hill Via 15th Ave E,3,486,36,LOCAL,"[0, 1]","[101.0, 110.0]"
10,1,Capitol Hill - Downtown Seattle,"[Downtown Seattle , Atlantic Base Via Broadway]",3,482,36,LOCAL,"[0, 1]","[101.0, 110.0]"
41,0,Lake City - Northgate TC - Downtown Seattle,"[Lake City Northgate, Northgate Northeast 125t...",8,450,110,LOCAL,"[0, 1]","[101.0, 110.0]"
41,1,Lake City - Northgate TC - Downtown Seattle,"[Downtown Seattle , Downtown Seattle Northgate]",10,438,102,LOCAL,"[0, 1]","[101.0, 110.0]"
7,0,Prentice St - Rainier Beach - Downtown Seattle,"[Rainier Beach Via Rainier Ave S, Prentice St ...",7,641,137,LOCAL,"[0, 1]","[101.0, 110.0]"
7,1,Prentice St - Rainier Beach - Downtown Seattle,"[Downtown Seattle Via Rainier Ave S, S Jackson...",5,641,137,LOCAL,"[0, 1]","[101.0, 110.0]"
8,0,Seattle Center - Capitol Hill - Rainier Beach,Seattle Center,1,388,67,LOCAL,[0],[101.0]
8,1,Seattle Center - Capitol Hill - Rainier Beach,Mount Baker Transit Center,1,396,67,LOCAL,[0],[101.0]
C Line,0,Valley St/Fairview Av N - SW Barton St/26 Av SW,"[Westwood Village , West Seattle Alaska Junction]",4,579,134,LOCAL,"[0, 1]","[101.0, 110.0]"
C Line,1,Valley St/Fairview Av N - SW Barton St/26 Av SW,South Lake Union Downtown Seattle,2,581,148,LOCAL,"[0, 1]","[101.0, 110.0]"


In [207]:
gtfs_aug18.trips_by_route_and_direction().shape

(438, 8)

In [217]:
trips_by_direction = gtfs_aug18.trips_by_route_and_direction()
trips_by_direction.loc['101', 'trip_short_name'] == 'LOCAL'
# trips_by_direction[(trips_by_direction.trip_short_name == 'LOCAL')]

direction_id
0    True
1    True
Name: trip_short_name, dtype: bool

In [198]:
gtfs_aug18.trips[gtfs_aug18.trips['fare_id'].isna()].shape

(2027, 10)

In [200]:
gtfs_aug18.trips['fare_id'].unique()

array([101., 110., 100., 501., 305., 900.,  nan, 906., 301., 102., 112.])

In [192]:
len(gtfs_aug18.trips['fare_id'].unique())

11

In [300]:
gtfs_aug18.trips['fare_id'].nunique()

10

In [193]:
gtfs_aug18.trips['fare_id'].unique()[6]

nan

In [194]:
# Surprising at first, but this is False by design: NaN never compares equal to
# anything, including itself (IEEE 754 floating-point semantics), so `== np.nan`
# cannot be used to detect missing values.
gtfs_aug18.trips['fare_id'].unique()[6] == np.nan

False

In [203]:
# Oh, you need to use np.isnan
np.isnan(gtfs_aug18.trips['fare_id'].unique()[6])

True

In [299]:
set(gtfs_aug18.trips['fare_id'].head())

{101.0}

In [297]:
gt.list_unique(gtfs_aug18.trips['fare_id'])[:40]

(nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 100.0,
 101.0,
 102.0,
 110.0,
 112.0,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan)

### Double check that our `trip_count` column is counting the right thing

The `count` function will count non-null rows, so our implementation assumes that each row in the trips table represents a unique trip. Let's verify this:

In [130]:
# Count total number of rows in trips table
gtfs_aug18.trips.shape

(59857, 10)

In [125]:
# Count number of unique trip_id's in trips table -- good, it's equal to above
gtfs_aug18.trips['trip_id'].unique().shape

(59857,)

In [123]:
# Make sure our trip counts add up to the right thing
gtfs_aug18.trips_by_route_and_direction()['trip_count'].sum()

59857

## Add another convenience function for the routes table

In [312]:
gtfs_aug18.routes_by_name(7, 120, 'D Line')

Unnamed: 0_level_0,route_desc,agency_id,route_id
route_short_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
120,Burien TC - Westwood Village - Downtown Seattle,KCM,100019
7,Prentice St - Rainier Beach - Downtown Seattle,KCM,100263
D Line,Blue Ridge/Crown Hill - Ballard - Downtown Sea...,KCM,102581


In [148]:
# If we want a dictionary from route names to route id's. Leave out the route names if you want all routes.
gtfs_aug18.routes_by_name(7, 120, 'D Line')['route_id'].to_dict()

{'120': 100019, '7': 100263, 'D Line': 102581}

In [316]:
gtfs_aug18.fare_attributes

Unnamed: 0,fare_id,agency_id,fare_period_id,price,descriptions,currency_type,payment_method,transfers,transfer_duration
0,100,KCM,1,0.0,KCM Regular Free,USD,0,,5400
1,101,KCM,1,2.75,KCM Regular 1 Zone,USD,0,,5400
2,102,KCM,1,2.75,KCM Regular 2 Zone,USD,0,,5400
3,110,KCM,2,2.75,KCM Peak 1 Zone,USD,0,,5400
4,112,KCM,2,2.75,KCM Peak 2 Zone,USD,0,,5400
5,301,KCM,1,4.25,Boeing,USD,0,0.0,0
6,302,KCM,1,3.75,Boeing,USD,0,0.0,0
7,305,KCM,1,4.75,Custom High,USD,0,0.0,0
8,501,ST,1,2.75,ST Regular 1 Zone,USD,0,0.0,0
9,502,ST,1,3.75,ST Regular 2 Zone,USD,0,0.0,0


## Add function to group by route and shape

Whoa, what's the deal with all the shapes for the 41?

In [209]:
gtfs_aug18.trips_by_route_and_shape(7,8, 41, 'D Line')

Unnamed: 0_level_0,Unnamed: 1_level_0,direction_id,trip_headsign,trip_count,block_count,trip_short_name,peak_flag,fare_id
route_short_name,shape_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
41,11041008,0,Lake City Northgate,83,37,LOCAL,"[0, 1]","[101.0, 110.0]"
41,11041011,0,Lake City Northgate,117,37,LOCAL,"[0, 1]","[101.0, 110.0]"
41,11041013,0,Northgate Northeast 125th Only,23,19,LOCAL,"[0, 1]","[101.0, 110.0]"
41,11041015,0,Lake City Northgate,5,5,LOCAL,[0],[101.0]
41,11041018,0,Lake City Northgate,80,32,LOCAL,[0],[101.0]
41,11041020,0,Lake City Northgate,114,35,LOCAL,[0],[101.0]
41,11041021,0,Northgate Northeast 125th Only,23,19,LOCAL,[0],[101.0]
41,11041023,0,Lake City Northgate,5,5,LOCAL,[0],[101.0]
41,20041006,1,Downtown Seattle Northgate,133,38,LOCAL,"[0, 1]","[101.0, 110.0]"
41,20041009,1,Downtown Seattle,11,11,LOCAL,"[0, 1]","[101.0, 110.0]"


### Hmm, some of the shape id's changed between July and August

For the 41, there are 18 shapes for both months

In [218]:
gtfs_jul18.trips_by_route_and_shape(7,8, 41, 'D Line')

Unnamed: 0_level_0,Unnamed: 1_level_0,direction_id,trip_headsign,trip_count,block_count,trip_short_name,peak_flag,fare_id
route_short_name,shape_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
41,11041002,0,Lake City Northgate,5,5,LOCAL,[0],[101.0]
41,11041008,0,Lake City Northgate,83,37,LOCAL,"[0, 1]","[101.0, 110.0]"
41,11041011,0,Lake City Northgate,117,37,LOCAL,"[0, 1]","[101.0, 110.0]"
41,11041013,0,Northgate Northeast 125th Only,23,19,LOCAL,"[0, 1]","[101.0, 110.0]"
41,11041015,0,Lake City Northgate,5,5,LOCAL,[0],[101.0]
41,11041231,0,Lake City Northgate,114,35,LOCAL,[0],[101.0]
41,11041232,0,Lake City Northgate,80,32,LOCAL,[0],[101.0]
41,11041240,0,Northgate Northeast 125th Only,23,19,LOCAL,[0],[101.0]
41,20041003,1,Downtown Seattle,6,5,LOCAL,[0],[101.0]
41,20041005,1,Downtown Seattle Northgate,128,34,LOCAL,[0],[101.0]


## For some reason the following line originally threw an error

Mon-Tue, March 25-26, 2019

When calling `gtfs_jul18.trips_by_route_and_direction(41)` below, I got the error message "ValueError: Function does not reduce", even though the function seemed to be working fine above. After googling it, the problem [seems to be](https://stackoverflow.com/questions/27439023/pandas-groupby-agg-function-does-not-reduce) that pandas doesn't like it when either (1) an aggregation function can return different types (e.g. int or list), or (2) the returned types are not hashable (e.g. a list vs. a tuple).

The recommended solution was to always return a tuple if you want to aggregate into a list; a tuple is immutable and, as long as its elements are hashable, hashable itself. (Strangely, calling `.nunique()` sometimes gave the same "Function does not reduce" error as `.unique()`, even though it should always just return a number, not a numpy array. I think the problem has to do with the `nan`'s.)

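I wrote a collection of simple aggregation functions such as `def list_unique(hashable_objects): return tuple(set(hashable_objects))` and used these instead of calling `.unique()` or `.nunique()`. So far, this seems to work... One caveat: because `nan != nan`, `set` does not collapse repeated `nan` values, which is why `gt.list_unique(gtfs_aug18.trips['fare_id'])` above returns many `nan` entries. Missing values should be dropped (e.g. with `.dropna()`) before building the set.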

In [314]:
gtfs_jul18.trips_by_route_and_direction(41)

Unnamed: 0_level_0,Unnamed: 1_level_0,route_desc,trip_headsign,shape_count,trip_count,block_count,trip_short_name,peak_flag,fare_descriptions
route_short_name,direction_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
41,0,Lake City - Northgate TC - Downtown Seattle,"(Lake City Northgate, Northgate Northeast 125t...",8,450,110,"(LOCAL,)","(0, 1)","(KCM Peak 1 Zone, KCM Regular 1 Zone)"
41,1,Lake City - Northgate TC - Downtown Seattle,"(Downtown Seattle , Downtown Seattle Northgate)",10,438,102,"(LOCAL,)","(0, 1)","(KCM Peak 1 Zone, KCM Regular 1 Zone)"


In [315]:
# Unexpected: this works now even though I haven't updated the aggregation functions
# for this method.
# NOTE(review): the output shows tuple-valued cells, so tuple-returning aggregators are
# evidently in effect here too -- presumably helpers shared with
# trips_by_route_and_direction and picked up by `%autoreload 2`; confirm in gtfs_transformer.
gtfs_jul18.trips_by_route_and_shape(41)

Unnamed: 0_level_0,Unnamed: 1_level_0,direction_id,trip_headsign,trip_count,block_count,trip_short_name,peak_flag,fare_descriptions
route_short_name,shape_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
41,11041002,0,"(Lake City Northgate,)",5,5,"(LOCAL,)","(0,)","(KCM Regular 1 Zone,)"
41,11041008,0,"(Lake City Northgate,)",83,37,"(LOCAL,)","(0, 1)","(KCM Peak 1 Zone, KCM Regular 1 Zone)"
41,11041011,0,"(Lake City Northgate,)",117,37,"(LOCAL,)","(0, 1)","(KCM Peak 1 Zone, KCM Regular 1 Zone)"
41,11041013,0,"(Northgate Northeast 125th Only,)",23,19,"(LOCAL,)","(0, 1)","(KCM Peak 1 Zone, KCM Regular 1 Zone)"
41,11041015,0,"(Lake City Northgate,)",5,5,"(LOCAL,)","(0,)","(KCM Regular 1 Zone,)"
41,11041231,0,"(Lake City Northgate,)",114,35,"(LOCAL,)","(0,)","(KCM Regular 1 Zone,)"
41,11041232,0,"(Lake City Northgate,)",80,32,"(LOCAL,)","(0,)","(KCM Regular 1 Zone,)"
41,11041240,0,"(Northgate Northeast 125th Only,)",23,19,"(LOCAL,)","(0,)","(KCM Regular 1 Zone,)"
41,20041003,1,"(Downtown Seattle ,)",6,5,"(LOCAL,)","(0,)","(KCM Regular 1 Zone,)"
41,20041005,1,"(Downtown Seattle Northgate,)",128,34,"(LOCAL,)","(0,)","(KCM Regular 1 Zone,)"


In [277]:
# Look up the route_id for route '41' from the full route-name -> route_id mapping.
# NOTE(review): earlier cells call `route_ids_from_names(...)` while this cell calls
# `route_ids(...)`, and the execution counts are out of order -- confirm which name the
# current gtfs_transformer module exposes before relying on a Restart & Run All.
gtfs_jul18.route_ids()['41']
# gtfs_jul18.trips.set_index('route_id').loc[gtfs_jul18.route_ids(41, 'C Line')]

100221

In [273]:
# gtfs_jul18.routes_by_name(41)['route_id'].to_dict()['41']
# gtfs_jul18.routes_by_name().loc['41', 'route_id']
# jul_trips_41 = gtfs_jul18.trips.set_index('route_id').loc[gtfs_jul18.routes_by_name().loc['41', 'route_id']]
jul_trips_41 = gtfs_jul18.trips.set_index('route_id').loc[gtfs_jul18.route_ids(41)]
jul_trips_41.head(10)

Unnamed: 0_level_0,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,peak_flag,fare_id
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
100221,44711,39485224,Downtown Seattle,LOCAL,1,5062060,20041003,0,101.0
100221,44711,39485225,Lake City Northgate,LOCAL,0,5061877,11041232,0,101.0
100221,44711,39485226,Downtown Seattle Northgate,LOCAL,1,5061877,20041228,0,101.0
100221,44711,39485777,Lake City Northgate,LOCAL,0,5061877,11041232,0,101.0
100221,44711,39485778,Downtown Seattle Northgate,LOCAL,1,5061873,20041005,0,101.0
100221,44711,39485779,Downtown Seattle Northgate,LOCAL,1,5061872,20041005,0,101.0
100221,44711,39485780,Downtown Seattle Northgate,LOCAL,1,5061871,20041005,0,101.0
100221,44711,39485781,Lake City Northgate,LOCAL,0,5061871,11041231,0,101.0
100221,44711,39485782,Downtown Seattle Northgate,LOCAL,1,5061880,20041228,0,101.0
100221,44711,39485783,Downtown Seattle Northgate,LOCAL,1,5061862,20041005,0,101.0


In [274]:
jul_trips_41.shape

(888, 9)

In [242]:
jul_trips_41.fare_id.unique()

array([101., 110.])

In [243]:
jul_trips_41.fare_id.nunique()

2

In [249]:
jul_trips_41.groupby('direction_id').agg({'fare_id': lambda x: x.nunique()})

Unnamed: 0_level_0,fare_id
direction_id,Unnamed: 1_level_1
0,2.0
1,2.0


In [287]:
# StaticGTFS objects do not support `<`, so sorting them directly raises a TypeError.
# Catch and print the error so the limitation is still demonstrated without aborting
# a top-to-bottom run of the notebook.
try:
    sorted([gtfs_aug18, gtfs_jul18, gtfs_aug18])
except TypeError as err:
    print('TypeError:', err)

TypeError: '<' not supported between instances of 'StaticGTFS' and 'StaticGTFS'