# Exploring static GTFS files with gtfstk

[GTFSTK](https://github.com/mrcagney/gtfstk) is a tool kit for analyzing General Transit Feed Specification (GTFS) data in memory without a database. It uses Pandas and Shapely to do the heavy lifting.

In [17]:
from pathlib import Path

import gtfstk as gt
import pandas as pd
import numpy as np

In [4]:
DATA_DIR = Path("data/")
%ls {DATA_DIR}

gtfs.zip


In [5]:
# Read the GTFS feed from the zip archive in DATA_DIR, with dist_units set to kilometers
feed = gt.read_gtfs(DATA_DIR/"gtfs.zip", dist_units="km")
# Show the feed's summary indicators (agencies, timezone, date range, table counts)
feed.describe()

Unnamed: 0,indicator,value
0,agencies,"[LADOT, LADOT, LADOT, LADOT, LADOT]"
1,timezone,America/Los_Angeles
2,start_date,20200418
3,end_date,20991231
4,num_routes,62
5,num_trips,6851
6,num_stops,2678
7,num_shapes,106
8,sample_date,20200423
9,num_routes_active_on_sample_date,0


In [6]:
feed.validate()

Unnamed: 0,type,message,table,rows
0,error,Invalid agency_lang; maybe has extra space cha...,agency,"[0, 1, 2, 3, 4]"
4,error,Invalid feed_lang; maybe has extra space chara...,feed_info,[0]
1,warning,Unrecognized column default_lang,feed_info,[]
2,warning,Unrecognized column feed_contact_email,feed_info,[]
3,warning,Unrecognized column feed_contact_url,feed_info,[]
5,warning,"Repeated pair (route_short_name, route_long_name)",routes,[11]
9,warning,"Repeated pair (trip_id, departure_time)",stop_times,"[104, 114, 149, 159, 212, 214, 217, 236, 244, ..."
6,warning,Unrecognized column level_id,stops,[]
7,warning,Unrecognized column platform_code,stops,[]
8,warning,Stop has no stop times,stops,"[9, 17, 1384, 1856, 2009, 2249, 2267, 2268, 22..."


In [7]:
# Trips table
feed.trips.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,6841,6842,6843,6844,6845,6846,6847,6848,6849,6850
trip_id,183-_3uV2MjGIO,183-_694En4HMYp,183-_YUYwDUy6X,183--784XcojfNQ,183--HZ6PxH8z9R,183--N47c2f9CGf,183-00t9z1nfl,183-031a0ikvr,183-045yfk8ua,183-04hytkcoa,...,47-zq19fh7pv,47-zQBxObi71XT,47-zQfMBOCKbgf,47-zr37opyrh,47-zRD6nUAqeG,47-zrxsnpruzxW,47-zt_Hf3luUEO,47-zukU0RTiA0c,47-zWoSzHKcr5c,47-ZZEi1FcnqL4
route_id,4445,4446,4445,4446,4446,4446,4446,4447,4444,4444,...,30,801,801,867,804,804,801,800,5285,799
service_id,14,14,14,14,14,14,12,12,12,12,...,10,10,10,10,11,10,9,11,10,9
trip_headsign,EXPO PARK - USC,Union Station/South Park,EXPO PARK - USC,Union Station/South Park,Union Station/South Park,Union Station/South Park,Union Station/South Park,Little Tokyo/City West,To Fashion District,To Fashion District,...,422 to Thousand Oaks,Sepulveda Park via Kester Ave,Sepulveda Park via Kester Ave,To Expo Park,Sepulveda Park via Van Nuys Blvd,Sepulveda Park via Van Nuys Blvd,Sepulveda Park via Kester Ave,Civic Center via Whitsett,To Mission College,Civic Center via Hazeltine
trip_short_name,,,,,,,,,,,...,,,,,,,,,,
direction_id,0,1,0,0,1,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
block_id,286991,286993,286990,286995,286979,286997,286914,286895,286926,286934,...,276891,276838,276837,276882,276976,276843,276826,276972,276845,276821
shape_id,13220,18480,13220,17538,18480,17538,18480,18751,16607,16607,...,13226,7854,7854,4186,7855,7855,7854,15089,18148,15088
wheelchair_accessible,,,,,,,,,,,...,,,,,,,,,,


In [8]:
# Only show route, trip, and shape columns: slice columns, AKA filter
feed.trips.filter(["route_id", "trip_id", "shape_id"]).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,6841,6842,6843,6844,6845,6846,6847,6848,6849,6850
route_id,4445,4446,4445,4446,4446,4446,4446,4447,4444,4444,...,30,801,801,867,804,804,801,800,5285,799
trip_id,183-_3uV2MjGIO,183-_694En4HMYp,183-_YUYwDUy6X,183--784XcojfNQ,183--HZ6PxH8z9R,183--N47c2f9CGf,183-00t9z1nfl,183-031a0ikvr,183-045yfk8ua,183-04hytkcoa,...,47-zq19fh7pv,47-zQBxObi71XT,47-zQfMBOCKbgf,47-zr37opyrh,47-zRD6nUAqeG,47-zrxsnpruzxW,47-zt_Hf3luUEO,47-zukU0RTiA0c,47-zWoSzHKcr5c,47-ZZEi1FcnqL4
shape_id,13220,18480,13220,17538,18480,17538,18480,18751,16607,16607,...,13226,7854,7854,4186,7855,7855,7854,15089,18148,15088


In [9]:
# First 10 unique route_ids found in the trips table, stacked into a single column for display
# Remove the [0:10] slice to get all route_ids
np.vstack(feed.trips.route_id.unique())[0:10]

array([['4445'],
       ['4446'],
       ['4447'],
       ['4444'],
       ['4443'],
       ['563'],
       ['605'],
       ['603'],
       ['4868'],
       ['572']], dtype='<U4')

In [10]:
# Find all trips of a specific route: slice rows
feed.trips.loc[lambda x: x.route_id == "572"].T

Unnamed: 0,1649,1659,1684,1690,1722,1777,1784,1841,1891,1930,...,4307,4337,4355,4356,4385,4442,4479,4542,4598,4604
trip_id,30-0__eb8iPEGugV,30-0_-1gLpMd2YmR,30-0_0cy0Vv1P4Tv,30-0_0s93ipl-1mp,30-0_1VodfOH7LmX,30-0_4AExetngIyJ,30-0_4lHazv_R7SB,30-0_6QjFQlMeMHT,30-0_9E--mlxDNbx,30-0_ag28eca58XW,...,30-2_OutoX_80hCU,30-2_px0_PM3Gckm,30-2_QU-hge-RF_o,30-2_QuNHb2k34Kq,30-2_rQ8mgbpag3k,30-2_tWMaszH7kAp,30-2_VDatc-H6AOY,30-2_XDFMJD8u4ty,30-2_zBk7y4apbkH,30-2_znMFe9a6ZSB
route_id,572,572,572,572,572,572,572,572,572,572,...,572,572,572,572,572,572,572,572,572,572
service_id,0,0,0,0,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2
trip_headsign,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,...,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH
trip_short_name,,,,,,,,,,,...,,,,,,,,,,
direction_id,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
block_id,287352,287351,287351,287352,287352,287352,287352,287351,287351,287351,...,287463,287463,287463,287463,287462,287462,287462,287463,287462,287463
shape_id,8397,8397,8397,8397,8397,8397,8397,8397,8397,8397,...,8397,8397,8397,8397,8397,8397,8397,8397,8397,8397
wheelchair_accessible,,,,,,,,,,,...,,,,,,,,,,


In [11]:
# Group all trips by route: group
feed.trips.groupby("route_id").get_group("572").T

Unnamed: 0,1649,1659,1684,1690,1722,1777,1784,1841,1891,1930,...,4307,4337,4355,4356,4385,4442,4479,4542,4598,4604
trip_id,30-0__eb8iPEGugV,30-0_-1gLpMd2YmR,30-0_0cy0Vv1P4Tv,30-0_0s93ipl-1mp,30-0_1VodfOH7LmX,30-0_4AExetngIyJ,30-0_4lHazv_R7SB,30-0_6QjFQlMeMHT,30-0_9E--mlxDNbx,30-0_ag28eca58XW,...,30-2_OutoX_80hCU,30-2_px0_PM3Gckm,30-2_QU-hge-RF_o,30-2_QuNHb2k34Kq,30-2_rQ8mgbpag3k,30-2_tWMaszH7kAp,30-2_VDatc-H6AOY,30-2_XDFMJD8u4ty,30-2_zBk7y4apbkH,30-2_znMFe9a6ZSB
route_id,572,572,572,572,572,572,572,572,572,572,...,572,572,572,572,572,572,572,572,572,572
service_id,0,0,0,0,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2
trip_headsign,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,...,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH,WILSHIRE CENTER/KOREATOWN VERMONT VIA 9TH
trip_short_name,,,,,,,,,,,...,,,,,,,,,,
direction_id,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
block_id,287352,287351,287351,287352,287352,287352,287352,287351,287351,287351,...,287463,287463,287463,287463,287462,287462,287462,287463,287462,287463
shape_id,8397,8397,8397,8397,8397,8397,8397,8397,8397,8397,...,8397,8397,8397,8397,8397,8397,8397,8397,8397,8397
wheelchair_accessible,,,,,,,,,,,...,,,,,,,,,,


In [12]:
# Compute number of trips per route: split, apply, combine
def my_agg(group):
    """Aggregate one group of rows into a Series with a single ``num_trips`` entry.

    ``num_trips`` is the number of rows in ``group`` (its ``shape[0]``).
    """
    row_count = group.shape[0]
    return pd.Series({"num_trips": row_count})

# Use method chaining: split -> apply -> combine, then join route info and sort
(
    feed.trips
    # Split: one group of trips per route
    .groupby("route_id")
    # Apply: reduce each group to its trip count
    .apply(my_agg)
    # Combine: move route_id from the index back into a regular column
    .reset_index()
    # Join in route names and type from feed.routes; the join key is given
    # explicitly instead of relying on whichever column names happen to be shared
    .merge(
        feed.routes.filter(["route_id", "route_short_name", "route_long_name", "route_type"]),
        on="route_id",
    )
    # Routes with the most trips first
    .sort_values("num_trips", ascending=False)
    # Transpose so each route is displayed as a column
    .T
)

Unnamed: 0,15,22,17,19,21,18,14,16,44,59,...,6,55,0,58,13,60,5,11,12,1
route_id,4444,4869,4446,4577,4868,4447,4443,4445,712,870,...,27,858,1458,869,4291,871,26,4278,4290,1524
num_trips,576,448,442,311,280,225,209,193,193,192,...,14,12,11,10,8,8,6,4,4,2
route_short_name,E,Pico Union/Echo Park,D,Observatory/Los Feliz,El Sereno/City Terrace,A,B,F,Chesterfield Square,142,...,438,437A,438B,574,439,534,431,438,437B,431B
route_long_name,DASH E,DASH Pico Union/Echo Park,DASH D,DASH Observatory/Los Feliz,DASH El Sereno/City Terrace,DASH A,DASH B,DASH F,DASH Chesterfield Square,CE142,...,CE438,CE437A,CE438B,CE574,CE439,CE534,CE431,CE438,CE437B,CE431B
route_type,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3


In [13]:
# Fill in shape_dist_traveled column of stop_times for later distance calculations.
# Per-trip statistics are computed first because append_dist_to_stop_times takes them as input.
trip_stats = feed.compute_trip_stats()
# NOTE: `feed` is rebound to the returned feed, so every later cell sees the augmented stop_times.
feed = feed.append_dist_to_stop_times(trip_stats)
# Display stop_times transposed (one column per stop-time record)
feed.stop_times.T

Unnamed: 0,97,98,99,100,101,102,103,104,105,106,...,229356,229357,229358,229359,229360,229361,229362,229363,229364,229365
trip_id,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,183--784XcojfNQ,...,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c,47-zukU0RTiA0c
arrival_time,15:15:00,15:17:00,15:19:00,15:20:00,15:22:00,15:23:00,15:26:00,15:26:00,15:27:00,15:28:00,...,16:12:00,16:13:00,16:14:00,16:16:00,16:17:00,16:18:00,16:19:00,16:20:00,16:21:00,16:23:00
departure_time,15:15:00,15:17:00,15:19:00,15:20:00,15:22:00,15:23:00,15:26:00,15:26:00,15:27:00,15:28:00,...,16:12:00,16:13:00,16:14:00,16:16:00,16:17:00,16:18:00,16:19:00,16:20:00,16:21:00,16:23:00
stop_id,6715955,6715956,6715957,6715958,6715959,6715960,6715962,6715963,6715992,6715993,...,391853,391854,391855,391856,391857,391858,391859,391860,391861,4017085
stop_sequence,0,1,2,3,4,5,6,7,8,9,...,34,35,36,37,38,39,40,41,42,43
stop_headsign,,,,,,,,,,,...,,,,,,,,,,
pickup_type,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
drop_off_type,,,,,,,,,,,...,,,,,,,,,,
shape_dist_traveled,0.0,0.230718,0.531221,0.720078,0.925299,1.071414,1.747169,1.968046,2.169424,2.373461,...,14979.816402,15336.478697,15693.140992,16406.465583,16763.127878,17119.790173,17476.452469,17833.114764,18189.777059,18903.10165
timepoint,1,0,0,0,0,1,0,0,1,0,...,0,0,0,1,0,0,0,0,0,1


`stop_times.txt` has an optional field, `shape_dist_traveled`, that records the distance the trip has traveled along its shape when it reaches each stop.

In [14]:
# Trip stats
trip_stats.T

Unnamed: 0,4667,4623,4668,4734,4669,5291,4665,5322,4666,4742,...,4642,5072,4617,5230,4701,4644,4704,4964,5325,5198
trip_id,45-0_2086796,45-0_0g0ggqtdd,45-0_2086798,45-0_53w9llpvz,45-0_2086800,45-0_y2q4j7qu6,45-0_2086791,45-0_zlcso1suc,45-0_2086793,45-0_61hou6p51,...,45-0_1gfsl1q1s,45-0_ixhoe1gpd,45-0_03r014nxj,45-0_titvhztys,45-0_2v7yr3z0l,45-0_1jn825esi,45-0_34g5wlxx8,45-0_c1a94ugbv,45-0_zns7c4ve2,45-0_rd2osu9or
route_id,1458,1458,1458,1458,1458,1458,1458,1458,1458,1458,...,900,900,900,900,900,900,900,900,900,900
route_short_name,438B,438B,438B,438B,438B,438B,438B,438B,438B,438B,...,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle,Union Station/Bunker Hill Shuttle
route_type,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
direction_id,0,0,0,0,0,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
shape_id,11697,11697,11697,11697,11697,13221,13221,13221,13221,13221,...,11737,11737,11737,11737,11737,11737,11737,11737,11737,11737
num_stops,19,19,19,19,19,20,20,20,20,20,...,4,4,4,4,4,4,4,4,4,4
start_time,16:10:00,16:30:00,16:40:00,16:50:00,17:00:00,05:45:00,06:00:00,06:30:00,06:50:00,07:10:00,...,16:50:00,16:55:00,17:00:00,17:03:00,17:06:00,17:09:00,17:15:00,17:19:00,17:35:00,17:55:00
end_time,17:09:00,17:29:00,17:39:00,17:49:00,17:59:00,06:38:00,06:50:00,07:23:00,07:41:00,08:03:00,...,17:15:00,17:20:00,17:25:00,17:26:00,17:29:00,17:32:00,17:35:00,17:39:00,17:55:00,18:15:00
start_stop_id,4744526,4744526,4744526,4744526,4744526,1224017,1224017,1224017,1224017,1224017,...,446728,446728,446728,446728,446728,446728,446728,446728,446728,446728


Fewer num_trip_starts than num_trip_end because some trips end the following day.