In [1]:
# GTFS helper library used throughout this notebook: https://pypi.org/project/gtfs-functions
# NOTE 10/21/2024 - some function calls inside gtfs_functions had to be updated to the h3 v4 API
# (the package was still using the old h3 v3 calls).

# NOTE - this notebook must be run in the gtfs-funcs environment
from gtfs_functions import Feed

# Path to the zipped GTFS feed that every Feed(...) call in this notebook reads.
# NOTE(review): hard-coded absolute Windows path; update it before running on another machine.
gtfs_path = r"I:\Projects\Darren\PPA3_GIS\ConveyalLayers\GTFS\Unitrans_GTFS.zip"

# feed = Feed(gtfs_path)  # left disabled; the Feed objects actually used are built in later cells

In [2]:
# get frequency of departures from each stop, any route within same operator

# Consecutive values in time_bounds delimit one window each; the output's `window`
# column carries the resulting labels (e.g. 15:00-18:00, 18:00-22:00).
time_bounds = [0, 6, 9, 15, 18, 22, 24] # hour bounds of periods during which you want to get frequency by stop
# default time bounds (aka cutoffs) = [0, 6, 9, 15, 19, 22, 24]
feed_phfreq = Feed(gtfs_path, time_windows=time_bounds)

# Reading this attribute is what triggers the file reads logged below the cell.
stop_freq = feed_phfreq.stops_freq
stop_freq.head(2)

INFO:root:Reading "stop_times.txt".
INFO:root:get trips in stop_times
INFO:root:accessing trips
INFO:root:Reading "routes.txt".
INFO:root:Start date is None. You should either specify a start date or set busiest_date to True.
INFO:root:Reading "trips.txt".
INFO:root:Reading "calendar.txt".
INFO:root:Reading "calendar_dates.txt".
INFO:root:The busiest date/s of this feed or your selected date range is/are:  ['2024-11-27'] with 1752 trips.
INFO:root:In the case that more than one busiest date was found, the first one will be considered.
INFO:root:In this case is 2024-11-27.
INFO:root:Reading "stop_times.txt".
INFO:root:_trips is defined in stop_times
INFO:root:Reading "stops.txt".
INFO:root:computing patterns


Unnamed: 0,stop_id,direction_id,window,ntrips,min_per_trip,stop_name,geometry
0,22002,0,15:00-18:00,24,7,1st St & C St / Downtown (EB),POINT (-121.74254 38.54152)
1,22002,0,18:00-22:00,24,10,1st St & C St / Downtown (EB),POINT (-121.74254 38.54152)


In [3]:
# frequency of departures for each route
# Reuses feed_phfreq, so the windows in the output follow the custom time_bounds
# (e.g. 15:00-18:00) rather than the library defaults.
line_freq = feed_phfreq.lines_freq # if throws ValueError: 'nan' is not in list, make sure your time_bounds cover full 24hr day
line_freq.head(2)

INFO:root:Reading "shapes.txt".


Unnamed: 0,route_id,route_name,direction_id,window,min_per_trip,ntrips,geometry
0,A,A Amtrak/5th/Alhambra,1,15:00-18:00,30,6,"LINESTRING (-121.69098 38.55095, -121.69051 38..."
1,A,A Amtrak/5th/Alhambra,1,18:00-22:00,34,7,"LINESTRING (-121.69098 38.55095, -121.69051 38..."


In [2]:
# frequency of departures for each segment
# This Feed is built without time_windows, so the library's default time bounds apply
# and the `window` labels differ from those produced by a Feed given custom time_windows.
feed_default = Feed(gtfs_path)
segments_freq = feed_default.segments_freq

# Only the last expression of a cell is rendered automatically, so the preview
# has to go through display() to actually show up.
display(segments_freq.head(2))

# Threshold for calling a segment record "high frequency".
# NOTE(review): assumes min_per_trip is the headway in minutes between trips - confirm.
hifreq_max_min_per_trip = 20
seg_hifreq = segments_freq.loc[segments_freq['min_per_trip'] <= hifreq_max_min_per_trip]

seg_hifreq.shape

INFO:root:Reading "stop_times.txt".
INFO:root:get trips in stop_times
INFO:root:accessing trips
INFO:root:Reading "routes.txt".
INFO:root:Start date is None. You should either specify a start date or set busiest_date to True.
INFO:root:Reading "trips.txt".
INFO:root:Reading "calendar.txt".
INFO:root:Reading "calendar_dates.txt".
INFO:root:The busiest date/s of this feed or your selected date range is/are:  ['2024-11-27'] with 1752 trips.
INFO:root:In the case that more than one busiest date was found, the first one will be considered.
INFO:root:In this case is 2024-11-27.
INFO:root:Reading "stop_times.txt".
INFO:root:_trips is defined in stop_times
INFO:root:Reading "stops.txt".
INFO:root:computing patterns
INFO:root:Getting segments...
INFO:root:Reading "shapes.txt".
INFO:root:Projecting stops onto shape...
INFO:root:Interpolating stops onto shape...
INFO:root:Sorting shape points and stops...
INFO:root:segments_df: 476, geometry: 476
INFO:root:adding data for all lines.


(510, 13)

In [12]:
# Build a stop-level table of high-frequency service from the segment-level table
# (seg_hifreq), then count how many distinct high-frequency routes serve each stop.

import pandas as pd

# Peak-period window labels to keep. These strings must match the `window` labels in
# seg_hifreq exactly, so they are tied to the time bounds of the Feed that produced it.
peak_windows = ['6:00-9:00', '15:00-19:00']

# start with tbl where each record is seg with start and end stop
# want to convert into single list where each record is a stop.
# to do so, take the end stop IDs and append to start stop IDs
seg_spec_cols = ['geometry', 'segment_id', 'segment_name']
# cols_starts / cols_ends list the columns to DROP when building the start-stop and
# end-stop tables respectively (i.e. the opposite end's columns plus segment-only columns).
cols_starts = ['end_stop_name', 'end_stop_id', *seg_spec_cols]
cols_ends = ['start_stop_name', 'start_stop_id', *seg_spec_cols]
rename = {'start_stop_id': 'stop_id', 'start_stop_name': 'stop_name',
          'end_stop_id': 'stop_id', 'end_stop_name': 'stop_name'}

hf_starts = seg_hifreq[[f for f in seg_hifreq if f not in cols_starts]].rename(columns=rename)
hf_ends = seg_hifreq[[f for f in seg_hifreq if f not in cols_ends]].rename(columns=rename)
hf_combd = pd.concat([hf_starts, hf_ends]).drop_duplicates()

# only want to consider frequencies during AM/PM peak, and only per-route records
# NOTE(review): 'ALL_LINES' is presumably the all-routes aggregate added by gtfs_functions - confirm.
hf_combd = hf_combd.loc[
    hf_combd['window'].isin(peak_windows)
    & (hf_combd['route_id'] != 'ALL_LINES')
]

# then, need to only get stops where both AM *and* PM peak meets frequency threshold.
# Count DISTINCT windows per route/direction/stop: a stop can appear more than once in
# the same window (e.g. as the end of one segment and the start of another with different
# ntrips), and a plain row count would let such a stop pass with only one peak covered.
gb_ampmpk = ['route_id', 'direction_id', 'stop_id']
windcnt = hf_combd.groupby(gb_ampmpk)['window'].nunique().reset_index()
hf_combd2 = hf_combd.merge(windcnt, how='left', on=gb_ampmpk, suffixes=('', '_cnt'))
hf_combd2 = hf_combd2.loc[hf_combd2['window_cnt'] >= len(peak_windows)]
display(hf_combd2.head())

# then get stop IDs where there are 2+ different route IDs serving it.
# nunique (not count) so that one route serving a stop in both directions is counted once.
hfcombd3 = hf_combd2[['route_id', 'route_name', 'direction_id', 'stop_id']].drop_duplicates()
hfr_stop = hfcombd3.groupby(['stop_id'])['route_id'].nunique().reset_index()
hfr_stop.sort_values(by='route_id', ascending=False)

Unnamed: 0,route_id,route_name,direction_id,stop_name,window,min_per_trip,ntrips,stop_id,window_cnt
46,L,L E 8th/Pole Line/Moore/Loyola,0,B St & 5th St (NB),15:00-19:00,17,14,22022,2
47,L,L E 8th/Pole Line/Moore/Loyola,0,B St & 5th St (NB),6:00-9:00,18,10,22022,2
48,L,L E 8th/Pole Line/Moore/Loyola,1,Monarch Ln & Campbell (SB),15:00-19:00,17,14,22081,2
49,L,L E 8th/Pole Line/Moore/Loyola,1,Monarch Ln & Campbell (SB),6:00-9:00,18,10,22081,2
50,L,L E 8th/Pole Line/Moore/Loyola,0,8th St & Chestnut Lane/Grocery Outlet (EB),15:00-19:00,17,14,22102,2


Unnamed: 0,stop_id,route_id
53,22272,2
51,22258,2
40,22169,2
0,22009,1
43,22202,1
32,22118,1
33,22124,1
34,22125,1
35,22146,1
36,22147,1


In [None]:
# Preview only the first rows instead of rendering the whole stop-level table.
hf_combd.head()