In [1]:
# https://pypi.org/project/gtfs-functions
# NOTE 10/21/2024 - needed to update some func calls in gtfs_functions to call on v.4 of h3 module (was still using old v3 calls)

# module source code = C:\Users\dconly\AppData\Local\ESRI\conda\envs\gtfs-funcs\Lib\site-packages\gtfs_functions

# GTFS directory - I:\Projects\Darren\PPA3_GIS\ConveyalLayers\GTFS

# NOTE - you also need to switch to gtfs-funcs env
import geopandas as gpd
import datetime as dt
from pathlib import Path

import pandas as pd

from gtfs_functions import Feed

gtfs_path = r"I:\Projects\Darren\PPA3_GIS\ConveyalLayers\GTFS\sacrt.zip"
opname = Path(gtfs_path).stem

start_date='2024-01-08'
end_date='2024-01-12'
feed = Feed(gtfs_path, start_date=start_date, end_date=end_date)

# output GIS data path
gpkg_path = r'I:\Projects\Darren\HiFrequencyTransit\hifreq_gtfs.gpkg'


In [55]:
# more on what args you can specify when defining a feed
?Feed


[1;31mInit signature:[0m
[0mFeed[0m[1;33m([0m[1;33m
[0m    [0mgtfs_path[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0mtime_windows[0m[1;33m:[0m [0mlist[0m [1;33m=[0m [1;33m[[0m[1;36m0[0m[1;33m,[0m [1;36m6[0m[1;33m,[0m [1;36m9[0m[1;33m,[0m [1;36m15[0m[1;33m,[0m [1;36m19[0m[1;33m,[0m [1;36m22[0m[1;33m,[0m [1;36m24[0m[1;33m][0m[1;33m,[0m[1;33m
[0m    [0mbusiest_date[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mgeo[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mpatterns[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mstart_date[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mend_date[0m[1;33m:[0m [0mstr[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m      <no docstring>
[1;31mInit docstring

In [3]:
#  frequency of departures for each route
# potentially could make a good supplemental layer of hf points based on where lines intersect

line_freq = feed.lines_freq # if throws ValueError: 'nan' is not in list, make sure your time_bounds cover full 24hr day

# frequency of departures for each segment
segments_freq = feed.segments_freq
seg_hifreq = segments_freq.loc[segments_freq['min_per_trip'] <= 20]

INFO:root:Getting segments...
INFO:root:Projecting stops onto shape...
INFO:root:Interpolating stops onto shape...
INFO:root:Sorting shape points and stops...
INFO:root:segments_df: 5050, geometry: 5050
INFO:root:adding data for all lines.


In [32]:
feed.trips.loc[feed.trips['route_id'] == '081']
test = feed.stop_times.loc[(feed.stop_times['route_id'] == '081') & (feed.stop_times['stop_id'] == '2321')]
# test.head()

INFO:root:accessing trips
INFO:root:accessing trips


In [33]:
# start with tbl where each record is seg with start and end stop
# want to convert into single list where each record is a stop.
# to do so, take the end stop IDs and append to start stop IDs
seg_spec_cols = ['geometry' , 'segment_id', 'segment_name']
cols_starts = ['end_stop_name', 'end_stop_id', *seg_spec_cols]
cols_ends = ['start_stop_name', 'start_stop_id', *seg_spec_cols]
rename = {'start_stop_id': 'stop_id', 'start_stop_name':'stop_name', 'end_stop_id': 'stop_id', 'end_stop_name':'stop_name'}

hf_starts = seg_hifreq[[f for f in seg_hifreq if f not in cols_starts]].rename(columns=rename)
hf_ends = seg_hifreq[[f for f in seg_hifreq if f not in cols_ends]].rename(columns=rename)
hf_combd = pd.concat([hf_starts, hf_ends]).drop_duplicates()
hf_combd = hf_combd.loc[(hf_combd['window'].isin(['6:00-9:00', '15:00-19:00'])) \
                    & (hf_combd['route_id'] != 'ALL_LINES')] # only want to consider frequencies during AM/PM peak

lfreq_fields = ['route_id', 'window', 'min_per_trip']
hf_combd = hf_combd.merge(line_freq, on=['route_id', 'window'], suffixes=('', '_line'))
# hf_combd = hf_combd.loc[hf_combd['min_per_trip_line'] <= 20]

# then, need to only get stops where both AM *and* PM peak meets frequency threshold 
gb_ampmpk = ['route_id', 'direction_id', 'stop_id']
windcnt = hf_combd.groupby(gb_ampmpk)['window'].count().reset_index()
hf_combd2 = hf_combd.merge(windcnt, how='left', on=['route_id', 'direction_id', 'stop_id'], suffixes=('', '_cnt'))
hf_combd2 = hf_combd2.loc[hf_combd2['window_cnt'] > 1]


# then get stop IDs where there are 2+ different route IDs serving it.

# ISSUE 10/21/2024 - This is *very* conservative because it only counts if the *exact* stop has 2 or more routes with high-freq.
# In real world, need to look at multiple stops (e.g., at an intersection, you have different stop IDs for N-S vs. E-W roads; you need to
# count all stops at an intersection if the stops for both directions have hi-freq
# maybe this needs to be semi-manual process?
hfcombd3 = hf_combd2[['route_id', 'route_name', 'direction_id', 'stop_id']].drop_duplicates()
hfr_stop = hfcombd3.groupby(['stop_id'])['route_id'].count().reset_index()
display(hfr_stop.sort_values(by='route_id', ascending=False).head())

# convert to geodataframe
hfcombd3 = hfcombd3.merge(feed.stops, on='stop_id', how='left') # 
hfcombd3 = gpd.GeoDataFrame(hfcombd3, geometry='geometry')

Unnamed: 0,stop_id,route_id
4,1184,3
394,9812,2
280,664,2
315,7034,2
314,7033,2


In [38]:
# hf_combd2.loc[(hf_combd2['min_per_trip'] - hf_combd2['min_per_trip_line']) < -5]

In [39]:
hf_combd.route_id.drop_duplicates() # display route IDs that meet hi freq threshold

0       001
292     011
296     021
304     023
314     030
416     051
876     056
892     067
908     068
928     081
1229    084
1245    177
1299     30
1310    507
1752    533
Name: route_id, dtype: object

In [102]:
# display(tdf.head(3))
# display(line_freq.head(3))
# display(tdf2.head(3))
line_freq.loc[line_freq['route_id'] == '021']

Unnamed: 0,route_id,route_name,direction_id,window,min_per_trip,ntrips,geometry
49,21,21 SUNRISE,0,0:00-6:00,360,1,"LINESTRING (-121.26715 38.67928, -121.26715 38..."
50,21,21 SUNRISE,0,0:00-6:00,360,1,"LINESTRING (-121.28963 38.72206, -121.28964 38..."
51,21,21 SUNRISE,0,15:00-19:00,30,8,"LINESTRING (-121.28963 38.72206, -121.28964 38..."
52,21,21 SUNRISE,0,19:00-22:00,36,5,"LINESTRING (-121.28963 38.72206, -121.28964 38..."
53,21,21 SUNRISE,0,22:00-24:00,120,1,"LINESTRING (-121.28963 38.72206, -121.28964 38..."
54,21,21 SUNRISE,0,6:00-9:00,30,6,"LINESTRING (-121.28963 38.72206, -121.28964 38..."
55,21,21 SUNRISE,0,9:00-15:00,30,12,"LINESTRING (-121.28963 38.72206, -121.28964 38..."
56,21,21 SUNRISE,1,0:00-6:00,360,1,"LINESTRING (-121.31133 38.58406, -121.31136 38..."
57,21,21 SUNRISE,1,15:00-19:00,30,8,"LINESTRING (-121.31133 38.58406, -121.31136 38..."
58,21,21 SUNRISE,1,19:00-22:00,36,5,"LINESTRING (-121.31133 38.58406, -121.31136 38..."


In [31]:
# export to GIS feature class (remember, cannot use arcpy because you are in separate gtfs-funcs environment)
hfcombd3.to_file(gpkg_path, driver='GPKG', layer=f'hifreq_{opname}')
