In [1]:
import urbanaccess as ua
from pathlib import Path
import os
from zipfile import ZipFile
import osmnx as ox
import igraph as ig
import datetime

import numpy as np
import pandas as pd

In [2]:
GTFS_DATA_DIR = Path('./sample_data/gtfs_monday_extracted')
TMP_DATA_PATH = Path('./tmp')

In [3]:
all_gtfs_files = [GTFS_DATA_DIR.joinpath(e) for e in os.listdir(GTFS_DATA_DIR) if Path(e).suffix == '.zip']
path = all_gtfs_files[0]

In [4]:
gdf = ox.geocode_to_gdf({'city': 'Amsterdam'})
bbox = (
    gdf.loc[0, 'bbox_west'],
    gdf.loc[0, 'bbox_south'],
    gdf.loc[0, 'bbox_east'],
    gdf.loc[0, 'bbox_north'],
)

with ZipFile(path) as ref:
    ref.extractall(TMP_DATA_PATH)
    loaded_feeds = ua.gtfs.load.gtfsfeed_to_df(gtfsfeed_path=str(TMP_DATA_PATH.absolute()), 
                                               validation=True, 
                                               verbose=True,
                                               bbox=bbox, 
                                               remove_stops_outsidebbox=True,
                                               append_definitions=True)
    for f in os.listdir(TMP_DATA_PATH):
        os.remove(TMP_DATA_PATH.joinpath(f))

Checking GTFS text file header whitespace... Reading files using encoding: utf-8 set in configuration.
GTFS text file header whitespace check completed. Took 0.02 seconds
--------------------------------
Processing GTFS feed: tmp
GTFS feed: tmp, calendar_dates uses the same service_id across multiple agency_ids. This feed calendar_dates table will be modified from its original format to provide service_ids for each agency using a one to many join
GTFS feed: tmp, stops uses the same stop_id across multiple agency_ids. This feed stops table will be modified from its original format to provide stop_ids for each agency using a one to many join
agency.txt agency_name column has more than one agency name listed. Unique agency id was assigned using the agency id and associated agency name.
Unique agency id operation complete. Took 0.56 seconds
Unique GTFS feed id operation complete. Took 0.00 seconds
Records:
              stop_id  unique_agency_id             stop_name stop_code  \
0      st

In [5]:
rect_arrivals = loaded_feeds.stop_times['arrival_time'].apply(lambda x: int(x.split(':')[0]))
rect_departures = loaded_feeds.stop_times['departure_time'].apply(lambda x: int(x.split(':')[0]))

In [6]:
# Drop all runs where the arrival time is after midnight
loaded_feeds.stop_times = loaded_feeds.stop_times[rect_arrivals < 24]
loaded_feeds.stop_times = loaded_feeds.stop_times[rect_departures < 24]

  loaded_feeds.stop_times = loaded_feeds.stop_times[rect_departures < 24]


In [7]:
date = datetime.datetime.strptime(str(loaded_feeds.calendar_dates.date.unique()[0]), '%Y%m%d')

loaded_feeds.stop_times['arrival_time'] = pd.to_datetime(loaded_feeds.stop_times['arrival_time'].apply(lambda x: str(date.date()) + ' ' + x))
loaded_feeds.stop_times['departure_time'] = pd.to_datetime(loaded_feeds.stop_times['departure_time'].apply(lambda x: str(date.date()) + ' ' + x))

In [8]:
day_times = pd.to_datetime(pd.Series([date + datetime.timedelta(hours=e) for e in range(25)]))

In [9]:
transit_net = ua.gtfs.network.create_transit_net(
    gtfsfeeds_dfs=loaded_feeds,
    calendar_dates_lookup={'unique_feed_id': 'tmp_1'},
    day='monday',
    timerange=['00:00:00', '23:59:59'],
)

Using calendar to extract service_ids to select trips.
0 service_ids were extracted from calendar
0 trip(s) 0.00 percent of 14,695 total trip records were found in calendar for GTFS feed(s): []
0 trip(s) 0 percent of 14,695 total trip records were found in calendar for GTFS feed: tmp
Using calendar_dates to supplement service_ids extracted from calendar to select trips.
Found 828 records that matched query: column: unique_feed_id and string: tmp_1 for GTFS feed(s): ['tmp']
An additional 828 service_ids were extracted from calendar_dates. Total service_ids extracted: 828
14,695 of 14,695 total trips were extracted representing calendar day: monday and calendar_dates search parameters: {'unique_feed_id': 'tmp_1'}. Took 0.04 seconds
There are no departure time records missing from trips following the specified schedule. There are no records to interpolate.
Difference between stop times has been successfully calculated. Took 0.74 seconds
Stop times from 00:00:00 to 23:59:59 successfully se

## Stop frequencies

In [10]:
loaded_feeds.stops = loaded_feeds.stops[loaded_feeds.stops['unique_agency_id']!='nan']
loaded_feeds.stops["stop_id"] = loaded_feeds.stops[["stop_id", "unique_agency_id"]].agg('_'.join, axis=1)
loaded_feeds.stops
loaded_feeds.stop_times = loaded_feeds.stop_times[loaded_feeds.stop_times['unique_agency_id']!='nan']
loaded_feeds.stop_times["stop_id"] = loaded_feeds.stop_times[["stop_id", "unique_agency_id"]].agg('_'.join, axis=1)
loaded_feeds.stop_times

Unnamed: 0,stop_id,unique_agency_id,stop_name,stop_code,stop_desc,stop_lat,stop_lon,zone_id,stop_url,tts_stop_name,...,parent_station,stop_timezone,wheelchair_boarding,level_id,area_id,unique_feed_id,route_type,location_type_desc,wheelchair_boarding_desc,unique_stop_id
659,2422355_ns,ns,Halfweg-Zwanenburg,,,52.385901,4.747019,IFF:hwzb,,,...,stoparea:18248,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2422355_ns
660,2323539_ns,ns,Duivendrecht,,,52.323726,4.937325,IFF:dvd,,,...,stoparea:17781,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323539_ns
661,2323216_ns_international,ns_international,Amsterdam Centraal,,,52.379535,4.899865,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323216_ns_international
662,2323216_ns,ns,Amsterdam Centraal,,,52.379535,4.899865,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323216_ns
663,2422356_ns,ns,Halfweg-Zwanenburg,,,52.385982,4.747075,IFF:hwzb,,,...,stoparea:18248,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2422356_ns
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1967,2323217_ns,ns,Amsterdam Centraal,,,52.378890,4.901657,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323217_ns
1968,2323217_ns_international,ns_international,Amsterdam Centraal,,,52.378890,4.901657,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323217_ns_international
1969,2323273_ns,ns,Amsterdam Sloterdijk,,,52.388265,4.835599,IFF:ass,,,...,stoparea:18177,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323273_ns
1970,2429585_gvb,gvb,"Haarlemmerlieden, Ruigoord",02329,,52.407631,4.749901,,,,...,stoparea:384903,,0,,,tmp_1,3.0,stop,No accessibility information available for the...,2429585_gvb


In [19]:
loaded_feeds.stop_times = loaded_feeds.stop_times[loaded_feeds.stop_times['unique_agency_id']!='nan']
loaded_feeds.stop_times["stop_id"] = loaded_feeds.stop_times[["stop_id", "unique_agency_id"]].agg('_'.join, axis=1)
loaded_feeds.stop_times

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign,arrival_time,departure_time,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint,unique_agency_id,unique_feed_id,route_type,pickup_type_desc,drop_off_type_desc,timepoint_desc,departure_time_sec,unique_trip_id
157169,121350818,2422031_ns,22,,2021-11-29 13:48:00,2021-11-29 13:49:00,0,0,,,207869.0,1,ns,tmp_1,2,Regularly Scheduled,Regularly Scheduled,Exact times,49740,121350818_ns
157170,121350818,2422149_ns,23,,2021-11-29 13:55:00,2021-11-29 13:57:00,0,0,,,216800.0,1,ns,tmp_1,2,Regularly Scheduled,Regularly Scheduled,Exact times,50220,121350818_ns
155459,121350830,2422145_ns,10,,2021-11-29 14:02:00,2021-11-29 14:04:00,0,0,,,42564.0,1,ns,tmp_1,2,Regularly Scheduled,Regularly Scheduled,Exact times,50640,121350830_ns
155460,121350830,2422027_ns,11,,2021-11-29 14:10:00,2021-11-29 14:12:00,0,0,,,51474.0,1,ns,tmp_1,2,Regularly Scheduled,Regularly Scheduled,Exact times,51120,121350830_ns
158170,121350872,2422031_ns,22,,2021-11-29 20:48:00,2021-11-29 20:49:00,0,0,,,207869.0,1,ns,tmp_1,2,Regularly Scheduled,Regularly Scheduled,Exact times,74940,121350872_ns
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89351,141159258,15096_gvb,2,,2021-11-29 23:56:02,2021-11-29 23:56:20,0,0,,,466.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,86180,141159258_gvb
89352,141159258,322625_gvb,3,,2021-11-29 23:57:04,2021-11-29 23:57:22,0,0,,,835.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,86242,141159258_gvb
89353,141159258,15520_gvb,4,,2021-11-29 23:58:09,2021-11-29 23:58:27,0,0,,,1215.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,86307,141159258_gvb
89354,141159258,15530_gvb,5,,2021-11-29 23:58:57,2021-11-29 23:59:15,0,0,,,1503.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,86355,141159258_gvb


In [20]:
stop_freq = loaded_feeds.stops[["stop_id", "stop_name", "stop_lat", "stop_lon"]]
stop_freq = stop_freq.drop_duplicates()

for h in range(24):
    stop_freq[f"freq_h_{h}"] = np.zeros(len(stop_freq))

In [21]:
loaded_feeds.stops

Unnamed: 0,stop_id,unique_agency_id,stop_name,stop_code,stop_desc,stop_lat,stop_lon,zone_id,stop_url,tts_stop_name,...,parent_station,stop_timezone,wheelchair_boarding,level_id,area_id,unique_feed_id,route_type,location_type_desc,wheelchair_boarding_desc,unique_stop_id
659,2422355_ns,ns,Halfweg-Zwanenburg,,,52.385901,4.747019,IFF:hwzb,,,...,stoparea:18248,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2422355_ns
660,2323539_ns,ns,Duivendrecht,,,52.323726,4.937325,IFF:dvd,,,...,stoparea:17781,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323539_ns
661,2323216_ns_international,ns_international,Amsterdam Centraal,,,52.379535,4.899865,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323216_ns_international
662,2323216_ns,ns,Amsterdam Centraal,,,52.379535,4.899865,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323216_ns
663,2422356_ns,ns,Halfweg-Zwanenburg,,,52.385982,4.747075,IFF:hwzb,,,...,stoparea:18248,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2422356_ns
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1967,2323217_ns,ns,Amsterdam Centraal,,,52.378890,4.901657,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323217_ns
1968,2323217_ns_international,ns_international,Amsterdam Centraal,,,52.378890,4.901657,IFF:asd,,,...,stoparea:18188,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323217_ns_international
1969,2323273_ns,ns,Amsterdam Sloterdijk,,,52.388265,4.835599,IFF:ass,,,...,stoparea:18177,,0,,,tmp_1,2.0,stop,No accessibility information available for the...,2323273_ns
1970,2429585_gvb,gvb,"Haarlemmerlieden, Ruigoord",02329,,52.407631,4.749901,,,,...,stoparea:384903,,0,,,tmp_1,3.0,stop,No accessibility information available for the...,2429585_gvb


In [22]:
served_stops

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign,arrival_time,departure_time,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint,unique_agency_id,unique_feed_id,route_type,pickup_type_desc,drop_off_type_desc,timepoint_desc,departure_time_sec,unique_trip_id
456,139702512,2403067,1,,2021-11-29 00:36:00,2021-11-29 00:36:00,0,1,,,0.0,1,gvb,tmp_1,3,Regularly Scheduled,Not available,Exact times,2160,139702512_gvb
457,139702512,15326,2,,2021-11-29 00:37:21,2021-11-29 00:37:39,0,0,,,865.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,2259,139702512_gvb
458,139702512,15739,3,,2021-11-29 00:38:13,2021-11-29 00:38:31,0,0,,,1308.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,2311,139702512_gvb
459,139702512,15387,4,,2021-11-29 00:39:04,2021-11-29 00:39:05,0,0,,,1600.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,2345,139702512_gvb
460,139702512,652267,5,,2021-11-29 00:39:39,2021-11-29 00:39:57,0,0,,,2086.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,2397,139702512_gvb
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7706,140847778,1535321,18,,2021-11-29 00:53:51,2021-11-29 00:54:09,0,0,,,13189.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,3249,140847778_gvb
7707,140847778,640284,19,,2021-11-29 00:56:35,2021-11-29 00:56:53,0,0,,,14138.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,3413,140847778_gvb
7708,140847778,103176,20,,2021-11-29 00:57:42,2021-11-29 00:58:00,0,0,,,14532.0,1,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Exact times,3480,140847778_gvb
7709,140847778,15719,21,,2021-11-29 00:58:45,2021-11-29 00:59:03,0,0,,,15103.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,3543,140847778_gvb


In [23]:
for i in range(len(day_times[:24])):
    served_stops = loaded_feeds.stop_times[(loaded_feeds.stop_times.arrival_time >= day_times[i]) & (loaded_feeds.stop_times.arrival_time <= day_times[i+1])]
    served_stops_count = served_stops.groupby('stop_id').size()
    served_stops_count_ids = served_stops_count.index
    stop_freq.loc[stop_freq['stop_id'].isin(served_stops_count_ids),f'freq_h_{i}'] = served_stops_count.values

In [24]:
stop_freq.index = stop_freq["stop_id"].values
stop_freq = stop_freq.drop(columns=["stop_id"])

In [25]:
stop_freq

Unnamed: 0,stop_name,stop_lat,stop_lon,freq_h_0,freq_h_1,freq_h_2,freq_h_3,freq_h_4,freq_h_5,freq_h_6,...,freq_h_14,freq_h_15,freq_h_16,freq_h_17,freq_h_18,freq_h_19,freq_h_20,freq_h_21,freq_h_22,freq_h_23
2422355_ns,Halfweg-Zwanenburg,52.385901,4.747019,0.0,0.0,0.0,0.0,0.0,2.0,6.0,...,5.0,5.0,6.0,6.0,5.0,4.0,3.0,2.0,2.0,2.0
2323539_ns,Duivendrecht,52.323726,4.937325,0.0,0.0,0.0,0.0,0.0,1.0,5.0,...,5.0,5.0,6.0,6.0,4.0,4.0,2.0,2.0,2.0,2.0
2323216_ns_international,Amsterdam Centraal,52.379535,4.899865,0.0,0.0,0.0,0.0,0.0,0.0,6.0,...,0.0,5.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,3.0
2323216_ns,Amsterdam Centraal,52.379535,4.899865,0.0,0.0,0.0,0.0,0.0,1.0,2.0,...,0.0,4.0,6.0,6.0,4.0,4.0,0.0,0.0,3.0,3.0
2422356_ns,Halfweg-Zwanenburg,52.385982,4.747075,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,4.0,2.0,5.0,6.0,5.0,8.0,4.0,3.0,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2323217_ns,Amsterdam Centraal,52.378890,4.901657,0.0,0.0,0.0,0.0,0.0,0.0,8.0,...,10.0,12.0,11.0,12.0,10.0,10.0,8.0,8.0,8.0,7.0
2323217_ns_international,Amsterdam Centraal,52.378890,4.901657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,10.0,0.0,12.0,12.0,10.0,0.0,0.0,8.0,0.0
2323273_ns,Amsterdam Sloterdijk,52.388265,4.835599,0.0,0.0,0.0,0.0,0.0,1.0,8.0,...,10.0,12.0,12.0,12.0,11.0,10.0,10.0,8.0,8.0,8.0
2429585_gvb,"Haarlemmerlieden, Ruigoord",52.407631,4.749901,0.0,0.0,0.0,0.0,0.0,0.0,8.0,...,0.0,0.0,11.0,12.0,12.0,10.0,0.0,0.0,0.0,0.0


## Segment Frequencies

In [11]:
loaded_feeds.stop_times

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign,arrival_time,departure_time,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint,unique_agency_id,unique_feed_id,route_type,pickup_type_desc,drop_off_type_desc,timepoint_desc,departure_time_sec
0,138660065,14812,1,,2021-11-29 04:49:00,2021-11-29 04:49:00,0,1,,,0.0,1,gvb,tmp_1,3,Regularly Scheduled,Not available,Exact times,17340
1,138660065,15400,2,,2021-11-29 04:49:30,2021-11-29 04:49:48,0,0,,,431.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17388
2,138660065,14845,3,,2021-11-29 04:50:48,2021-11-29 04:51:06,0,0,,,1152.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17466
3,138660065,14843,4,,2021-11-29 04:51:16,2021-11-29 04:51:17,0,0,,,1381.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17477
4,138660065,680575,5,,2021-11-29 04:52:14,2021-11-29 04:52:32,0,0,,,2072.0,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172561,137723011,165054,1,,2021-11-29 18:00:00,2021-11-29 18:00:00,0,1,,,,1,gvb,tmp_1,4,Regularly Scheduled,Not available,Exact times,64800
172562,137723011,165055,2,,2021-11-29 18:06:00,2021-11-29 18:06:00,1,0,,,,1,gvb,tmp_1,4,Not available,Regularly Scheduled,Exact times,65160
172563,136407107,1762389,1,,2021-11-29 14:15:00,2021-11-29 14:15:00,0,1,,,,1,gvb,tmp_1,4,Regularly Scheduled,Not available,Exact times,51300
172564,136407107,239520,2,,2021-11-29 14:20:00,2021-11-29 14:20:00,1,0,,,,1,gvb,tmp_1,4,Not available,Regularly Scheduled,Exact times,51600


In [13]:
# Generate arrival_stop_id for each trip
for trip_id in loaded_feeds.stop_times['trip_id'].unique():
    loaded_feeds.stop_times.loc[loaded_feeds.stop_times['trip_id']==trip_id, "stop_id_provenance"] = loaded_feeds.stop_times.loc[loaded_feeds.stop_times['trip_id']==trip_id, "stop_id"].shift(1)

In [28]:
a = loaded_feeds.stop_times
a.groupby('trip_id')["stop_id"].shift()

157169            NaN
157170     2422031_ns
155459            NaN
155460     2422145_ns
158170            NaN
             ...     
89351     1525666_gvb
89352       15096_gvb
89353      322625_gvb
89354       15520_gvb
89355       15530_gvb
Name: stop_id, Length: 168025, dtype: object

In [14]:
loaded_feeds.stop_times

Unnamed: 0,trip_id,stop_id,stop_sequence,stop_headsign,arrival_time,departure_time,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,...,timepoint,unique_agency_id,unique_feed_id,route_type,pickup_type_desc,drop_off_type_desc,timepoint_desc,departure_time_sec,stop_id_arrival,stop_id_provenance
0,138660065,14812,1,,2021-11-29 04:49:00,2021-11-29 04:49:00,0,1,,,...,1,gvb,tmp_1,3,Regularly Scheduled,Not available,Exact times,17340,,
1,138660065,15400,2,,2021-11-29 04:49:30,2021-11-29 04:49:48,0,0,,,...,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17388,,14812
2,138660065,14845,3,,2021-11-29 04:50:48,2021-11-29 04:51:06,0,0,,,...,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17466,,15400
3,138660065,14843,4,,2021-11-29 04:51:16,2021-11-29 04:51:17,0,0,,,...,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17477,,14845
4,138660065,680575,5,,2021-11-29 04:52:14,2021-11-29 04:52:32,0,0,,,...,0,gvb,tmp_1,3,Regularly Scheduled,Regularly Scheduled,Approximate times,17552,,14843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172561,137723011,165054,1,,2021-11-29 18:00:00,2021-11-29 18:00:00,0,1,,,...,1,gvb,tmp_1,4,Regularly Scheduled,Not available,Exact times,64800,,
172562,137723011,165055,2,,2021-11-29 18:06:00,2021-11-29 18:06:00,1,0,,,...,1,gvb,tmp_1,4,Not available,Regularly Scheduled,Exact times,65160,,165054
172563,136407107,1762389,1,,2021-11-29 14:15:00,2021-11-29 14:15:00,0,1,,,...,1,gvb,tmp_1,4,Regularly Scheduled,Not available,Exact times,51300,,
172564,136407107,239520,2,,2021-11-29 14:20:00,2021-11-29 14:20:00,1,0,,,...,1,gvb,tmp_1,4,Not available,Regularly Scheduled,Exact times,51600,,1762389


In [62]:
seg_freq = loaded_feeds.stop_times[["stop_id", "stop_id_provenance"]]
seg_freq = seg_freq.dropna()
seg_freq = seg_freq.drop_duplicates()
seg_freq.set_index(["stop_id", "stop_id_provenance"], inplace=True)

for h in range(24):
    seg_freq[f"freq_h_{h}"] = np.zeros(len(seg_freq))

In [77]:
for i in range(len(day_times[:24])):
    served_stops = loaded_feeds.stop_times[(loaded_feeds.stop_times.arrival_time >= day_times[i]) & (loaded_feeds.stop_times.arrival_time <= day_times[i+1])]
    serv_counts = served_stops.groupby(["stop_id", "stop_id_provenance"]).size()
    seg_freq.loc[(serv_counts.index), f"freq_h_{i}"] = serv_counts.values

In [81]:
seg_freq.loc[seg_freq['freq_h_9']>0,'freq_h_9']

stop_id  stop_id_provenance
327331   15636                  9.0
15530    1523014               11.0
15059    15530                  7.0
14412    15059                  7.0
1530743  14412                  7.0
                               ... 
2429588  2429587                2.0
2429589  2429588                2.0
2422006  2323266                1.0
2429583  1136391                1.0
14863    2429586                1.0
Name: freq_h_9, Length: 1382, dtype: float64