In [1]:
import pandas as pd

In [2]:
def read_gtfs_files(data_dir='GTFS-Static-Data'):
    """
    Read the GTFS static tables used by this notebook into DataFrames.

    :param data_dir: Directory holding the GTFS ``*.txt`` files. Defaults to
        ``'GTFS-Static-Data'`` (relative to the working directory), the
        location this function originally had hard-coded.
    :return: Tuple ``(routes_df, trips_df, stops_df, stop_times_df, calendar_df)``,
        one DataFrame per file, in exactly that order.
    """
    # Order matters: callers unpack the returned tuple positionally.
    table_names = ('routes', 'trips', 'stops', 'stop_times', 'calendar')
    return tuple(pd.read_csv(f'{data_dir}/{name}.txt') for name in table_names)

In [5]:
def filter_by_route(routes_df, route_short_name='145'):
    """
    Look up the route ID belonging to a route's short name.

    A row matches when its ``route_short_name`` equals the requested value
    either directly or after both sides are converted to strings. The string
    comparison makes the lookup work when pandas parsed the column as
    integers but the caller passes ``'145'`` (or the other way round).

    :param routes_df: DataFrame with ``route_short_name`` and ``route_id`` columns.
    :param route_short_name: Short name to look up (default ``'145'``).
    :return: ``route_id`` of the first matching row.
    :raises IndexError: If no row matches the requested short name.
    """
    short_names = routes_df['route_short_name']
    # Keep the plain equality test so every previously matching row still
    # matches, and add a dtype-agnostic string comparison on top of it.
    is_match = (short_names == route_short_name) | (
        short_names.astype(str) == str(route_short_name)
    )
    matching_ids = routes_df.loc[is_match, 'route_id']
    if matching_ids.empty:
        raise IndexError(f'no route with route_short_name {route_short_name!r}')
    return matching_ids.iloc[0]

In [19]:
def filter_by_stop(stops_df, stop_name):
    """
    Look up the stop ID of the first stop whose name contains ``stop_name``.

    The match is a case-insensitive, literal substring test. ``stop_name`` is
    deliberately not treated as a regular expression, so names containing
    characters such as ``(``, ``)``, ``.`` or ``+`` match themselves.
    Rows with a missing ``stop_name`` never match.

    :param stops_df: DataFrame with ``stop_name`` and ``stop_id`` columns.
    :param stop_name: Text to search for inside the stop names.
    :return: ``stop_id`` of the first matching row.
    :raises IndexError: If no stop name contains ``stop_name``.
    """
    name_matches = stops_df['stop_name'].str.contains(
        stop_name, case=False, na=False, regex=False
    )
    matching_ids = stops_df.loc[name_matches, 'stop_id']
    if matching_ids.empty:
        raise IndexError(f'no stop with a name containing {stop_name!r}')
    return matching_ids.iloc[0]

In [24]:
def filter_trips_to_SFU_by_route(trips_df, route_id, direction_id):
    """
    Select the trips of one route that travel in one direction.

    direction_id = 0 -> Production Way to SFU, direction_id = 1 -> SFU to Production Way

    :param trips_df: DataFrame with ``route_id`` and ``direction_id`` columns.
    :param route_id: Route ID the trips must belong to.
    :param direction_id: Direction the trips must travel in (see above).
    :return: Rows of ``trips_df`` matching both the route and the direction.
    """
    # Compare against the column: comparing the argument with itself is always
    # True and would silently return trips in both directions.
    on_route = trips_df['route_id'] == route_id
    in_direction = trips_df['direction_id'] == direction_id
    return trips_df[on_route & in_direction]

In [28]:
def filter_stop_times_by_trip_and_stop(stop_times_df, trip_to_SFU_df, stop_id):
    """
    Return the stop-time rows of the given trips at one particular stop.

    :param stop_times_df: DataFrame with at least ``trip_id`` and ``stop_id`` columns.
    :param trip_to_SFU_df: DataFrame of trips to keep; joined on ``trip_id``.
    :param stop_id: Stop ID the returned rows must have.
    :return: Inner join of the two frames on ``trip_id``, restricted to rows
        whose ``stop_id`` equals ``stop_id``.
    """
    stop_times_for_trips = stop_times_df.merge(trip_to_SFU_df, on='trip_id')
    at_requested_stop = stop_times_for_trips['stop_id'] == stop_id
    return stop_times_for_trips[at_requested_stop]

In [38]:
def get_trips_by_day(df,calendar_df, day_name):
    """
    Filter trips down to those whose service runs on a given day of the week.

    :param df: DataFrame containing the trip data; must have a ``service_id`` column
    :param calendar_df: DataFrame with a ``service_id`` column and one 0/1
        column per weekday (e.g. ``monday``)
    :param day_name: Name of the day to filter by. Used as-is when a column of
        that exact name exists; otherwise its stripped, lower-cased form is
        used, so ``'Monday'`` and ``'monday'`` both select the ``monday`` column.
    :return: DataFrame with trips that run on the specified day (``df`` joined
        with ``calendar_df`` on ``service_id``)
    :raises KeyError: If no matching day column exists.
    """
    trips_with_calendar = pd.merge(df, calendar_df, on='service_id')
    if day_name in trips_with_calendar.columns:
        day_column = day_name
    else:
        # The calendar's day columns are lowercase in this notebook's data,
        # so accept capitalised input such as 'Monday' as well.
        day_column = day_name.strip().lower()
    return trips_with_calendar[trips_with_calendar[day_column] == 1]

In [4]:
def calculate_time_diff_and_period(relevant_stop_times):
    """
    Placeholder for the 'Time Diff' / 'Period' calculation.

    Intended to compute the time difference between consecutive buses and to
    assign a period based on arrival time. Nothing is implemented yet: the
    argument is ignored and the function returns ``None``.

    :param relevant_stop_times: Stop times to analyse (currently unused).
    :return: ``None``.
    """
    # TODO: implement the time-difference and period assignment; the criteria
    # for both still need to be defined.
    return None

In [39]:
def main():
    """
    Run the pipeline: Monday arrivals of bus 145 at SFU Transportation Centre Bay 2.

    Reads the GTFS tables, resolves the route and stop IDs, selects the
    route's trips for direction 0, keeps their stop times at the SFU stop,
    and filters those to services running on Monday.

    :return: DataFrame produced by ``get_trips_by_day`` for ``'monday'``.
        Previously this result was computed and then thrown away.
    """
    routes_df, trips_df, stops_df, stop_times_df, calendar_df = read_gtfs_files()

    # Bus 145 route_id and the SFU transit bay 2 stop_id.
    route_id = filter_by_route(routes_df, '145')
    sfu_stop_id = filter_by_stop(stops_df, 'SFU Transportation Centre @ Bay 2')

    # direction_id 0 = Production Way to SFU.
    trips_to_SFU = filter_trips_to_SFU_by_route(trips_df, route_id, 0)
    trips_to_SFU_arrival_time = filter_stop_times_by_trip_and_stop(stop_times_df, trips_to_SFU, sfu_stop_id)

    day = 'monday'
    # TODO: pass the result through calculate_time_diff_and_period once that
    # placeholder is implemented.
    return get_trips_by_day(trips_to_SFU_arrival_time, calendar_df, day)

In [40]:
# Execute the pipeline defined in main().
main()

      trip_id arrival_time departure_time  stop_id  stop_sequence  \
0    13678472      6:08:13        6:08:13     1873              5   
1    13678473      6:30:13        6:30:13     1873              5   
2    13678474      6:45:13        6:45:13     1873              5   
3    13678475     18:16:04       18:16:04     1873              5   
4    13678476     18:28:04       18:28:04     1873              5   
..        ...          ...            ...      ...            ...   
215  13678861     16:26:56       16:26:56     1873              5   
216  13678862     17:26:13       17:26:13     1873              5   
217  13678863     18:26:13       18:26:13     1873              5   
218  13678864     19:27:04       19:27:04     1873              5   
219  13678865     20:27:13       20:27:13     1873              5   

     stop_headsign  pickup_type  drop_off_type  shape_dist_traveled route_id  \
0              NaN            0              0               6.4304     6658   
1          