In [14]:
import pandas as pd

# Default location of the extracted GTFS export. Pass `gtfs_dir` to
# import_data() to load the feed from a different directory.
DEFAULT_GTFS_DIR = '/Users/denisvoroncov/Desktop/Project/Vienna Linien — Timetable data GTFS Vienna/data.gv.at/'

# File names of the GTFS tables, in the order import_data() returns them.
GTFS_FILE_NAMES = (
    'GTFS Agency-6.plain',
    'GTFS Stops-9.plain',
    'GTFS Routes-5.plain',
    'GTFS Trips-7.plain',
    'GTFS Stop Times-8.plain',
    'GTFS Calendar-2.plain',
    'GTFS Calendar Dates-3.plain',
    'GTFS Shapes-1.plain',
)


def import_data(gtfs_dir=DEFAULT_GTFS_DIR):
    """Read the eight GTFS tables from ``gtfs_dir`` into DataFrames.

    Parameters
    ----------
    gtfs_dir : str or os.PathLike, optional
        Directory containing the ``*.plain`` CSV files listed in
        ``GTFS_FILE_NAMES``. Defaults to ``DEFAULT_GTFS_DIR`` so existing
        ``import_data()`` calls keep working.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(agency, stops, routes, trips, stop_times, calendar,
        calendar_dates, shapes)`` in exactly that order.

    Raises
    ------
    FileNotFoundError
        If one of the expected files is missing from ``gtfs_dir``.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # os.path.join works whether or not gtfs_dir ends with a path separator.
    return tuple(
        pd.read_csv(os.path.join(gtfs_dir, file_name))
        for file_name in GTFS_FILE_NAMES
    )

# Load every GTFS table into module-level DataFrames (nothing is displayed here).
# The tuple is unpacked in the order import_data() returns it; later cells read
# stops, trips, stop_times, calendar and calendar_dates from these globals.
agency, stops, routes, trips, stop_times, calendar, calendar_dates, shapes = import_data()

In [None]:
import scipy.stats as stats


# Transfer reliability 

def calculate_transfer_probability(scheduled_arrival, next_bus_departure, shape=2, scale=3):
    """Return the probability that a transfer succeeds.

    The available slack is ``next_bus_departure - scheduled_arrival``. A
    negative slack gives 0.0; otherwise the result is the gamma CDF with
    parameters ``shape`` and ``scale`` evaluated at the slack.
    """
    slack = next_bus_departure - scheduled_arrival
    # Arriving after the connection has already left can never work out.
    return 0.0 if slack < 0 else stats.gamma.cdf(slack, a=shape, scale=scale)

     service_id      date  exception_type
0            T0  20241224               2
1            T0  20241225               2
2            T0  20241226               2
3            T0  20241231               2
4            T0  20250101               2
...         ...       ...             ...
6519         UZ  20251118               2
6520         UZ  20251125               2
6521         UZ  20251202               2
6522         UZ  20251209               2
6523       UZ#2  20241231               2

[6524 rows x 3 columns]


In [None]:
import sys
import heapq
from collections import defaultdict
from datetime import datetime, timedelta
from scipy.stats import norm

# Helper: convert an "HH:MM:SS" time string to minutes
def time_to_minutes(time_str):
    """Convert an ``"HH:MM:SS"`` string to minutes, keeping seconds as a fraction.

    The hour field is not capped at 23, so a value such as ``"25:00:00"``
    converts to 1500 minutes.
    """
    parts = [int(piece) for piece in time_str.split(":")]
    hours, minutes, seconds = parts  # anything but exactly three fields raises ValueError
    return hours * 60 + minutes + seconds / 60

def minutes_to_time(minutes):
    """Format a minute count as a zero-padded ``HH:MM`` string.

    Fractions of a minute are truncated; the hour part is not wrapped at 24.
    """
    whole_hours, leftover_minutes = divmod(minutes, 60)
    return f"{int(whole_hours):02d}:{int(leftover_minutes):02d}"

def get_weekday(date):
    """Return the lowercase English weekday name for ``date``.

    ``date`` must provide ``weekday()`` returning 0 for Monday through 6 for Sunday.
    """
    day_names = (
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    )
    day_index = date.weekday()
    return day_names[day_index]

from datetime import datetime


# FIXED

def is_service_available(service_id, date, calendar, calendar_dates):
    """Return True if ``service_id`` runs on ``date``, otherwise False.

    Parameters
    ----------
    service_id : hashable
        Value to look up in the ``service_id`` column of both tables.
    date : datetime.date or datetime.datetime
        Day to check; only the calendar date is used.
    calendar : pandas.DataFrame
        GTFS ``calendar`` table with ``service_id``, ``start_date``,
        ``end_date`` (YYYYMMDD) and one 0/1 column per lowercase weekday name.
    calendar_dates : pandas.DataFrame
        GTFS ``calendar_dates`` table with ``service_id``, ``date``
        (integer YYYYMMDD) and ``exception_type``.

    Returns
    -------
    bool
        Always a plain ``bool``. Unknown services and dates outside the
        service's validity range yield ``False`` rather than ``None``, so the
        result can be used directly in a boolean mask.
    """
    date_int = int(date.strftime("%Y%m%d"))
    weekday = (
        "monday", "tuesday", "wednesday", "thursday",
        "friday", "saturday", "sunday",
    )[date.weekday()]

    # Step 1: an explicit exception for this date overrides the weekly pattern.
    service_exceptions = calendar_dates[calendar_dates["service_id"] == service_id]
    todays_exceptions = service_exceptions[service_exceptions["date"] == date_int]
    for exception_type in todays_exceptions["exception_type"]:
        # GTFS reference: exception_type 1 = service added, 2 = service removed.
        if exception_type == 1:
            return True
        if exception_type == 2:
            return False

    # Step 2: fall back to the regular weekly schedule in calendar.
    service = calendar[calendar["service_id"] == service_id]
    if service.empty:
        return False

    start_date = int(service["start_date"].iloc[0])
    end_date = int(service["end_date"].iloc[0])
    if not start_date <= date_int <= end_date:
        return False

    # bool() turns the numpy.bool_ comparison result into a plain Python bool.
    return bool(service[weekday].iloc[0] == 1)

def identify_transfer_stops(stop_times, trips, date, calendar, calendar_dates):
    """Return the stop_ids that are served by more than one route on ``date``.

    Parameters
    ----------
    stop_times : pandas.DataFrame
        Needs ``trip_id`` and ``stop_id`` columns.
    trips : pandas.DataFrame
        Needs ``trip_id``, ``route_id`` and ``service_id`` columns.
    date : datetime.date
        Day for which services are checked.
    calendar, calendar_dates : pandas.DataFrame
        Passed through to ``is_service_available``.

    Returns
    -------
    set
        ``stop_id`` values whose active stop_times rows span at least two
        distinct ``route_id`` values.

    Notes
    -----
    Side effect kept from the original implementation: ``route_id`` and
    ``service_id`` columns are written onto the ``stop_times`` frame that was
    passed in.
    """
    # Map trip_id to route_id and service_id
    trip_to_route = trips.set_index("trip_id")["route_id"].to_dict()
    trip_to_service = trips.set_index("trip_id")["service_id"].to_dict()

    # Add route_id and service_id to stop_times (see Notes: mutates the argument)
    stop_times["route_id"] = stop_times["trip_id"].map(trip_to_route)
    stop_times["service_id"] = stop_times["trip_id"].map(trip_to_service)

    # Availability depends only on the service, so evaluate it once per
    # distinct service_id instead of once per stop_times row. Rows whose trip
    # has no known service (NaN) are treated as inactive, and a falsy/None
    # answer from is_service_available simply leaves the service out.
    active_services = {
        service_id
        for service_id in stop_times["service_id"].dropna().unique()
        if is_service_available(service_id, date, calendar, calendar_dates)
    }
    active_stop_times = stop_times[stop_times["service_id"].isin(active_services)]

    # Group by stop_id and count unique route_ids
    stop_route_counts = active_stop_times.groupby("stop_id")["route_id"].nunique()

    # A transfer stop is any stop reached by more than one route
    return set(stop_route_counts[stop_route_counts > 1].index)


# Simplify stop_times by collapsing non-transfer stops
def simplify_network_with_no_transfer_stops(start_stop_name, end_stop_name, stop_times, trips, transfer_stops, date, calendar, calendar_dates):
    """Keep only the stop_times rows at transfer stops, the origin and the destination.

    Parameters
    ----------
    start_stop_name, end_stop_name : str
        Stop names looked up in the module-level ``stops`` table. Every
        ``stop_id`` carrying one of these names is kept.
    stop_times : pandas.DataFrame
        Needs ``trip_id``, ``stop_id`` and ``stop_sequence`` columns.
    trips : pandas.DataFrame
        Needs ``trip_id`` and ``service_id`` columns.
    transfer_stops : iterable
        ``stop_id`` values that must be preserved.
    date : datetime.date
        Day for which services are checked.
    calendar, calendar_dates : pandas.DataFrame
        Passed through to ``is_service_available``.

    Returns
    -------
    pandas.DataFrame
        Matching rows of active trips, ordered by ``trip_id`` and original
        ``stop_sequence``, with ``stop_sequence`` renumbered 1..n per trip.
        The frame may be empty.

    Raises
    ------
    KeyError
        If a stop name does not occur in ``stops``.

    Notes
    -----
    Reads the module-level ``stops`` DataFrame; it is not a parameter.
    """
    # From stop name to stop id(s): a name can map to several stop_ids,
    # so collect all of them instead of assuming a single scalar.
    endpoint_stop_ids = set()
    for stop_name in (start_stop_name, end_stop_name):
        matching_ids = stops.loc[stops["stop_name"] == stop_name, "stop_id"]
        if matching_ids.empty:
            raise KeyError(stop_name)
        endpoint_stop_ids.update(matching_ids)

    # Stops that survive the simplification: transfer stops + start + destination.
    keep_stop_ids = set(transfer_stops) | endpoint_stop_ids

    # Evaluate availability once per distinct service rather than once per trip.
    trip_id_to_service = trips.set_index("trip_id")["service_id"].to_dict()
    service_ids = stop_times["trip_id"].map(trip_id_to_service)
    active_services = {
        service_id
        for service_id in service_ids.dropna().unique()
        if is_service_available(service_id, date, calendar, calendar_dates)
    }

    keep_mask = service_ids.isin(active_services) & stop_times["stop_id"].isin(keep_stop_ids)

    # sort_values returns a new frame, so the assignment below does not touch the input.
    simplified_stop_times = stop_times[keep_mask].sort_values(["trip_id", "stop_sequence"])

    # Reset stop_sequence values to be consecutive for each trip_id
    simplified_stop_times["stop_sequence"] = simplified_stop_times.groupby("trip_id").cumcount() + 1

    return simplified_stop_times


In [None]:
def prepare_network(
    start_stop_name, 
    end_stop_name, 
    start_datetime, 
    time_budget_minutes, 
    stop_times, 
    trips, 
    calendar, 
    calendar_dates, 
    stops
):  
    """Build the reduced stop_times table for one journey query.

    Steps: keep rows of services active on the query date, keep rows inside
    the time window, find transfer stops, then drop every stop that is not a
    transfer stop, the origin or the destination.

    Parameters
    ----------
    start_stop_name, end_stop_name : str
        Names of the origin and destination stops.
    start_datetime : str
        Departure time formatted as ``"%Y-%m-%d %H:%M:%S"``.
    time_budget_minutes : int or float
        Length of the search window in minutes, counted from ``start_datetime``.
    stop_times : pandas.DataFrame
        Needs ``trip_id``, ``stop_id``, ``stop_sequence``, ``arrival_time``
        and ``departure_time`` columns.
    trips, calendar, calendar_dates : pandas.DataFrame
        GTFS tables used for the service-availability checks.
    stops : pandas.DataFrame
        Accepted for interface compatibility but not used here;
        ``simplify_network_with_no_transfer_stops`` reads the module-level
        ``stops`` table instead.

    Returns
    -------
    pandas.DataFrame
        Output of ``simplify_network_with_no_transfer_stops``.

    Notes
    -----
    * Side effect kept from the original: a ``service_id`` column is written
      onto the ``stop_times`` frame that was passed in.
    * Only rows belonging to services of the query date are considered. Trips
      scheduled under the following day's service are not included even when
      the window runs past midnight.
    """
    # Parse the start datetime; the service day is its calendar date.
    start_time_obj = datetime.strptime(start_datetime, "%Y-%m-%d %H:%M:%S")
    date = start_time_obj.date()

    # Attach service_id to every stop_times row (mutates the argument, see Notes).
    trip_id_to_service = trips.set_index("trip_id")["service_id"].to_dict()
    stop_times["service_id"] = stop_times["trip_id"].map(trip_id_to_service)

    # Keep only active services. Availability depends only on the service, so
    # it is evaluated once per distinct service_id instead of once per row;
    # rows without a known service (NaN) are dropped.
    active_services = {
        service_id
        for service_id in stop_times["service_id"].dropna().unique()
        if is_service_available(service_id, date, calendar, calendar_dates)
    }
    # .copy() so the columns added below land on an independent frame, not a slice.
    stop_times = stop_times[stop_times["service_id"].isin(active_services)].copy()

    # Convert arrival and departure times to minutes from midnight
    stop_times["arrival_minutes"] = stop_times["arrival_time"].apply(time_to_minutes)
    stop_times["departure_minutes"] = stop_times["departure_time"].apply(time_to_minutes)

    # Time window in minutes from midnight of the service day. The end is
    # start + budget and is deliberately NOT wrapped at 24:00: deriving it
    # from the wall-clock time of the end datetime made the window empty
    # whenever it crossed midnight. time_to_minutes also leaves hour values
    # of 24 or more unwrapped, so both sides of the comparison stay on the same scale.
    start_minutes = time_to_minutes(start_time_obj.strftime("%H:%M:%S"))
    end_minutes = start_minutes + time_budget_minutes
    in_window = (
        (stop_times["arrival_minutes"] >= start_minutes)
        & (stop_times["departure_minutes"] <= end_minutes)
    )
    # .copy() again: identify_transfer_stops writes columns onto the frame it receives.
    stop_times = stop_times[in_window].copy()

    # Identify transfer stops
    transfer_stops = identify_transfer_stops(stop_times, trips, date, calendar, calendar_dates)

    # Simplify the network
    simplified_stop_times = simplify_network_with_no_transfer_stops(
        start_stop_name, 
        end_stop_name, 
        stop_times, 
        trips, 
        transfer_stops, 
        date, 
        calendar, 
        calendar_dates
    )

    return simplified_stop_times

In [18]:
# Define input parameters for a single journey query
start_stop_name = "Pötzleinsdorf"
end_stop_name = "Konstanziagasse"
start_datetime = "2025-10-16 14:30:00"  # must match "%Y-%m-%d %H:%M:%S", the format prepare_network parses
time_budget_minutes = 120  # 2 hours

# Run the `prepare_network` function on the GTFS tables loaded earlier
# (stop_times, trips, calendar, calendar_dates and stops are module-level DataFrames).
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, so no result is shown below.
simplified_stop_times = prepare_network(
    start_stop_name,
    end_stop_name,
    start_datetime,
    time_budget_minutes,
    stop_times,
    trips,
    calendar,
    calendar_dates,
    stops
)

# Output the simplified stop_times DataFrame (plain-text dump of the frame)
print(simplified_stop_times)

KeyboardInterrupt: 

The transfer probability is calculated using the cumulative distribution function (CDF), under the assumption that there are no departure delays. If departures were delayed as well, the reliability of every itinerary could only increase; we deliberately want the most extreme (worst) case. Furthermore, this assumption makes the probability simpler to calculate.