In [22]:
import pandas as pd
import networkx as nx
from datetime import datetime, timedelta

In [39]:
def time_to_minutes(time_str):
    """
    Convert a HH:MM:SS time string to minutes since midnight.
    If the time goes beyond 24:00:00 (e.g., 25:30:00), it handles it correctly.
    """
    hours, minutes, seconds = map(int, time_str.split(':'))
    return hours * 60 + minutes + seconds / 60

In [40]:
def load_gtfs_data(tgv_folder, ter_folder):
    """
    Load GTFS data from the given TGV and TER folders.
    """
    tgv_stops = pd.read_csv(f"{tgv_folder}/stops.txt")
    tgv_stop_times = pd.read_csv(f"{tgv_folder}/stop_times.txt")
    tgv_routes = pd.read_csv(f"{tgv_folder}/routes.txt")
    tgv_trips = pd.read_csv(f"{tgv_folder}/trips.txt")

    ter_stops = pd.read_csv(f"{ter_folder}/stops.txt")
    ter_stop_times = pd.read_csv(f"{ter_folder}/stop_times.txt")
    ter_routes = pd.read_csv(f"{ter_folder}/routes.txt")
    ter_trips = pd.read_csv(f"{ter_folder}/trips.txt")

    return {
        "tgv": {
            "stops": tgv_stops,
            "stop_times": tgv_stop_times,
            "routes": tgv_routes,
            "trips": tgv_trips,
        },
        "ter": {
            "stops": ter_stops,
            "stop_times": ter_stop_times,
            "routes": ter_routes,
            "trips": ter_trips,
        },
    }

In [41]:
def build_graph(gtfs_data):
    """
    Build a graph from the GTFS data.
    - Nodes: stop_ids
    - Edges: trips between stops with weight as the travel time (in minutes).
    """
    G = nx.DiGraph()

    for category, data in gtfs_data.items():
        stop_times = data["stop_times"]

        # Sort stop_times by trip_id and stop_sequence to ensure proper order
        stop_times = stop_times.sort_values(by=["trip_id", "stop_sequence"])

        # Iterate through each trip and add edges to the graph
        for trip_id, group in stop_times.groupby("trip_id"):
            previous_stop = None
            previous_departure = None

            for _, row in group.iterrows():
                stop_id = row["stop_id"]
                arrival_time = row["arrival_time"]
                departure_time = row["departure_time"]

                if previous_stop is not None:
                    # Calculate travel time in minutes
                    travel_time = time_to_minutes(arrival_time) - time_to_minutes(previous_departure)

                    # Handle cases where times span midnight
                    if travel_time < 0:
                        travel_time += 24 * 60

                    # Add edge to the graph
                    G.add_edge(
                        previous_stop,
                        stop_id,
                        weight=travel_time,
                        trip_id=trip_id,
                        category=category,
                    )

                previous_stop = stop_id
                previous_departure = departure_time

    return G

In [42]:
tgv_folder = "export_gtfs_voyages"
ter_folder = "export-ter-gtfs-last"

In [43]:
gtfs_data = load_gtfs_data(tgv_folder, ter_folder)

In [44]:
graph = build_graph(gtfs_data)

In [45]:
def find_fastest_path(graph, gtfs_data, source_city, destination_city, current_time):
    """
    Find the fastest path between two cities using Dijkstra's algorithm.
    """
    # Map city names to stop_ids
    def get_stop_ids(city_name, stops_df):
        return stops_df[stops_df["stop_name"].str.contains(city_name, case=False, na=False)]["stop_id"].tolist()

    source_stops = []
    destination_stops = []

    for category, data in gtfs_data.items():
        stops = data["stops"]
        source_stops += get_stop_ids(source_city, stops)
        destination_stops += get_stop_ids(destination_city, stops)

    if not source_stops or not destination_stops:
        raise ValueError("Source or destination city not found in the data.")

    # Adjust current time to minutes since midnight
    now_minutes = current_time.hour * 60 + current_time.minute

    # Filter edges based on departure times
    filtered_graph = nx.DiGraph()
    for u, v, data in graph.edges(data=True):
        departure_time = time_to_minutes(data.get("departure_time", "00:00:00"))
        if departure_time >= now_minutes:
            filtered_graph.add_edge(u, v, **data)

    # Find the shortest path using Dijkstra's algorithm
    min_time = float("inf")
    best_path = None

    for source in source_stops:
        for destination in destination_stops:
            try:
                path = nx.shortest_path(filtered_graph, source=source, target=destination, weight="weight")
                time = nx.shortest_path_length(filtered_graph, source=source, target=destination, weight="weight")

                if time < min_time:
                    min_time = time
                    best_path = path
            except nx.NetworkXNoPath:
                continue

    if best_path is None:
        raise ValueError("No path found between the cities at the current time.")

    return best_path, min_time

In [46]:
source_city = "Strasbourg"
destination_city = "Paris Est"
current_time = datetime.now()

# Find the fastest path
path, time = find_fastest_path(graph, gtfs_data, source_city, destination_city, current_time)
if(path and time):
    print(f"Fastest path: {path} (Time: {time} minutes)")

NodeNotFound: Either source StopArea:OCE87212027 or target StopArea:OCE87113001 is not in G