In [None]:
%%html
<style type='text/css'>
.ͼo .cm-scroller {
    font-family: 'Monaspace Neon';
    font-size: 16px
}
</style>

**_<h3 style="color:cyan; text-align:center;">Boilerplate for AI Assignment — Knowledge Representation, Reasoning and Planning</h3>_**
**_<h3 style="color:cyan; text-align:center;">CSE 643</h3>_**


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import networkx as nx
from pyDatalog import pyDatalog
from collections import Counter, defaultdict, deque

**<h4 style="color:Green; font-family:cursive; text-align:center">IMPORTANT</h4>**

<ul>
<li style="font-style:italic">Do not import or use any libraries other than those imported above.</li>
<li style="font-style:italic">Otherwise, your code file will be rejected by the automated testing.</li>
</ul>


In [None]:
# ------------------ Global Variables ------------------
# These containers are filled in place by create_kb(); they start out empty.
route_to_stops = defaultdict(list)  # route ID -> list of stop IDs served by that route
trip_to_route = {}                   # trip ID -> route ID
stop_trip_count = defaultdict(int)    # stop ID -> number of stop_times rows recorded at that stop
fare_rules = {}                      # route ID -> fare price
merged_fare_df = None                # Built lazily by get_merged_fare_df(); create_kb() does not assign it

# Load static data from GTFS (General Transit Feed Specification) files.
# Paths are relative to the notebook's working directory.
df_stops = pd.read_csv('GTFS/stops.txt')
df_routes = pd.read_csv('GTFS/routes.txt')
df_stop_times = pd.read_csv('GTFS/stop_times.txt')
df_fare_attributes = pd.read_csv('GTFS/fare_attributes.txt')
df_trips = pd.read_csv('GTFS/trips.txt')
df_fare_rules = pd.read_csv('GTFS/fare_rules.txt')

In [None]:
print(df_stops.head())
print(df_routes.head())
print(df_stop_times.head())
print(df_fare_attributes.head())
print(df_trips.head())
print(df_fare_rules.head())

<h4 style="text-align:center; color:lightblue; font-style:italic;"> ------------------ Function Definitions ------------------</h4>
<h4 style="text-align:center; color:lightblue; font-style:italic;"> Function to create knowledge base from the loaded data</h4>


In [None]:
# def create_kb():
#     """
#     Create knowledge base by populating global variables with information from loaded datasets.
#     It establishes the relationships between routes, trips, stops, and fare rules.
    
#     Returns:
#         None
#     """
#     global route_to_stops, trip_to_route, stop_trip_count, fare_rules, merged_fare_df

#     # Create trip_id to r_id mapping
#     trip_to_route = {rps['trip_id']: rps['route_id'] for rps in df_trips.to_dict('records')}

#     for rps in df_stop_times.to_dict('records'):
#         route_id = trip_to_route[rps['trip_id']]
#         stop_id = rps['stop_id']

#         route_to_stops.setdefault(route_id, []).append(stop_id)
#         fare_rules.setdefault(stop_id, []).append(route_id)
#         stop_trip_count[stop_id] = stop_trip_count.get(stop_id, 0) + 1

#     for r_id in route_to_stops:
#         route_to_stops[r_id] = sorted(set(route_to_stops[r_id]))
#         fare_rules[r_id] = sorted(set(fare_rules.get(r_id, [])))

In [None]:
def create_kb():
    """
    Create knowledge base by populating global variables with information from loaded datasets.
    It establishes the relationships between routes, trips, stops, and fare rules.

    Populates, in place:
        trip_to_route   -- trip ID -> route ID, taken from df_trips.
        route_to_stops  -- route ID -> sorted list of the distinct stop IDs on that route.
        stop_trip_count -- stop ID -> number of df_stop_times rows at that stop
                           (rows whose trip ID is missing from df_trips are ignored).
        fare_rules      -- route ID -> price; if a route has several fare rows,
                           the last one in the merged frame wins.

    Every container is cleared before it is refilled, so the function is
    idempotent: it is invoked from several places in this notebook and repeated
    calls must not accumulate counts.

    Returns:
        None
    """
    global route_to_stops, trip_to_route, stop_trip_count, fare_rules, merged_fare_df

    # Create trip_id to route_id mapping
    trip_to_route.clear()
    trip_to_route.update(zip(df_trips['trip_id'], df_trips['route_id']))

    # Reset the per-stop counter; without this every extra call would add the
    # same rows on top of the previous totals.
    stop_trip_count.clear()

    # Sets keep the stops of each route unique while the rows are scanned.
    stops_by_route = defaultdict(set)

    # Iterate over the two needed columns directly instead of iterrows(),
    # which builds a Series per row and is far slower on large feeds.
    for trip_id, stop_id in zip(df_stop_times['trip_id'], df_stop_times['stop_id']):
        if trip_id in trip_to_route:
            stops_by_route[trip_to_route[trip_id]].add(stop_id)
            stop_trip_count[stop_id] += 1

    # Convert sets to sorted lists for consistent ordering
    route_to_stops.clear()
    for route_id, stops in stops_by_route.items():
        route_to_stops[route_id] = sorted(stops)

    # Initialize fare rules: later rows overwrite earlier ones for the same route
    fare_rules.clear()
    merged_df = pd.merge(df_fare_rules, df_fare_attributes, on='fare_id')
    fare_rules.update(zip(merged_df['route_id'], merged_df['price']))

In [None]:
create_kb()

In [None]:
# # Now you can access the information in the global variables:
print(route_to_stops)
# print(trip_to_route)
# print(stop_trip_count)
# print(fare_rules) 

In [None]:
# Function to find the top 5 busiest routes based on the number of trips
def get_busiest_routes():
    """
    Rank routes by how many trips are assigned to them and return the top five.

    Reads the global trip_to_route mapping (trip ID -> route ID).

    Returns:
        list: Up to five (route_id, trip_count) tuples, highest trip count
              first. Routes with equal counts keep the order in which they
              first appear in trip_to_route.
    """
    trips_per_route = Counter()
    for route_id in trip_to_route.values():
        trips_per_route[route_id] += 1

    return trips_per_route.most_common(5)

In [None]:
get_busiest_routes()

# [(5721, 318), (5722, 318), (674, 313), (593, 311), (5254, 272)]

In [None]:
# Function to find the top 5 stops with the most frequent trips
def get_most_frequent_stops():
    """
    Return the five stops with the largest trip counts.

    Reads the global stop_trip_count mapping (stop ID -> count).

    Returns:
        list: Up to five (stop_id, trip_count) tuples, highest count first.
              Stops with equal counts keep their order in stop_trip_count.
    """
    # A stable descending sort gives the same ranking, including tie order,
    # as taking the five most common entries of a Counter.
    ranked = sorted(stop_trip_count.items(), key=lambda entry: entry[1], reverse=True)
    return ranked[:5]

In [None]:
get_most_frequent_stops()

# [(10225, 4115), (10221, 4049), (149, 3998), (488, 3996), (233, 3787)]

In [None]:
# Function to find the top 5 busiest stops based on the number of routes passing through them
def get_top_5_busiest_stops():
    """
    Return the five stops that appear in the stop lists of the most routes.

    Reads the global route_to_stops mapping; every occurrence of a stop in a
    route's list counts once.

    Returns:
        list: Up to five (stop_id, route_count) tuples, highest count first.
    """
    occurrences = Counter(
        stop_id
        for stops_on_route in route_to_stops.values()
        for stop_id in stops_on_route
    )
    return occurrences.most_common(5)

In [None]:
get_top_5_busiest_stops()

# [(488, 102), (10225, 101), (149, 99), (233, 95), (10221, 86)]

In [None]:
# Function to identify the top 5 pairs of stops with only one direct route between them
def get_stops_with_one_direct_route():
    """
    Find adjacent stop pairs that are served by exactly one route.

    Two stops form a pair when they sit next to each other in a route's stop
    list in route_to_stops; the pair is ordered (first, second). Each pair is
    scored by stop_trip_count[first] + stop_trip_count[second], accumulated
    once per route containing the pair.

    Returns:
        list: Up to five ((stop_1, stop_2), route_id) tuples for pairs with a
              single route, highest score first.
    """
    routes_by_pair = {}
    score_by_pair = {}

    for route_id, stops in route_to_stops.items():
        for pair in zip(stops, stops[1:]):
            first, second = pair
            routes_by_pair.setdefault(pair, set()).add(route_id)
            score_by_pair[pair] = score_by_pair.get(pair, 0) + (
                stop_trip_count[first] + stop_trip_count[second]
            )

    exclusive_pairs = []
    for pair, routes in routes_by_pair.items():
        if len(routes) == 1:
            (only_route,) = routes
            exclusive_pairs.append((pair, only_route))

    # Stable descending sort: equally scored pairs keep discovery order.
    exclusive_pairs.sort(key=lambda item: score_by_pair[item[0]], reverse=True)
    return exclusive_pairs[:5]

In [None]:
get_stops_with_one_direct_route()

# [((233, 634), 10574),
#  ((10096, 10221), 5555),
#  ((233, 300), 719),
#  ((10120, 10775), 5599),
#  ((10020, 10221), 5024)]

In [None]:
# Function to get merged fare DataFrame
# No need to change this function
def get_merged_fare_df():
    """
    Retrieve the merged fare DataFrame, building and caching it on first use.

    On the first call (while the global merged_fare_df is still None) this
    runs create_kb() and then joins df_fare_rules with df_fare_attributes.
    Later calls return the cached frame without recomputing it.

    Returns:
        DataFrame: The merged fare DataFrame containing fare rules and attributes,
                   with one row per fare_id, sorted by fare_id.
    """
    global merged_fare_df
    if merged_fare_df is None:
        create_kb()
        merged_fare_df = (
            # Inner join of fare rules and fare attributes on the shared key
            pd.merge(df_fare_rules, df_fare_attributes, on="fare_id")
            # Keep only the first row seen for each fare_id
            .drop_duplicates(subset='fare_id', keep='first')
            .sort_values(by='fare_id')
            # Renumber rows 0..n-1 after dropping and sorting
            .reset_index(drop=True)
        )
    return merged_fare_df

In [None]:
get_merged_fare_df()

# 	fare_id	route_id	origin_id	destination_id	price	currency_type	payment_method	transfers	agency_id	old_fare_id
# 0	DIMTS_10001_1177_1178	10001	1177	1178	5.0	INR	0	0	DIMTS	DIMTS_10001_1177_1178
# 1	DIMTS_10001_1177_1179	10001	1177	1179	5.0	INR	0	0	DIMTS	DIMTS_10001_1177_1179
# 2	DIMTS_10001_1177_1180	10001	1177	1180	5.0	INR	0	0	DIMTS	DIMTS_10001_1177_1180
# 3	DIMTS_10001_1177_1181	10001	1177	1181	5.0	INR	0	0	DIMTS	DIMTS_10001_1177_1181
# 4	DIMTS_10001_1177_1182	10001	1177	1182	5.0	INR	0	0	DIMTS	DIMTS_10001_1177_1182


In [None]:
# Visualize the stop-route graph interactively
def visualize_stop_route_graph_interactive(route_to_stops, seed=None):
    """
    Visualize the stop-route graph using Plotly for interactive exploration.

    Every stop becomes a node named "Stop_<id>" and neighbouring stops in a
    route's stop list are joined by an edge labelled with that route. When
    several routes share the same pair of stops, the edge keeps the label of
    the route processed last.

    Args:
        route_to_stops (dict): A dictionary mapping route IDs to lists of stops.
        seed (int, optional): Seed forwarded to the spring layout so the node
            positions can be reproduced. Defaults to None (random layout).

    Returns:
        None
    """
    # Build the undirected stop graph
    graph = nx.Graph()
    for r_id, stops in route_to_stops.items():
        for start, end in zip(stops, stops[1:]):
            graph.add_edge(f"Stop_{start}", f"Stop_{end}", route=str(r_id))

    pos = nx.spring_layout(graph, seed=seed)

    # Each edge contributes its two endpoints followed by None. The None
    # breaks the line so consecutive edges are not joined into one polyline.
    edge_x, edge_y, edge_text = [], [], []
    for n1, n2, data in graph.edges(data=True):
        (x1, y1), (x2, y2) = pos[n1], pos[n2]
        edge_x.extend([x1, x2, None])
        edge_y.extend([y1, y2, None])
        # One label per coordinate entry keeps hover text aligned with points
        edge_text.extend([f"Route: {data['route']}"] * 3)

    # Building edge trace
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='text',
        text=edge_text,
        mode='lines'
    )

    # Building node trace
    node_names = list(graph.nodes())
    node_trace = go.Scatter(
        x=[pos[name][0] for name in node_names],
        y=[pos[name][1] for name in node_names],
        mode='markers',
        hoverinfo='text',
        text=node_names,
        marker=dict(size=10, color='#1f77b4', line_width=2)
    )

    # Building figure
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title='Delhi Bus Routes Network',
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
        )
    )

    fig.show()

In [None]:
visualize_stop_route_graph_interactive(route_to_stops)

In [None]:
# Brute-Force Approach for finding direct routes
def direct_route_brute_force(start_stop, end_stop):
    """
    Find all valid routes between two stops using a brute-force method.

    Scans every route in the global route_to_stops mapping and keeps those
    whose stop list contains both stops. The relative position of the two
    stops within the route is not checked.

    Args:
        start_stop (int): The ID of the starting stop.
        end_stop (int): The ID of the ending stop.

    Returns:
        list: A list of route IDs (int) that connect the two stops directly,
              in the iteration order of route_to_stops.
    """
    # A single pass is enough: filtering the matches a second time with the
    # same condition cannot remove anything.
    return [
        r_id
        for r_id, stops in route_to_stops.items()
        if start_stop in stops and end_stop in stops
    ]

In [None]:
direct_route_brute_force(2001, 2005)

In [None]:
# Initialize Datalog predicates for reasoning
# Z and Z1 hold stop positions within a route; declaring Z1 explicitly avoids
# relying on attribute access (Y.Z) to conjure a second position variable.
pyDatalog.create_terms('RouteHasStop, DirectRoute, OptimalRoute, X, Y, Z, Z1, R, R1, R2')
def initialize_datalog():
    """
    Initialize Datalog terms and predicates for reasoning about routes and stops.

    Declares the rules, rebuilds the knowledge base with create_kb(), and then
    asserts one RouteHasStop(route, stop, position) fact per stop of every
    route in the global route_to_stops, with positions starting at 1.

    The facts are asserted here directly rather than through add_route_data(),
    because that helper is defined in a later cell than the first call to this
    function and would not exist yet on a fresh kernel.

    Returns:
        None
    """
    # DirectRoute(R, X, Y): stop Y immediately follows stop X in route R's stop list
    DirectRoute(R, X, Y) <= RouteHasStop(R, X, Z) & RouteHasStop(R, Y, Z1) & (Z1 == Z + 1)

    # OptimalRoute currently covers direct routes only; no transfer rule is defined
    OptimalRoute(X, Y, R) <= DirectRoute(R, X, Y)

    create_kb()  # Populate the knowledge base

    # Add route data to Datalog
    for r_id, stops in route_to_stops.items():
        for position, stop_id in enumerate(stops, 1):
            +RouteHasStop(r_id, stop_id, position)

In [None]:
initialize_datalog()

In [None]:
# Adding route data to Datalog
def add_route_data(route_to_stops):
    """
    Assert one RouteHasStop fact per (route, stop) entry for Datalog reasoning.

    Each fact has the form RouteHasStop(route_id, stop_id, position), where
    position is the 1-based index of the stop in that route's list.

    Args:
        route_to_stops (dict): A dictionary mapping route IDs to lists of stops.

    Returns:
        None
    """
    for route_id, stops_on_route in route_to_stops.items():
        for index, stop_id in enumerate(stops_on_route):
            # Positions are 1-based, hence index + 1
            +RouteHasStop(route_id, stop_id, index + 1)

In [None]:
add_route_data(route_to_stops)

In [None]:
route_to_stops

In [None]:
# # Function to query direct routes between two stops
# def query_direct_routes(start, end):
#     """
#     Query for direct routes between two stops.

#     Args:
#         start (int): The ID of the starting stop.
#         end (int): The ID of the ending stop.

#     Returns:
#         list: A sorted list of route IDs (str) connecting the two stops.
#     """
#     # Query the database for direct routes between the two stops
#     return [r_id for (r_id,) in pyDatalog.ask(f"DirectRoute({start}, {end}, X)").answers]

In [None]:
def query_direct_routes(start, end):
    """
    Query for direct routes between two stops.

    A route qualifies when its stop list in the global route_to_stops contains
    both stops; the relative position of the two stops is not checked.

    Args:
        start (int): The ID of the starting stop.
        end (int): The ID of the ending stop.

    Returns:
        list: A sorted list of the route IDs whose stop list contains both stops.
    """
    # Route IDs are dictionary keys and therefore already unique; sorting
    # delivers the ordering promised by the contract instead of dict order.
    return sorted(
        r_id
        for r_id, stps in route_to_stops.items()
        if start in stps and end in stps
    )

In [None]:
query_direct_routes(2001, 2005)

In [None]:
# def backward_chaining(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
#     paths = []
#     visited = set()
    
#     def dfs(current_stop, path, transfers):
#         if transfers > max_transfers:
#             return
        
#         if current_stop == start_stop_id:
#             if stop_id_to_include in [stop for _, stop, _ in path]:
#                 paths.append(list(reversed(path)))
#             return
        
#         visited.add(current_stop)
        
#         for route_id, stops in route_to_stops.items():
#             if current_stop in stops:
#                 prev_stop_index = stops.index(current_stop) - 1
#                 if prev_stop_index >= 0:
#                     prev_stop = stops[prev_stop_index]
#                     if prev_stop not in visited:
#                         new_path = [(route_id, prev_stop, route_id)] + path
#                         dfs(prev_stop, new_path, transfers)
                        
#         for route_id, stops in route_to_stops.items():
#             if current_stop in stops:
#                 for transfer_route, transfer_stops in route_to_stops.items():
#                     if route_id != transfer_route and current_stop in transfer_stops:
#                         prev_stop_index = transfer_stops.index(current_stop) - 1
#                         if prev_stop_index >= 0:
#                             prev_stop = transfer_stops[prev_stop_index]
#                             if prev_stop not in visited:
#                                 new_path = [(transfer_route, prev_stop, route_id)] + path
#                                 dfs(prev_stop, new_path, transfers + 1)
        
#         visited.remove(current_stop)
    
#     dfs(end_stop_id, [], 0)
#     return paths

In [None]:
# def forward_chaining(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
#     """
#     Perform forward chaining to find optimal routes considering transfers.
#     """
#     paths = []
#     visited_route_stops = set()

#     def get_routes_through_stop(stop_id):
#         """Get all routes that pass through a specific stop."""
#         stop_routes = []
#         for route_id, stops in route_to_stops.items():
#             if stop_id in stops:
#                 stop_routes.append(route_id)
#         return stop_routes

#     def explore_route(route_id, current_stop):
#         """Get all possible destination stops for a given route and current stop."""
#         stops = route_to_stops[route_id]
#         try:
#             current_idx = stops.index(current_stop)
#             possible_destinations = []
#             # Consider all stops after current_stop in the route
#             for idx in range(current_idx + 1, len(stops)):
#                 possible_destinations.append(stops[idx])
#             return possible_destinations
#         except ValueError:
#             return []

#     def find_paths(current_stop, visited):
#         """Find all valid paths from current stop."""
#         if len(visited) > max_transfers + 1:
#             return

#         # If we reached the end stop and satisfied the inclusion constraint
#         if current_stop == end_stop_id:
#             if stop_id_to_include is None or stop_id_to_include in {v[1] for v in visited}:
#                 paths.append(visited)
#             return

#         # Get all routes passing through current stop
#         current_routes = get_routes_through_stop(current_stop)
        
#         for route_id in current_routes:
#             # Get possible next stops on this route
#             next_stops = explore_route(route_id, current_stop)
            
#             for next_stop in next_stops:
#                 # Create route-stop pattern
#                 route_stop = (route_id, current_stop, next_stop)
                
#                 if route_stop not in visited_route_stops:
#                     visited_route_stops.add(route_stop)
#                     find_paths(next_stop, visited + [route_stop])
#                     visited_route_stops.remove(route_stop)

#     # Start exploration from initial stop
#     find_paths(start_stop_id, [])
    
#     # Format paths to match test case format
#     formatted_paths = []
#     for path in paths:
#         if len(path) <= max_transfers + 1:  # Only include paths within transfer limit
#             formatted_paths.append(path)
    
#     return formatted_paths

# def backward_chaining(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
#     """
#     Perform backward chaining to find optimal routes considering transfers.
#     """
#     paths = []
#     visited_route_stops = set()

#     def get_routes_through_stop(stop_id):
#         """Get all routes that pass through a specific stop."""
#         stop_routes = []
#         for route_id, stops in route_to_stops.items():
#             if stop_id in stops:
#                 stop_routes.append(route_id)
#         return stop_routes

#     def explore_route_backward(route_id, current_stop):
#         """Get all possible origin stops for a given route and current stop."""
#         stops = route_to_stops[route_id]
#         try:
#             current_idx = stops.index(current_stop)
#             possible_origins = []
#             # Consider all stops before current_stop in the route
#             for idx in range(current_idx - 1, -1, -1):
#                 possible_origins.append(stops[idx])
#             return possible_origins
#         except ValueError:
#             return []

#     def find_paths_backward(current_stop, visited):
#         """Find all valid paths backwards from current stop."""
#         if len(visited) > max_transfers + 1:
#             return

#         # If we reached the start stop and satisfied the inclusion constraint
#         if current_stop == start_stop_id:
#             if stop_id_to_include is None or stop_id_to_include in {v[1] for v in visited}:
#                 paths.append(list(reversed(visited)))
#             return

#         # Get all routes passing through current stop
#         current_routes = get_routes_through_stop(current_stop)
        
#         for route_id in current_routes:
#             # Get possible previous stops on this route
#             prev_stops = explore_route_backward(route_id, current_stop)
            
#             for prev_stop in prev_stops:
#                 # Create route-stop pattern
#                 route_stop = (route_id, prev_stop, current_stop)
                
#                 if route_stop not in visited_route_stops:
#                     visited_route_stops.add(route_stop)
#                     find_paths_backward(prev_stop, visited + [route_stop])
#                     visited_route_stops.remove(route_stop)

#     # Start exploration from end stop
#     find_paths_backward(end_stop_id, [])
    
#     # Format paths to match test case format
#     formatted_paths = []
#     for path in paths:
#         if len(path) <= max_transfers + 1:  # Only include paths within transfer limit
#             formatted_paths.append(path)
    
#     return formatted_paths

# def pddl_planning(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
#     """
#     Implement PDDL-style planning to find routes with optional transfers.
#     """
#     paths = []
#     visited_route_stops = set()

#     class PlanningState:
#         def __init__(self, current_stop, path, transfers):
#             self.current_stop = current_stop
#             self.path = path
#             self.transfers = transfers
#             self.included_required = (stop_id_to_include is None or 
#                                     stop_id_to_include in {p[1] for p in path})

#     def get_routes_through_stop(stop_id):
#         """Get all routes that pass through a specific stop."""
#         stop_routes = []
#         for route_id, stops in route_to_stops.items():
#             if stop_id in stops:
#                 stop_routes.append(route_id)
#         return stop_routes

#     def get_next_stops(route_id, current_stop):
#         """Get next possible stops on a route."""
#         stops = route_to_stops[route_id]
#         try:
#             current_idx = stops.index(current_stop)
#             return [(s, idx) for idx, s in enumerate(stops) if idx > current_idx]
#         except ValueError:
#             return []

#     # Initialize planning
#     initial_state = PlanningState(start_stop_id, [], 0)
#     frontier = deque([initial_state])
#     explored = set()

#     while frontier:
#         current_state = frontier.popleft()

#         # Check if goal state reached
#         if (current_state.current_stop == end_stop_id and 
#             current_state.included_required and 
#             current_state.transfers <= max_transfers):
#             paths.append(current_state.path)
#             continue

#         if current_state.transfers > max_transfers:
#             continue

#         state_key = (current_state.current_stop, 
#                     tuple(sorted((r, s1, s2) for r, s1, s2 in current_state.path)))
#         if state_key in explored:
#             continue
#         explored.add(state_key)

#         # Get possible actions (routes) from current stop
#         current_routes = get_routes_through_stop(current_state.current_stop)
        
#         for route_id in current_routes:
#             next_stops = get_next_stops(route_id, current_state.current_stop)
            
#             for next_stop, _ in next_stops:
#                 route_stop = (route_id, current_state.current_stop, next_stop)
                
#                 if route_stop not in visited_route_stops:
#                     visited_route_stops.add(route_stop)
#                     new_path = current_state.path + [route_stop]
#                     new_transfers = current_state.transfers
#                     if current_state.path and current_state.path[-1][0] != route_id:
#                         new_transfers += 1
                    
#                     new_state = PlanningState(next_stop, new_path, new_transfers)
#                     frontier.append(new_state)
#                     visited_route_stops.remove(route_stop)

#     # Format paths to match test case format
#     formatted_paths = []
#     for path in paths:
#         if len(path) <= max_transfers + 1:  # Only include paths within transfer limit
#             formatted_paths.append(path)
    
#     return formatted_paths

In [None]:
def forward_chaining(start_stop_id, end_stop_id, via_stop_id, max_transfers):
    """
    Breadth-first search from the start stop for journeys to the end stop.

    A journey is a list of (route_id, stop_id) legs, each leg riding route_id
    to a stop that appears later in that route's stop list. A transfer is
    counted whenever a leg uses a different route than the previous leg.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        via_stop_id (int): A stop that must be the destination of some leg.
        max_transfers (int): The maximum number of transfers allowed.

    Returns:
        list: Journeys (lists of (route_id, stop_id) tuples) that end at
              end_stop_id, include via_stop_id, and use at most max_transfers
              transfers.
    """
    valid_paths = []
    queue = deque([(start_stop_id, [], 0)])  # (current_stop, path, transfers)

    while queue:
        current_stop, path, transfers = queue.popleft()

        # Enforce the transfer budget before the goal test; otherwise a journey
        # that reaches the end stop with one transfer too many is accepted.
        if transfers > max_transfers:
            continue

        # Check if we have reached the end stop with the via stop included
        if current_stop == end_stop_id and any(stop == via_stop_id for _, stop in path):
            valid_paths.append(path)
            continue  # Found a valid path, continue searching for more

        # Explore all routes from the current stop
        for route_id, stops in route_to_stops.items():
            if current_stop in stops:
                current_idx = stops.index(current_stop)
                new_transfers = transfers + (1 if path and path[-1][0] != route_id else 0)
                if new_transfers > max_transfers:
                    continue  # Over budget: do not enqueue states that cannot be accepted
                for next_stop in stops[current_idx + 1:]:
                    queue.append((next_stop, path + [(route_id, next_stop)], new_transfers))

    return valid_paths

def backward_chaining(start_stop_id, end_stop_id, via_stop_id, max_transfers):
    """
    Breadth-first search backwards from the end stop towards the start stop.

    Each backward step rides a route from the current stop to a stop that
    appears earlier in that route's stop list and records (route_id, stop_id)
    for the stop reached. A transfer is counted whenever a step uses a
    different route than the previous step.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        via_stop_id (int): A stop that must be reached by some backward step.
        max_transfers (int): The maximum number of transfers allowed.

    Returns:
        list: Journeys (lists of (route_id, stop_id) tuples, reversed so they
              read from start to end) that include via_stop_id and use at
              most max_transfers transfers.
    """
    valid_paths = []
    queue = deque([(end_stop_id, [], 0)])  # (current_stop, path, transfers)

    while queue:
        current_stop, path, transfers = queue.popleft()

        # Enforce the transfer budget before the goal test; otherwise a journey
        # that reaches the start stop with one transfer too many is accepted.
        if transfers > max_transfers:
            continue

        # Check if we have reached the start stop with the via stop included
        if current_stop == start_stop_id and any(stop == via_stop_id for _, stop in path):
            valid_paths.append(path[::-1])  # Reverse the path to show it from start to end
            continue  # Found a valid path, continue searching for more

        # Explore all routes from the current stop
        for route_id, stops in route_to_stops.items():
            if current_stop in stops:
                current_idx = stops.index(current_stop)
                new_transfers = transfers + (1 if path and path[-1][0] != route_id else 0)
                if new_transfers > max_transfers:
                    continue  # Over budget: do not enqueue states that cannot be accepted
                for prev_stop in stops[:current_idx]:
                    queue.append((prev_stop, path + [(route_id, prev_stop)], new_transfers))

    return valid_paths

In [None]:

# Forward chaining for optimal route planning
def forward_chaining(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
    """
    Perform forward chaining to find optimal routes considering transfers.

    Pairs every route that serves both the start stop and the stop to include
    with every route that serves both the stop to include and the end stop.
    A pair of two different routes is kept only when max_transfers is positive.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        stop_id_to_include (int): The stop ID where a transfer occurs.
        max_transfers (int): The maximum number of transfers allowed.

    Returns:
        list: A list of unique paths (list of tuples) that satisfy the criteria, where each tuple contains:
              - route_id1 (int): The ID of the first route.
              - stop_id (int): The ID of the intermediate stop.
              - route_id2 (int): The ID of the second route.
    """
    r_id_1 = [r_id for r_id, stps in route_to_stops.items() if start_stop_id in stps and stop_id_to_include in stps]
    r_id_2 = [r_id for r_id, stps in route_to_stops.items() if end_stop_id in stps and stop_id_to_include in stps]

    # Combine routes and check transfer constraints. Nothing is printed here:
    # the result is returned only, matching backward_chaining.
    paths = []
    for r1 in r_id_1:
        for r2 in r_id_2:
            if r1 == r2 or max_transfers > 0:
                paths.append((r1, stop_id_to_include, r2))

    return paths

# Backward chaining for optimal route planning
def backward_chaining(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
    """
    Perform backward chaining to find optimal routes considering transfers.

    Pairs every route that serves both the start stop and the stop to include
    with every route that serves both the end stop and the stop to include.
    A pair of two different routes is kept only when max_transfers is positive.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        stop_id_to_include (int): The stop ID where a transfer occurs.
        max_transfers (int): The maximum number of transfers allowed.

    Returns:
        list: (route_id1, stop_id, route_id2) tuples, where stop_id is always
              stop_id_to_include, route_id1 serves the start stop and
              route_id2 serves the end stop.
    """
    def routes_linking(endpoint):
        # Routes whose stop list holds both the endpoint and the required stop
        return [
            route
            for route, served in route_to_stops.items()
            if endpoint in served and stop_id_to_include in served
        ]

    first_legs = routes_linking(start_stop_id)
    second_legs = routes_linking(end_stop_id)

    # Same-route pairs are always allowed; mixed pairs need a transfer budget
    return [
        (first, stop_id_to_include, second)
        for first in first_legs
        for second in second_legs
        if first == second or max_transfers > 0
    ]

In [None]:
backward_chaining(22540, 2573, 4686, 1)

In [None]:
def forward_chaining(start_stop_id, end_stop_id, via_stop_id, max_transfers):
    """
    Breadth-first search from the start stop for journeys to the end stop.

    A journey is a list of (route_id, stop_id) legs, each leg riding route_id
    to a stop that appears later in that route's stop list. A transfer is
    counted whenever a leg uses a different route than the previous leg.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        via_stop_id (int): A stop that must be the destination of some leg.
        max_transfers (int): The maximum number of transfers allowed.

    Returns:
        list: Journeys (lists of (route_id, stop_id) tuples) that end at
              end_stop_id, include via_stop_id, and use at most max_transfers
              transfers.
    """
    valid_paths = []
    queue = deque([(start_stop_id, [], 0)])  # (current_stop, path, transfers)

    while queue:
        current_stop, path, transfers = queue.popleft()

        # Enforce the transfer budget before the goal test; otherwise a journey
        # that reaches the end stop with one transfer too many is accepted.
        if transfers > max_transfers:
            continue

        # Check if we have reached the end stop with the via stop included
        if current_stop == end_stop_id and any(stop == via_stop_id for _, stop in path):
            valid_paths.append(path)
            continue  # Found a valid path, continue searching for more

        # Explore all routes from the current stop
        for route_id, stops in route_to_stops.items():
            if current_stop in stops:
                current_idx = stops.index(current_stop)
                new_transfers = transfers + (1 if path and path[-1][0] != route_id else 0)
                if new_transfers > max_transfers:
                    continue  # Over budget: do not enqueue states that cannot be accepted
                for next_stop in stops[current_idx + 1:]:
                    queue.append((next_stop, path + [(route_id, next_stop)], new_transfers))

    return valid_paths


In [None]:
# Breadth-first forward chaining over stop sequences (cycle-free paths only)
def forward_chaining(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
    """
    Enumerate cycle-free stop sequences from the start stop to the end stop and
    report the routes that serve each hop.

    The search walks the module-level ``route_to_stops`` mapping breadth-first:
    from a stop it may move to any stop listed later on any route containing it,
    never revisiting a stop already on the current sequence. The transfer
    counter is incremented each time a hop lands on ``stop_id_to_include``; a
    sequence that is not at the end stop is abandoned once the counter exceeds
    ``max_transfers``.

    NOTE(review): sequences are not required to pass through
    ``stop_id_to_include``, and route changes between hops are not counted —
    confirm this matches the intended specification.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        stop_id_to_include (int): The stop whose arrival is counted as a transfer.
        max_transfers (int): The largest transfer count a sequence may carry
            while it is still being extended.

    Returns:
        list: A flat list of (route_id, stop_id) tuples: for every found
              sequence and every consecutive pair of stops in it, one tuple per
              route returned by ``query_direct_routes`` for that pair, with
              ``stop_id`` being the stop the hop arrives at. Entries from
              different sequences are concatenated and may repeat.
    """
    found_sequences = set()
    frontier = deque()
    frontier.append((start_stop_id, 0, (start_stop_id,)))

    while len(frontier) > 0:
        stop, transfers_used, visited_stops = frontier.popleft()

        if stop == end_stop_id:
            found_sequences.add(visited_stops)
            continue

        if transfers_used > max_transfers:
            continue

        for route_stops in route_to_stops.values():
            if stop not in route_stops:
                continue

            position = route_stops.index(stop)
            for candidate in route_stops[position + 1:]:
                if candidate in visited_stops:
                    # Already on this sequence; skipping keeps sequences cycle-free.
                    continue

                if candidate == stop_id_to_include:
                    next_transfers = transfers_used + 1
                else:
                    next_transfers = transfers_used

                frontier.append((candidate, next_transfers, visited_stops + (candidate,)))

    hops = []
    for stop_sequence in found_sequences:
        for origin, destination in zip(stop_sequence, stop_sequence[1:]):
            for route_id in query_direct_routes(origin, destination):
                hops.append((route_id, destination))

    return hops

In [None]:
# Example query: from stop 951 to stop 340, with stop_id_to_include=300 and max_transfers=1.
forward_chaining(951, 340, 300, 1)

In [None]:
# PDDL-style planning for route finding
def pddl_planning(start_stop_id, end_stop_id, stop_id_to_include, max_transfers):
    """
    Implement PDDL-style planning to find a route with optional transfers.

    States are (current_stop, path, transfers) triples, where ``path`` is a
    tuple of (route_id, stop_id) entries starting with the marker
    ``(None, start_stop_id)``. The single action is "ride a route from the
    current stop to a later stop on that route"; riding a route different from
    the one used for the previous hop costs one transfer, and boarding the
    first route is free. States are explored breadth-first, so the first goal
    reached uses the fewest hops. Route data comes from the module-level
    ``route_to_stops`` mapping.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        stop_id_to_include (int): A stop the path must contain; a falsy value
            disables this requirement.
        max_transfers (int): The maximum number of transfers allowed.

    Returns:
        list: The first path found, as a list of (route_id, stop_id) tuples in
              travel order. The first entry is ``(None, start_stop_id)``.
              Returns None when no path satisfies the constraints.
    """

    def passes_required_stop(path):
        """Check whether the path contains the stop that must be included."""
        return any(stop == stop_id_to_include for _, stop in path)

    def is_goal(state):
        """Check if the current state is a goal state."""
        current_stop, path, _ = state
        if current_stop != end_stop_id:
            return False
        return not stop_id_to_include or passes_required_stop(path)

    def get_successors(state):
        """Generate successor states based on possible actions."""
        current_stop, path, transfers = state
        last_route_id = path[-1][0]  # None while still at the start marker
        successors = []

        for route_id, stops in route_to_stops.items():
            if current_stop not in stops:
                continue

            # Only a change away from the route of the previous hop is a transfer.
            changes_route = last_route_id is not None and route_id != last_route_id
            new_transfers = transfers + (1 if changes_route else 0)
            if new_transfers > max_transfers:
                continue

            # Travel is only possible towards stops listed later on the route.
            current_idx = stops.index(current_stop)
            for next_stop in stops[current_idx + 1:]:
                new_path = path + ((route_id, next_stop),)
                successors.append((next_stop, new_path, new_transfers))

        return successors

    def search_key(state):
        """Summarise a state by everything that affects its future expansion."""
        current_stop, path, transfers = state
        return (current_stop, path[-1][0], transfers, passes_required_stop(path))

    initial_state = (start_stop_id, ((None, start_stop_id),), 0)

    queue = deque([(initial_state, None)])  # (state, parent_state)
    # Two states with the same key have identical successors and goal status,
    # so only the first one reached (never with more hops) needs expanding.
    visited = {search_key(initial_state)}

    while queue:
        state, parent_state = queue.popleft()

        if is_goal(state):
            return reconstruct_path(state, parent_state)

        for next_state in get_successors(state):
            key = search_key(next_state)
            if key not in visited:
                visited.add(key)
                queue.append((next_state, state))  # Store current state as parent

    return None  # No path found

def reconstruct_path(state, parent_state):
    """
    Return the path recorded in a search state.

    Every state carries its complete path from the initial state in
    ``state[1]``, so no walk through parent states is required.

    Args:
        state (tuple): A (current_stop, path, transfers) search state.
        parent_state (tuple or None): The state that ``state`` was expanded
            from. Accepted for compatibility with existing callers; unused.

    Returns:
        list: The (route_id, stop_id) entries of ``state[1]`` in travel order.
    """
    return list(state[1])

In [None]:
# Example query: from stop 951 to stop 340, with stop_id_to_include=300 and max_transfers=1.
pddl_planning(951, 340, 300, 1)

In [None]:
# Keep only the fare rows that fit within a fare limit
def prune_data(merged_fare_df, initial_fare):
    """
    Select the rows of the fare table whose price does not exceed a limit.

    Args:
        merged_fare_df (DataFrame): The merged fare DataFrame; must have a 'price' column.
        initial_fare (float): The maximum fare allowed (inclusive).

    Returns:
        DataFrame: The rows of ``merged_fare_df`` with 'price' at or below
                   ``initial_fare``, with their original index labels.
    """
    # Boolean mask: True where the row's price is at or below the limit.
    within_limit = merged_fare_df['price'].le(initial_fare)
    return merged_fare_df.loc[within_limit]

In [None]:
# merged_fare_df.head()

In [None]:
# Preview the fare rows whose price is at most 10.
prune_data(merged_fare_df, 10)

In [None]:
# Per-route fare summary, computed once up front
def compute_route_summary(pruned_df):
    """
    Summarise the fare table route by route.

    Args:
        pruned_df (DataFrame): Fare rows with 'route_id', 'price' and
            'destination_id' columns.

    Returns:
        dict: One entry per distinct 'route_id' value:
              {
                  route_id: {
                      'min_price': the smallest 'price' among the route's rows,
                      'stops': set of the route's 'destination_id' values
                  }
              }
    """
    return {
        route_id: {
            'min_price': route_rows['price'].min(),
            'stops': set(route_rows['destination_id']),
        }
        for route_id, route_rows in pruned_df.groupby('route_id')
    }

In [None]:
# Build the module-level route_summary (read by bfs_route_planner_optimized)
# from the fare rows priced at most 10.
route_summary = compute_route_summary(prune_data(merged_fare_df, 10))

In [None]:
# Display the full route summary dictionary.
route_summary

In [None]:
def bfs_route_planner_optimized(start_stop_id, end_stop_id, initial_fare, max_transfers=3):
    """
    Use Breadth-First Search (BFS) to find a fewest-hop route within fare and transfer limits.

    Route data is read from the module-level ``route_summary`` dictionary
    (route_id -> {'min_price': ..., 'stops': set}); it is not a parameter.
    A hop rides one route from the current stop to any other stop in that
    route's 'stops' set and costs the route's 'min_price'. Riding a route
    different from the one used for the previous hop costs one transfer;
    boarding the first route is free.

    Args:
        start_stop_id (int): The starting stop ID.
        end_stop_id (int): The ending stop ID.
        initial_fare (float): The available fare for the trip.
        max_transfers (int): The maximum number of transfers allowed (default is 3).

    Returns:
        list: The route as a list of (route_id, stop_id) tuples in travel order
              (an empty list when start and end are the same stop), or None if
              no route fits the constraints.

    Raises:
        ValueError: If a stop ID or ``route_summary`` is falsy, or
            ``route_summary`` is not a dict.
    """
    if not all([start_stop_id, end_stop_id, route_summary]) or not isinstance(route_summary, dict):
        raise ValueError("Invalid input parameters")

    # Each queue entry: (path including the (None, start) marker, transfers used, fare left).
    queue = deque([([(None, start_stop_id)], 0, initial_fare)])

    # Largest remaining fare seen per (stop, transfers). An arrival is only
    # worth exploring if it leaves more fare than every earlier arrival there;
    # a plain visited set would let an expensive arrival block a cheaper one.
    best_fare = {(start_stop_id, 0): initial_fare}

    while queue:
        current_path, transfers, remaining_fare = queue.popleft()
        last_route_id, current_stop = current_path[-1]

        if current_stop == end_stop_id:
            # BFS dequeues paths in non-decreasing hop count, so the first hit is the shortest.
            return current_path[1:]  # Remove the initial (None, start_stop_id)

        for route_id, route_info in route_summary.items():
            if current_stop not in route_info['stops']:
                continue

            # Boarding the first route (last_route_id is None) is not a transfer.
            changes_route = last_route_id is not None and last_route_id != route_id
            new_transfers = transfers + (1 if changes_route else 0)
            if new_transfers > max_transfers:
                continue

            new_fare = remaining_fare - route_info['min_price']
            if new_fare < 0:
                continue

            for next_stop in route_info['stops']:
                if next_stop == current_stop:
                    continue

                key = (next_stop, new_transfers)
                if key in best_fare and best_fare[key] >= new_fare:
                    continue  # An earlier arrival here was at least as cheap.

                best_fare[key] = new_fare
                queue.append((current_path + [(route_id, next_stop)], new_transfers, new_fare))

    return None

In [None]:
# Example query: from stop 22540 to stop 2573 with a fare budget of 10 and max_transfers=3.
bfs_route_planner_optimized(22540, 2573, 10, 3)