In [None]:
import networkx as nx # type: ignore
import random
from shapely.geometry import Point # type: ignore
from tqdm import tqdm # type: ignore
import geopandas as gpd # type: ignore
import numpy as np # type: ignore
import time
import networkx as nx
import igraph as ig
from tqdm import tqdm

%run toy_city_generation.ipynb
%run city_utilities.ipynb
%run real_city_generation.ipynb

%pip install osmnx matplotlib igraph

In [None]:
def get_valid_area(G, trip, tolerance=0.1):
    """Exhaustively find the zone nodes that are consistent with a trip.

    Every (start, end) pair drawn from the trip's start and end zones is
    checked. A pair is accepted when the shortest path by "length" and the
    shortest path by "travel_time" both deviate from the trip's recorded
    distance / travel time by less than `tolerance` (relative error).

    Note that the two shortest paths are computed independently, so they are
    not guaranteed to follow the same route.

    Args:
        G: networkx graph whose nodes carry a "zone" attribute and whose edges
            carry "length" and "travel_time" attributes.
        trip: mapping with "start_zone_id", "end_zone_id", "distance" and
            "travel_time" keys. "distance" and "travel_time" must be non-zero
            because they are used as divisors.
        tolerance: maximum allowed relative deviation (exclusive).

    Returns:
        Tuple (valid_start_points, valid_end_points, fraction_start,
        fraction_end): the accepted start/end nodes as lists and the share of
        each zone's nodes they represent. ([], [], 0, 0) is returned when
        either zone has no nodes in the graph.
    """
    start_nodes = {n for n, data in G.nodes(data=True) if data.get("zone") == trip["start_zone_id"]}
    end_nodes = {n for n, data in G.nodes(data=True) if data.get("zone") == trip["end_zone_id"]}

    if not start_nodes or not end_nodes:
        return [], [], 0, 0

    valid_start_points = set()
    valid_end_points = set()

    with tqdm(total=len(start_nodes) * len(end_nodes), desc="Processing start and end nodes") as pbar:
        for s in start_nodes:
            for e in end_nodes:
                pbar.update(1)
                # The check must run for every pair, i.e. inside the inner loop
                try:
                    path_length = nx.shortest_path_length(G, source=s, target=e, weight="length")
                    path_time = nx.shortest_path_length(G, source=s, target=e, weight="travel_time")
                except nx.NetworkXNoPath:
                    # Unreachable pair: it cannot explain the trip
                    continue

                length_ratio = abs(path_length - trip["distance"]) / trip["distance"]
                time_ratio = abs(path_time - trip["travel_time"]) / trip["travel_time"]

                if length_ratio < tolerance and time_ratio < tolerance:
                    valid_start_points.add(s)
                    valid_end_points.add(e)

    # Both zones are known to be non-empty here (see the early return above)
    fraction_start = len(valid_start_points) / len(start_nodes)
    fraction_end = len(valid_end_points) / len(end_nodes)

    return list(valid_start_points), list(valid_end_points), fraction_start, fraction_end


In [None]:
from functools import lru_cache
import random
import networkx as nx

# Memoise the travel-time-shortest path for each (graph, start, end) triple
@lru_cache(maxsize=None)
def get_cached_path(G, start, end):
    """Return the shortest path from `start` to `end` weighted by "travel_time".

    Results are memoised with an unbounded `lru_cache`, so all three arguments
    must be hashable and every graph passed in stays referenced by the cache.
    The cached list object itself is handed back on each hit; callers should
    not mutate it.

    Returns:
        The node list of the shortest path, or None when `end` cannot be
        reached from `start`.
    """
    route = None
    try:
        route = nx.shortest_path(G, source=start, target=end, weight="travel_time")
    except nx.NetworkXNoPath:
        # No route between the two nodes: report it as None
        pass
    return route

def get_valid_area_fast(G, trip, tolerance=0.1, max_attempts=100):
    """Estimate a trip's valid start/end area from one randomly found seed path.

    Random (start, end) pairs from the trip's zones are tried until one yields
    a travel-time-shortest path whose total length and travel time are both
    within `tolerance` (relative error) of the trip. That seed path is then
    perturbed with `generate_modified_paths` to discover further endpoints.

    Args:
        G: networkx graph whose nodes carry a "zone" attribute and whose edges
            carry "length" and "travel_time" attributes.
        trip: mapping with "start_zone_id", "end_zone_id", "distance" and
            "travel_time" keys. "distance" and "travel_time" must be non-zero
            because they are used as divisors.
        tolerance: maximum allowed relative deviation (exclusive).
        max_attempts: upper bound on the number of random pairs tried. Every
            attempt counts, whether or not a path exists, so the search always
            terminates.

    Returns:
        Tuple (valid_start_points, valid_end_points, fraction_start,
        fraction_end). ([], [], 0, 0) is returned when a zone has no nodes or
        no seed path is found within `max_attempts`.
    """
    start_nodes = {n for n, data in G.nodes(data=True) if data.get("zone") == trip["start_zone_id"]}
    end_nodes = {n for n, data in G.nodes(data=True) if data.get("zone") == trip["end_zone_id"]}

    if not start_nodes or not end_nodes:
        return [], [], 0, 0

    is_multigraph = G.is_multigraph()

    def path_totals(node_path):
        """Return (total length, total travel time) along `node_path`."""
        total_length = 0
        total_time = 0
        for u, v in zip(node_path[:-1], node_path[1:]):
            edge = G[u][v]
            if is_multigraph:
                # Parallel edges: use the quickest one, which is the edge a
                # travel-time-weighted shortest path traverses. Key 0 is not
                # guaranteed to exist or to be that edge.
                edge = min(edge.values(), key=lambda data: data["travel_time"])
            total_length += edge["length"]
            total_time += edge["travel_time"]
        return total_length, total_time

    valid_start_points = set()
    valid_end_points = set()

    # random.choice needs a sequence; build the lists once, not per attempt
    start_candidates = list(start_nodes)
    end_candidates = list(end_nodes)

    # Jumpstart the recursion by finding a valid path (pick at random until one is found)
    print("Jumpstarting the recursion...")
    seed_path = None
    for _ in range(max_attempts):
        s = random.choice(start_candidates)
        e = random.choice(end_candidates)

        # Try to fetch the cached path
        path = get_cached_path(G, s, e)
        if not path:
            continue

        path_length, path_time = path_totals(path)
        length_ratio = abs(path_length - trip["distance"]) / trip["distance"]
        time_ratio = abs(path_time - trip["travel_time"]) / trip["travel_time"]

        if length_ratio < tolerance and time_ratio < tolerance:
            valid_start_points.add(s)
            valid_end_points.add(e)
            seed_path = path
            break

    # If no valid path was found, return empty lists
    if seed_path is None:
        print("No valid path found")
        return [], [], 0, 0

    # Now, we can use the valid path as a starting point for the recursion
    # Generate children aka. modified paths
    print("Generating modified paths...")
    modified_paths = generate_modified_paths(G, seed_path, trip, tolerance)

    # Only keep candidates that still start and end inside the trip's zones;
    # otherwise nodes from other zones would be counted and the fractions
    # below could exceed 1.
    for candidate in modified_paths:
        if candidate and candidate[0] in start_nodes and candidate[-1] in end_nodes:
            valid_start_points.add(candidate[0])
            valid_end_points.add(candidate[-1])

    # TODO: Implement the recursive exploration of the modified paths

    fraction_start = len(valid_start_points) / len(start_nodes)
    fraction_end = len(valid_end_points) / len(end_nodes)

    return list(valid_start_points), list(valid_end_points), fraction_start, fraction_end

def generate_modified_paths(G, path, trip, tolerance):
    """Return the one-step variations of `path` that still match the trip.

    Candidates are produced, in this order, by dropping the first node,
    dropping the last node, prepending a node and appending a node. Each
    candidate is kept only if `is_valid_path` accepts it.

    Args:
        G: networkx graph the path lives in.
        path: list of node ids; paths with fewer than two nodes yield [].
        trip: trip mapping forwarded to `is_valid_path`.
        tolerance: relative tolerance forwarded to `is_valid_path`.

    Returns:
        List of accepted candidate paths (each a new list of node ids).
    """
    if len(path) <= 1:
        return []

    # Shrink by one node at either end
    candidates = [path[1:], path[:-1]]

    # Grow at the start: the new node needs an edge *into* the current start,
    # so on a directed graph it has to be a predecessor (G.neighbors would
    # return successors). The node the path already continues to is skipped.
    incoming = G.predecessors if G.is_directed() else G.neighbors
    for neighbor in set(incoming(path[0])) - {path[1]}:
        candidates.append([neighbor] + path)

    # Grow at the end: successors of the current end, minus the node the
    # path arrived from
    for neighbor in set(G.neighbors(path[-1])) - {path[-2]}:
        candidates.append(path + [neighbor])

    return [candidate for candidate in candidates if is_valid_path(candidate, G, trip, tolerance)]

def is_valid_path(path, G, trip, tolerance):
    """Check whether a node path matches a trip's distance and travel time.

    The "length" and "travel_time" edge attributes are summed along `path`
    and compared with the trip; the path is valid when both relative
    deviations are strictly below `tolerance`.

    Works on plain graphs and on multigraphs. Between two nodes joined by
    parallel edges, the edge with the smallest "travel_time" is used.

    Args:
        path: list of node ids.
        G: networkx graph with "length" and "travel_time" edge attributes.
        trip: mapping with non-zero "distance" and "travel_time" values.
        tolerance: maximum allowed relative deviation (exclusive).

    Returns:
        True if the path matches the trip; False if it does not, or if the
        path uses an edge (or edge attribute) that is missing from the graph.
    """
    is_multigraph = G.is_multigraph()
    sub_path_length = 0
    sub_path_time = 0

    try:
        for u, v in zip(path[:-1], path[1:]):
            edge = G[u][v]
            if is_multigraph:
                # G[u][v] maps edge keys to attribute dicts on a multigraph
                edge = min(edge.values(), key=lambda data: data["travel_time"])
            sub_path_length += edge["length"]
            sub_path_time += edge["travel_time"]
    except KeyError:
        # Missing edge or attribute: the candidate is not a usable path
        return False

    length_ratio = abs(sub_path_length - trip["distance"]) / trip["distance"]
    time_ratio = abs(sub_path_time - trip["travel_time"]) / trip["travel_time"]

    return length_ratio < tolerance and time_ratio < tolerance


In [None]:
# Entry point: load the city data (zones, street graph, trips) used below

# Flip to True to work on the synthetic toy city instead of the real one
USE_FAKE_CITY = False

if not USE_FAKE_CITY:
    # Real city: zones, street graph and trips only
    zones, street_graph, trips = generate_real_city_data() # type: ignore
    print("Visualising real city...")
    # The plot itself is currently switched off
    #display_real_city(street_graph, zones, trips) # type: ignore
else:
    # Synthetic city: also yields the canvas size needed for plotting
    zones, street_graph, trips, width, height = generate_synthetic_city_data() # type: ignore
    visualise(street_graph, zones, width, height, trips=trips) # type: ignore

In [None]:
# IGraph might be faster?
CONVERT_TO_IGRAPH = True

if CONVERT_TO_IGRAPH:
    i_street_graph = ig.Graph(directed=True)
    print("Adding vertices...")

    # igraph vertices are addressed by contiguous indices, so map every
    # networkx node id to its position in the node iteration order
    node_id_map = {node: idx for idx, node in enumerate(street_graph.nodes())}

    i_street_graph.add_vertices(len(node_id_map))

    print("Adding edges...")
    edge_attributes = ['osmid', 'highway', 'lanes', 'maxspeed', 'name', 'oneway', 'ref', 'reversed', 'length', 'geometry', 'speed_kph', 'travel_time']

    # Collect endpoints and attribute columns first, then insert everything in
    # one call: adding edges one at a time is very slow on large graphs
    edge_list = []
    edge_attribute_values = {attr: [] for attr in edge_attributes}
    for u, v, data in tqdm(street_graph.edges(data=True), desc="Adding edges"):
        edge_list.append((node_id_map[u], node_id_map[v]))
        for attr in edge_attributes:
            edge_attribute_values[attr].append(data.get(attr, None))

    i_street_graph.add_edges(edge_list)
    # The graph had no edges before, so each column lines up with the edge ids
    for attr, values in edge_attribute_values.items():
        i_street_graph.es[attr] = values

    # Add attributes to the iGraph object (which are ['y', 'x', 'highway', 'ref', 'street_count', 'zone'])
    print("Adding attributes...")
    attributes = ['y', 'x', 'highway', 'ref', 'street_count', 'zone']
    for attr in attributes:
        # Same node iteration order as node_id_map, so values match vertex indices
        i_street_graph.vs[attr] = [street_graph.nodes[node].get(attr, None) for node in street_graph.nodes()]

In [143]:
print("Getting valid area")
# Collect the valid-area fractions of every trip, then report their means
start_fractions, end_fractions = [], []
for trip in trips:
    print(trip)
    (valid_start_points, valid_end_points,
     fraction_start, fraction_end) = get_valid_area_fast(street_graph, trip)
    start_fractions += [fraction_start]
    end_fractions += [fraction_end]

print("Average fraction of valid start points:", np.mean(start_fractions))
print("Average fraction of valid end points:", np.mean(end_fractions))

Getting valid area
{'id': 0, 'start_zone_id': 246, 'start_zone_name': 'West Chelsea/Hudson Yards', 'end_zone_id': 115, 'end_zone_name': 'Grymes Hill/Clifton', 'travel_time': 1485.1334514469493, 'distance': 23621.023799470328, 'start_node': 42427390, 'end_node': 42949374, 'start_coordinate': (-74.005534, 40.7458171), 'end_coordinate': (-74.0803791, 40.6282814), 'route': [42427390, 42427393, 42443677, 246579597, 12161232243, 246579753, 373903786, 11049420899, 8288270047, 11027912161, 402480678, 246649427, 246890579, 12162542907, 4142073861, 246889572, 246890279, 12410398108, 12410398107, 246858448, 246858449, 246858435, 373880031, 246858433, 246858431, 5815234915, 9410604941, 3574648567, 4149947889, 449658637, 3574731739, 5882645201, 449658617, 449388063, 449388060, 598290969, 449388544, 449025263, 449029625, 449029630, 449588300, 449588315, 598357923, 598365407, 448151879, 448152046, 598365323, 6369277562, 6369269594, 3788073403, 679340670, 42955536, 411572871, 5528176064, 42947710, 552