In [1]:
from DOPU_given_timerange import get_cleaned_df
import pandas as pd
import numpy as np
import time
import numpy as np
import openrouteservice
from itertools import product
from openrouteservice.exceptions import ApiError

%pip install openrouteservice
import openrouteservice

NUMBER_OF_ZONES = 265

%run "intra zone depth/city_utilities.ipynb"
%run "intra zone depth/real_city_generation.ipynb"

Note: you may need to restart the kernel to use updated packages.


In [4]:
zones, street_graph, trips = generate_real_city_data(do_fabricate_trips=False) # type: ignore

Getting graph...
Graph loaded from 'nyc_roads.graphml'
Getting zones...
Assigning zones to nodes...


In [5]:
get_cleaned_df()

Processing files: 100%|██████████| 24/24 [00:24<00:00,  1.00s/it]


In [6]:
df = pd.read_parquet("all_cleaned_data/all_cleaned_data.parquet", engine="fastparquet")
df.head()

Unnamed: 0,VendorID,PickupDatetime,DropoffDatetime,TripDuration,PassengerCount,TripDistance,PULocationID,DOLocationID,PaymentType,FareAmount,ExtraCharges,MTATax,TipAmount,TollsAmount,ImprovementSurcharge,TotalAmount,CongestionSurcharge,AirportFee
0,2,2023-01-01 00:32:10,2023-01-01 00:40:36,8.433333,1.0,0.97,161,141,2,9.3,1.0,0.5,0.0,0.0,1.0,14.3,2.5,0.0
1,2,2023-01-01 00:55:08,2023-01-01 01:01:27,6.316667,1.0,1.1,43,237,1,7.9,1.0,0.5,4.0,0.0,1.0,16.9,2.5,0.0
2,2,2023-01-01 00:25:04,2023-01-01 00:37:49,12.75,1.0,2.51,48,238,1,14.9,1.0,0.5,15.0,0.0,1.0,34.900002,2.5,0.0
3,2,2023-01-01 00:10:29,2023-01-01 00:21:19,10.833333,1.0,1.43,107,79,1,11.4,1.0,0.5,3.28,0.0,1.0,19.68,2.5,0.0
4,2,2023-01-01 00:50:34,2023-01-01 01:02:52,12.3,1.0,1.84,161,137,1,12.8,1.0,0.5,10.0,0.0,1.0,27.799999,2.5,0.0


In [7]:
# Bundle trips based on where they start and end. Store the distance of each trip

# ! We filter then group so there's less data to work with.
def bundle_trip_distances(df, save_path="trip_distances.parquet"):
    print("Preparing flat table...")
    exploded_df = df[["PULocationID", "DOLocationID", "TripDistance"]].copy()
    
    print("Exploding trip distances...")
    exploded_df["TripDistance"] = exploded_df["TripDistance"].astype(np.float16)
    
    print("Saving exploded trip distances...")
    exploded_df.to_parquet(save_path, index=False)
    print(f"Saved trip distances to {save_path}.")

    return exploded_df

def read_bundled_trip_distances(file_path="trip_distances.parquet"):
    df = pd.read_parquet(file_path)
    print(f"Grouping trip distances...")
    return df.groupby(["PULocationID", "DOLocationID"])["TripDistance"].apply(np.array)

bundle_trip_distances(df)
trip_distances = read_bundled_trip_distances()
trip_distances.head()

Preparing flat table...
Exploding trip distances...
Saving exploded trip distances...
Saved trip distances to trip_distances.parquet.
Grouping trip distances...


PULocationID  DOLocationID
1             1               [0.23, 1.76, 3.8, 3.5, 9.6, 1.35, 1.83, 2.1, 0...
              23                                                         [11.4]
              50                                                  [17.62, 17.5]
              68                                           [13.8, 16.69, 16.55]
              87                                                        [21.02]
Name: TripDistance, dtype: object

In [8]:
edge_nodes_lookup = find_edge_nodes(street_graph) # type: ignore
print(edge_nodes_lookup)


{15.0: {8904353796, 5155201423, 5155200018, 5155201428, 5155201430, 1397756574, 274285474, 42854309, 42856369, 42809269, 42907062, 6557781048, 2883540409, 6557781049, 3789687872, 274283973, 42811471, 42861392, 42861394, 6210443218, 42833108, 7767939926, 42833111, 7767939927, 42861401, 42811866, 42861405, 42861406, 42811871, 42811874, 42833123, 42861412, 42833127, 274302953, 274302955, 7752550765, 42811887, 42809328, 42816751, 274283764, 42811894, 274302971}, 64.0: {464711680, 42847753, 39076490, 42787210, 461917223, 42902440, 42830634, 7824641837, 42815918, 42877872, 8794475440, 7761392180, 42806326, 42893112, 2351786298, 42823362, 42823364, 42810702, 7761381332, 464696533, 42803679, 42803176, 42914795, 462759531, 277672046, 42880624, 42828020, 42940661}, 24.0: {42421728, 561035330, 42421731, 42441283, 42421737, 42442569, 42421741, 12534082095, 42435346, 42421749, 42428760, 42441913, 5004570651, 42435359}, 151.0: {42437920, 42431106, 5004570627, 1061531654, 42433256, 42435337, 42441310

In [9]:
# edge_distance_lookup = generate_edge_distances_lookup(street_graph, edge_nodes_lookup) # type: ignore
# print(edge_distance_lookup)

In [29]:
def generate_edge_distances_lookup_fast(street_graph, edge_nodes_lookup, api_key, batch_size=3500):
    # Convert the lookup into a list of zones and their edge nodes
    zones = list(edge_nodes_lookup.keys())
    
    # Get the longitude and latitude of the edge nodes
    edge_node_coords = {node_id: (street_graph.nodes[node_id]["x"], street_graph.nodes[node_id]["y"])
                        for zone_nodes in edge_nodes_lookup.values()
                        for node_id in zone_nodes}
    
    print(f"Total edge nodes: {len(edge_node_coords)}")
    
    # Generate all possible node pairs between different zones
    node_pairs = []
    for zone1, zone2 in product(zones, repeat=2):
        if zone1 != zone2:
            for node1 in edge_nodes_lookup[zone1]:
                for node2 in edge_nodes_lookup[zone2]:
                    node_pairs.append((node1, node2))
    
    print(f"Total number of pairs: {len(node_pairs)}")
    
    # Initialize OpenRouteService client
    client = openrouteservice.Client(key=api_key)
    
    # Calculate number of batches needed
    num_batches = len(node_pairs) // batch_size + (len(node_pairs) % batch_size > 0)
    print(f"Number of batches: {num_batches}")
    
    return
    
    # Initialize the distance matrix as a square matrix of zeros
    distance_matrix = np.zeros((len(edge_node_coords), len(edge_node_coords)))
    
    # Process pairs in batches
    for i in range(0, len(node_pairs), batch_size):
        batch_pairs = node_pairs[i:i + batch_size]
        print(f"Processing batch {i // batch_size + 1} with {len(batch_pairs)} pairs...")
        
        # Prepare the locations for the API call
        batch_coords = [(edge_node_coords[pair[0]], edge_node_coords[pair[1]]) for pair in batch_pairs]
        
        try:
            # Request distance matrix from OpenRouteService API
            matrix = client.distance_matrix(
                locations=batch_coords,
                profile='driving-car',
                metrics=['distance'],
                resolve_locations=True,
            )
            
            # Fill the distance matrix
            for j, (pair, distance) in enumerate(zip(batch_pairs, matrix["distances"])):
                idx1, idx2 = pair
                distance_matrix[idx1, idx2] = distance[0]  # Direct distance from idx1 to idx2 (asymmetric)
            
            print(f"Batch {i // batch_size + 1} processed successfully.")
        
        except ApiError as e:
            print(f"Error processing batch {i // batch_size + 1}: {e}")
            continue
        
        # Add a delay to respect the API rate limit
        if (i // batch_size + 1) % 40 == 0:
            print("Rate limit reached. Waiting for 60 seconds...")
            time.sleep(60)
    
    print("Distance matrix generation complete.")
    
    # Save the combined matrix to a file
    np.save("edge_distance_matrix.npy", distance_matrix)
    print("Saved edge distance matrix to edge_distance_matrix.npy")
    
    return distance_matrix

api_key = ""

if api_key == "":
    raise ValueError("API key is required")

generate_edge_distances_lookup_fast(street_graph, edge_nodes_lookup, api_key)

ValueError: API key is required

In [None]:
# Example usage of the OpenRouteService API
client = openrouteservice.Client(key=api_key)

start_locations = [
    (8.681495, 49.41461),  # Start 1
    (8.687872, 49.420318), # Start 2
    (8.692803, 49.41943)   # Start 3
]

end_locations = [
    (8.695516, 49.420204),  # End 1
    (8.694580, 49.417205),  # End 2
    (8.690743, 49.416937)   # End 3
]

locations = start_locations + end_locations

# Compute distance matrix
matrix = client.distance_matrix(
    locations=locations,
    profile='driving-car',
    metrics=['distance'],
    resolve_locations=True
)

print(matrix)


In [12]:
if False:
    distance_data = matrix['distances']
    print(distance_data)