In [14]:
import os, pickle, time

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import shapely.geometry as sgeo
from tqdm.notebook import tqdm

In [None]:
# Paths: survey data to read, pickle cache of the raw API responses (used to
# resume interrupted runs) and the parquet file written at the end
input_path = "../../../results/road/survey.parquet"
cache_path = "../../../results/road/freeflow/api_cache.pickle"
output_path = "../../../results/road/freeflow/reference_api.parquet"

# The API key is read from a local file so that it never appears in the notebook
key_path = "../../../resources/tomtom_api_key"
with open(key_path) as f:
    # Strip surrounding whitespace: a trailing newline in the key file would
    # otherwise be sent to the API as part of the "key" query parameter
    api_key = f.read().strip()

# Pause in seconds between two consecutive API requests
request_slack = 0.5

In [16]:
# Load the survey data from the configured parquet file
df_survey = pd.read_parquet(path = input_path)

In [17]:
# Resume from previously stored API responses if a cache file is present;
# otherwise start with an empty list
cache = []

if os.path.exists(cache_path):
    # NOTE: pickle can execute arbitrary code on load, so the cache file must
    # only ever be one that was written by this notebook
    with open(cache_path, "rb") as cache_file:
        cache = pickle.load(cache_file)

In [18]:
# Build one straight origin-destination line per row, declare the coordinates
# as EPSG:2154 and reproject everything to EPSG:4326
origin_points = [
    sgeo.Point(x, y)
    for x, y in zip(df_survey["origin_x"], df_survey["origin_y"])
]

destination_points = [
    sgeo.Point(x, y)
    for x, y in zip(df_survey["destination_x"], df_survey["destination_y"])
]

df_survey["geometry"] = [
    sgeo.LineString([origin, destination])
    for origin, destination in zip(origin_points, destination_points)
]

df_survey = gpd.GeoDataFrame(df_survey, crs = "EPSG:2154").to_crs("EPSG:4326")

In [19]:
def format_location(coordinate):
    """Format a coordinate tuple in reversed order, e.g. (x, y) becomes "y,x"."""
    return "{},{}".format(*coordinate[::-1])

# One request per row: first and second vertex of the line are the origin and
# the destination (reversed order is presumably "latitude,longitude" - TODO confirm)
request_list = [
    {
        "origin": format_location(line.coords[0]),
        "destination": format_location(line.coords[1]),
    }
    for line in df_survey["geometry"]
]

# Responses are cached in request order, so the first len(cache) requests
# have already been answered and can be dropped
request_list = request_list[len(cache):]

In [20]:
# Positions in the request sequence that the API answers with status 400 for
# an unknown reason (the requests look perfectly fine); they are stored as
# None so that the cache stays aligned with the rows of df_survey
skipped_positions = {25271, 33318, 39340}

# Seconds to wait for the API before a request is aborted; without a timeout
# a stalled connection would block the loop forever
request_timeout = 60

route_url = "https://api.tomtom.com/routing/1/calculateRoute/{}:{}/json"

with tqdm(initial = len(cache), total = len(df_survey)) as progress:
    for request in request_list:
        if len(cache) in skipped_positions:
            cache.append(None)
            progress.update()  # skipped entries count towards the total as well
            continue

        try:
            response = requests.get(
                route_url.format(request["origin"], request["destination"]),
                params = {
                    "departAt": "2024-05-01T04:00:00",
                    "routeType": "fastest",
                    "traffic": "false",
                    "key": api_key
                },
                timeout = request_timeout
            )

            # Stop at the first failed request; everything obtained so far
            # remains in the cache and the run can be resumed later
            if response.status_code != 200:
                print("Error code:", response.status_code, response.content)
                break

            cache.append(response.json())
            progress.update()
            time.sleep(request_slack)

        except KeyboardInterrupt:
            # Manual interruption keeps the partial cache, like any other failure
            print("Interrupted by user")
            break

        except Exception as error:
            # Only the exception type is printed: the message of connection
            # errors can contain the request URL including the API key
            print("General exception:", type(error).__name__)
            break

100%|##########| 39384/39384 [00:00<?, ?it/s]

In [21]:
# Persist all responses collected so far, so that a partial run can be resumed
print("Obtained", len(cache), "of", len(df_survey), "routes")

# Create the target directory first: if it were missing, open() would fail
# and every response fetched in this session would be lost
cache_directory = os.path.dirname(cache_path)
if cache_directory:
    os.makedirs(cache_directory, exist_ok = True)

# Write-only binary mode is sufficient, the file is never read back here
with open(cache_path, "wb") as f:
    pickle.dump(cache, f)

Obtained 39384 of 39384 routes


In [22]:
# Derive one travel time per cached response: the reported travel time minus
# the reported traffic delay; skipped requests (None) become NaN
travel_times = []

for item in cache:
    if item is None:
        travel_times.append(np.nan)
        continue

    summary = item["routes"][0]["summary"]
    travel_times.append(
        summary["travelTimeInSeconds"] - summary["trafficDelayInSeconds"]
    )

# Partially set up the reference data: only the leading rows for which a
# response is cached are kept, so this also works with an incomplete cache
df_reference = (
    df_survey.iloc[:len(travel_times)]
    .rename(columns = {"travel_time": "survey_travel_time_s"})
)

df_reference["reference_travel_time_s"] = travel_times

In [23]:
# Visual check: survey travel times against the reference travel times
# (plotly.express is imported as px in the import cell at the top)
px.scatter(
    df_reference,
    x = "survey_travel_time_s", y = "reference_travel_time_s",
    title = "Survey vs. reference travel times",
    labels = {
        "survey_travel_time_s": "Survey travel time [s]",
        "reference_travel_time_s": "Reference travel time [s]"
    }
)

In [24]:
# Write the reference data, restricted to the trip identifier, the
# origin/destination coordinates, the reference travel time and the weight
output_columns = [
    "trip_id",
    "origin_x", "origin_y",
    "destination_x", "destination_y",
    "reference_travel_time_s",
    "weight",
]

df_reference[output_columns].to_parquet(output_path)

In [25]:
# In pipeline mode an incomplete cache must abort the run, so fail loudly
# as long as fewer responses than survey rows are available
route_count, trip_count = len(cache), len(df_survey)

if trip_count > route_count:
    raise RuntimeError("Not all routes have been obtained")