In [None]:
import requests
import pandas as pd

# Base URLs of the OSRM routing servers on localhost; the names indicate one
# server for NY trips and one for SF trips.
OSRM_URL_NY, OSRM_URL_SF = "http://localhost:5000", "http://localhost:5001"


# Load the cleaned trip tables; the first CSV column becomes the row index.
train, test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('train_cleaned.csv', 'test_cleaned.csv')
)

In [None]:
def get_osrm_route(osrm_url, start_lat, start_lng, end_lat, end_lng, timeout=10):
    """Fetch the driving distance and duration between two points from OSRM.

    Parameters
    ----------
    osrm_url : str
        Base URL of the OSRM server (no trailing slash), e.g. "http://localhost:5000".
    start_lat, start_lng : float
        Latitude / longitude of the origin.
    end_lat, end_lng : float
        Latitude / longitude of the destination.
    timeout : float or tuple, optional
        Timeout in seconds forwarded to ``requests.get``. Without it a stalled
        connection would block the caller indefinitely.

    Returns
    -------
    tuple
        ``(distance, duration)`` taken from the first route of the response,
        or ``(None, None)`` when the server answers with a non-200 status or
        the response contains no routes.

    Raises
    ------
    requests.RequestException
        Network-level failures (connection refused, timeout, ...) are not
        swallowed, so an unreachable server is not mistaken for "no route".
    """
    # OSRM expects coordinates in "lng,lat" order, pairs separated by ";".
    coordinates = f"{start_lng},{start_lat};{end_lng},{end_lat}"
    route_url = f"{osrm_url}/route/v1/driving/{coordinates}?overview=false"

    response = requests.get(route_url, timeout=timeout)
    if response.status_code != 200:
        return None, None

    # A missing or empty "routes" entry is treated the same way: no route found.
    routes = response.json().get("routes")
    if not routes:
        return None, None

    best_route = routes[0]
    return best_route["distance"], best_route["duration"]

In [None]:
def _osrm_features(trips, osrm_url):
    """Query OSRM for every trip and return only the routing features.

    Returns a new float64 DataFrame with columns ``osrm_distance`` and
    ``osrm_duration``, indexed like ``trips``. Lookups for which
    ``get_osrm_route`` yields ``None`` become NaN. Building a fresh frame
    (instead of adding columns to a filtered slice) avoids chained-assignment
    problems and also works when ``trips`` is empty.
    """
    routes = [
        get_osrm_route(osrm_url, start_lat, start_lng, end_lat, end_lng)
        for start_lat, start_lng, end_lat, end_lng in zip(
            trips["start_lat"], trips["start_lng"], trips["end_lat"], trips["end_lng"]
        )
    ]
    # Forcing float64 turns None into NaN, so the "> 0" filter below never
    # compares None with a number, even if every lookup failed.
    return pd.DataFrame(
        routes,
        index=trips.index,
        columns=["osrm_distance", "osrm_duration"],
        dtype="float64",
    )


def _with_positive_route(features):
    """Keep only rows whose distance and duration are both strictly positive."""
    has_route = (features["osrm_distance"] > 0) & (features["osrm_duration"] > 0)
    return features[has_route]


# One OSRM lookup per trip, routed to the server of the trip's start city.
ny_df_train = _osrm_features(train[train['start_city'] == 'NY'], OSRM_URL_NY)
ny_df_test = _osrm_features(test[test['start_city'] == 'NY'], OSRM_URL_NY)

sf_df_train = _osrm_features(train[train['start_city'] == 'SF'], OSRM_URL_SF)
sf_df_test = _osrm_features(test[test['start_city'] == 'SF'], OSRM_URL_SF)

# Only the training rows are filtered; test rows without a usable route are
# kept (as NaN) so that no test row is dropped.
ny_df_train = _with_positive_route(ny_df_train)
sf_df_train = _with_positive_route(sf_df_train)

# Recombine both cities and restore the original row order via the index.
train_osrm = pd.concat([ny_df_train, sf_df_train]).sort_index()
test_osrm = pd.concat([ny_df_test, sf_df_test]).sort_index()

train_osrm.to_csv('train_osrm.csv')
test_osrm.to_csv('test_osrm.csv')