## Packages and Configuration


In [3040]:
# %pip install plotly.express
# %pip install geopandas
# %pip install nbformat

In [3041]:
# Suppress hpy5 dependency warning from tslearn
import warnings
warnings.filterwarnings("ignore")

In [3042]:
# data libaries
from tslearn.metrics import dtw
from datetime import datetime, timedelta
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import math, json

# import plotly.express as px
# import geopandas as gpd

## Database Configuration

In [3043]:
from pymongo import MongoClient
from dotenv import load_dotenv
import certifi, os, uuid

In [3044]:
load_dotenv("../.env.prod")

True

In [3045]:
# get from environment
db_str = os.getenv("DB_STRING")
client = MongoClient(db_str, tlsCAFile=certifi.where(), uuidRepresentation='standard', tz_aware=True)
database = client.get_database("db")

## RideNow Matching Approach

1. Temporal, limit our candidate set to passengers traveling the same time
2. Spatial, further limit our candidate to passengers with proximity to driver trajectory
3. Correlation, find the driver with highest correlation trajectory in the candidate subset

### Load Trajectory Data


In [3046]:
def geojson_to_df(data) -> pd.DataFrame:
    # Extract passenger data
    coordinates = data["route"]["shape"]["coordinates"]
    # timestamps = [datetime.isoformat(ts) for ts in data["route"]["timestamps"]]

    # Create DataFrame
    df = pd.DataFrame({
        "lat": [coord[1] for coord in coordinates],
        "lon": [coord[0] for coord in coordinates], 
        # "timestamp": pd.to_datetime(timestamps)
    })

    return df

In [3047]:
with open("./search_p.json", "r") as file:
    data_passenger = json.load(file)

In [3048]:
with open("./search_d.json", "r") as file:
    data_driver = json.load(file)

### Trajectory Visualiazation


In [3049]:
df_d = geojson_to_df(data_driver)
df_p = geojson_to_df(data_passenger)

In [3050]:
df_p.head()

Unnamed: 0,lat,lon
0,55.65072,12.54122
1,55.65001,12.54043
2,55.64473,12.54384
3,55.64289,12.54686
4,55.64053,12.5538


In [3051]:
df_d.head()

Unnamed: 0,lat,lon
0,55.65072,12.54122
1,55.65001,12.54043
2,55.64473,12.54384
3,55.64289,12.54686
4,55.64053,12.5538


In [3052]:
fig = go.Figure()

# add passenger and driver trajectories as traces
fig.add_trace(go.Scattermapbox(mode="lines", lon=df_p["lon"], lat=df_p["lat"], line_color="blue"))
fig.add_trace(go.Scattermapbox(mode="lines", lon=df_d["lon"], lat=df_d["lat"], line_color="purple"))

fig.update_layout(height=600, mapbox=dict(style="open-street-map", zoom=6, center=dict(lon=df_p["lon"].mean(), lat=df_p["lat"].mean())))
fig.update_layout(dragmode=False, uirevision="lock")

### Data conversion


In [3053]:
def convert_to_unix(timestamp) -> float:
    datetime_obj = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
    unix_time = datetime_obj.timestamp()
    return unix_time

In [3054]:
def convert_data(matrix):
    # Convert the timestamps to Unix time using pandas (you could also use datetime module in Python)
    timestamps = [convert_to_unix(date) for _, _, date in matrix]

    # Create a numpy array for latitudes, longitudes, and Unix timestamps
    latitudes = np.array([lat for lat, _, _ in matrix])
    longitudes = np.array([lng for _, lng, _ in matrix])
    timestamps = np.array(timestamps)

    return np.array(list(zip(latitudes, longitudes, timestamps)))

### GPS point interpolation


In [3055]:
# Calculate the distance between two points on the Earth's surface using Haversine formula.
def haversine_distance(lat1, lon1, lat2, lon2):
    R = 6371  # Radius of the Earth in kilometers
    d_lat = math.radians(lat2 - lat1)
    d_lon = math.radians(lon2 - lon1)
    a = math.sin(d_lat / 2) * math.sin(d_lat / 2) + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(d_lon / 2) * math.sin(
        d_lon / 2
    )
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    d = R * c
    return d

In [3056]:
# used to extend line segments for finer control.
def intermediate_points(lat1, lon1, lat2, lon2, n):
    intermediate_points = []
    total_distance = haversine_distance(lat1, lon1, lat2, lon2)
    segment_distance = total_distance / (n + 1)  # n+2 points including the start and end points
    for i in range(1, n + 1):
        f = segment_distance * i / total_distance
        lat = lat1 + f * (lat2 - lat1)
        lon = lon1 + f * (lon2 - lon1)
        intermediate_points.append((lat, lon))
    return intermediate_points

In [3057]:
# Find the closest pair of points between two sets of points.
def closest_pair(points1, points2):
    min_distance = float("inf")
    closest_point_pair = None
    for p1 in points1:
        for p2 in points2:
            d = haversine_distance(p1[0], p1[1], p2[0], p2[1])
            if d < min_distance:
                min_distance = d
                closest_point_pair = (p1, p2)
    return closest_point_pair, min_distance

### Step 1: temporal - find passengers who travels at this time

1. We query the driver document
2. Use the departure to do a temporal range query on passengers.

In [3058]:
try:
    document_id = uuid.UUID("71a2aa23-f60f-44ba-847c-10ce2928fecc")
    driver = database["rides"].find_one({"_id": document_id})

    if driver:
        print(f"document found for: {document_id}")
    else:
        print(f"no document found for: {document_id}")
except Exception as e:
    print(f"failed with {e}")

document found for: 71a2aa23-f60f-44ba-847c-10ce2928fecc


In [3059]:
driver["departure"]

datetime.datetime(2024, 5, 15, 8, 0, 52, tzinfo=<bson.tz_util.FixedOffset object at 0x30b3010a0>)

In [3060]:
# Define the range for the query (e.g., 2 hours before and after the datetime)
delta_time = 180

delta_prev = driver["departure"] - timedelta(minutes=delta_time)
delta_next = driver["departure"] + timedelta(minutes=delta_time)

# Construct the query to find rides within the specified range
query = {"departure": {"$gte": delta_prev, "$lte": delta_next}}

In [3061]:
# Perform the query
results = database["ride_searches"].find(query)

In [3062]:
passengers = []

for doc in results:
    passengers.append(doc)

print(passengers)

[{'_id': UUID('7c3fd194-72f3-4a5c-a338-3bd337226d7f'), 'passenger': UUID('941c2cec-cc83-4759-9e6c-6231f40f5ea9'), 'start_address': {'street': 'Grønhøjgårdsvej 17', 'country': 'Danmark', 'city': 'Taastrup', 'province': 'Region Hovedstaden', 'postal_code': '2630', 'coordinate': {'latitude': 55.6449139, 'longitude': 12.3220772}}, 'destination_address': {'street': 'Roskilde', 'country': 'Danmark', 'city': 'Roskilde', 'province': 'Region Sjælland', 'postal_code': '', 'coordinate': {'latitude': 55.642411, 'longitude': 12.0831694}}, 'departure': datetime.datetime(2024, 5, 15, 8, 0, 47, tzinfo=<bson.tz_util.FixedOffset object at 0x30b3010a0>), 'max_deviation': 5000, 'route': {'distance': 19458.066, 'expected_travel_time': 1628.449, 'shape': {'type': 'LineString', 'coordinates': [[12.32119, 55.64496], [12.321319999999998, 55.64577], [12.32296, 55.64595], [12.320800000000004, 55.65293], [12.30609, 55.65129999999998], [12.303230000000001, 55.65160000000001], [12.30931, 55.65874000000001], [12.317

In [3063]:
len(passengers)

10

### Step 2: spatial - Compute points in range (GeoQueries)

In [3064]:
max_deviation = 5

In [3065]:
in_range_start = []
in_range_dest = []

In [3066]:
# iterate over driver geo points for start candidates
for d_point in driver["route"]["shape"]["coordinates"]:
    # check each passengers starting point
    for p in passengers:
        p_points = p["route"]["shape"]["coordinates"]
        dist = haversine_distance(d_point[0], d_point[1], p_points[0][0], p_points[0][1])
        if dist <= max_deviation:
            if p not in in_range_start:
                in_range_start.append(p)

In [3067]:
# Pre-calculate distances between driver's route points and start/end points of each passenger
# passenger_distances = {}
# for i, p in enumerate(passengers):
#     p_start = p["route"]["shape"]["coordinates"][0]
#     p_end = p["route"]["shape"]["coordinates"][-1]

#     start_distances = [haversine_distance(d_point[0], d_point[1], p_start[0], p_start[1]) for d_point in driver["route"]["shape"]["coordinates"]]
#     end_distances = [haversine_distance(d_point[0], d_point[1], p_end[0], p_end[1]) for d_point in driver["route"]["shape"]["coordinates"]]
#     passenger_distances[i] = {"start": start_distances, "end": end_distances}

In [3068]:
# iterate over driver geo points for destination candidates
for d_point in driver["route"]["shape"]["coordinates"]:
    # check passengers that passed start check
    for p in in_range_start:
        p_points = p["route"]["shape"]["coordinates"]
        dist = haversine_distance(d_point[0], d_point[1], p_points[-1][0], p_points[-1][1])
        if dist <= max_deviation:
            if p not in in_range_dest:
                in_range_dest.append(p)

In [3069]:
# Iterate over driver geo points
# for idx, d_point in enumerate(driver["route"]["shape"]["coordinates"]):
#     # Check start and end points for each passenger
#     for i, dists in passenger_distances.items():
#         start_dist = dists["start"][idx]
#         end_dist = dists["end"][idx]
#         if start_dist <= driver["max_deviation"]:
#             in_range_start.append(i)
#         if end_dist <= driver["max_deviation"] and i in in_range_start:
#             in_range_dest.append(i)

In [3070]:
len(in_range_start)

6

In [3071]:
len(in_range_dest)

4

In [3072]:
candidates = [p for p in in_range_start if p in in_range_dest]

In [3073]:
for candidate in candidates:
    print(f"rute: ({candidate['start_address']['street']}, {candidate['destination_address']['street']})")

rute: (A C Meyers Vænge 15, Hundige Strandvej 21)
rute: (Vindingevej 36, Fredericia)
rute: (Fredericia, Vejle)
rute: (Vejle, Herning)


### Visualize Candidates

In [3074]:
fig = go.Figure()

# add passenger and driver trajectories as traces
for candidate in candidates:
    df_c = geojson_to_df(candidate)
    fig.add_trace(go.Scattermapbox(mode="lines", lon=df_c["lon"], lat=df_c["lat"], line_color="green"))

    
# fig.add_trace(go.Scattermapbox(mode="lines", lon=df_p["lon"], lat=df_p["lat"], line_color="blue"))
fig.add_trace(go.Scattermapbox(mode="lines", lon=df_d["lon"], lat=df_d["lat"], line_color="purple"))

fig.update_layout(height=600, mapbox=dict(style="open-street-map", zoom=6, center=dict(lon=df_p["lon"].mean(), lat=df_p["lat"].mean())))
fig.update_layout(dragmode=False, uirevision="lock")

### Deprecated GeoSearch Code

In [3075]:
max_distance = driver["max_deviation"]
route = driver["route"]["shape"]["coordinates"]

In [3076]:
# TODO: Not working with data-structures: 
# Query documents within the specified distance from the reference point
point_queries = []
for point in route:
    query = {"shape": {"$nearSphere": {"$geometry": point, "$maxDistance": max_distance}}}
    point_queries.append(query)

### Step 3: correlation - Finding optimal matches from candidate list

In [3077]:
df_d = geojson_to_df(driver)

distances = {}
for candidate in candidates:
    df_c = geojson_to_df(candidate)
    distance = dtw(df_d, df_c)
    print(f"{candidate['start_address']['street']}, {candidate['destination_address']['street']} -> {distance}")

    


A C Meyers Vænge 15, Hundige Strandvej 21 -> 34.40499353874371
Vindingevej 36, Fredericia -> 14.048204611027778
Fredericia, Vejle -> 23.823781402346736
Vejle, Herning -> 25.824822616610593


In [3078]:
# p1_points = intermediate_points(p1_start[0], p1_start[1], p1_end[0], p1_end[1], 10)
df_p_ext = df_p[["lat", "lon"]]
df_d_ext = df_d[["lat", "lon"]]
distance = dtw(df_p_ext, df_d_ext)

In [3079]:
print(f"Trip Correlation: {distance}")

Trip Correlation: 11.065633657016432
