In [21]:
import pandas as pd
import numpy as np
import osmnx as ox
from geopy.distance import geodesic
from tqdm import tqdm
from shapely.geometry import MultiPoint
import time
from osmnx.projection import project_geometry

# ---------------------------------------------------------------------------
# Per-track feature extraction.
#
# For every tracking id this cell computes a few GPS-derived features and,
# when possible, street-network statistics from OpenStreetMap for the convex
# hull of the track. One row per track is written to OUTPUT_CSV.
#
# NOTE: the full run queries the Overpass API once per track and is slow
# (the recorded run took ~49 minutes for 850 tracks). Use N_SAMPLE_TRACKS
# to iterate quickly.
# ---------------------------------------------------------------------------

# --- Settings ---------------------------------------------------------------
WAYPOINTS_PATH = "../data/filtered_waypoints.parquet"
OUTPUT_CSV = "tracking_osm_convexhull.csv"
N_SAMPLE_TRACKS = None       # set to a small int (e.g. 5) while testing; None = all tracks
MIN_POINTS = 3               # tracks with fewer valid points are skipped entirely
MIN_HULL_AREA = 1e-6         # hull area threshold, in the lon/lat units of the input
AREA_EPSILON = 1e-6          # keeps point_density finite when bbox_area is 0
OVERPASS_PAUSE_S = 1.5       # pause before each OSM download (be kind to the Overpass API)

# Keys read from ox.basic_stats(); also the OSM column names (and order) in the output.
# Defined once so the success path and the NaN fallback can never drift apart.
OSM_FEATURE_KEYS = (
    "street_length_total",
    "intersection_count",
    "street_density_km",
    "edge_density_km",
    "circuity_avg",
    "intersection_density_km",
    "node_density_km",
)

# --- Load and prepare waypoint data -----------------------------------------
df = pd.read_parquet(WAYPOINTS_PATH)
df = df.sort_values(by=["id_tracking", "sequence"]).reset_index(drop=True)

if N_SAMPLE_TRACKS is not None:
    # Limit to the first N tracking ids (in sorted order) for quick experiments.
    sample_ids = df["id_tracking"].unique()[:N_SAMPLE_TRACKS]
    grouped = df[df["id_tracking"].isin(sample_ids)].groupby("id_tracking")
else:
    grouped = df.groupby("id_tracking")

features = []

for tracking_id, group in tqdm(grouped, desc="Extracting OSM features via Convex Hull"):
    group = group.dropna(subset=["latitude", "longitude"])
    coords = list(zip(group["latitude"], group["longitude"]))  # (lat, lon) pairs

    if len(coords) < MIN_POINTS:
        continue

    # --- Base GPS-derived features ---
    num_points = len(coords)
    lat_span = group["latitude"].max() - group["latitude"].min()
    lon_span = group["longitude"].max() - group["longitude"].min()
    # Bounding-box "area" in coordinate units (lat span * lon span), not square meters.
    bbox_area = lat_span * lon_span
    point_density = num_points / (bbox_area + AREA_EPSILON)
    # Geodesic length in meters of each consecutive segment; at least two
    # segments exist here because len(coords) >= MIN_POINTS.
    dists = [geodesic(coords[i], coords[i + 1]).meters for i in range(len(coords) - 1)]
    avg_segment_distance = np.mean(dists)
    # NOTE(review): in the output recorded with this notebook num_stops equals
    # num_points for every previewed track — confirm that "speed" is populated.
    num_stops = (group["speed"] == 0).sum()

    # Start from the all-NaN fallback; it is only replaced if every OSM step succeeds.
    osm_features = dict.fromkeys(OSM_FEATURE_KEYS, np.nan)

    try:
        # Build convex hull around GPS track (shapely expects (x, y) = (lon, lat)).
        points = MultiPoint([(lon, lat) for lat, lon in coords])
        hull_polygon = points.convex_hull

        if hull_polygon.area < MIN_HULL_AREA:
            raise ValueError("Convex hull too small to query OSM.")

        time.sleep(OVERPASS_PAUSE_S)
        G = ox.graph_from_polygon(hull_polygon, network_type="drive")

        # Project the hull and use the projected area for the density stats.
        projected_polygon, _ = project_geometry(hull_polygon)
        area_m2 = projected_polygon.area

        # Compute basic stats with area
        stats = ox.basic_stats(G, area=area_m2)

        # OSM-based features for comparison
        osm_features = {key: stats.get(key, np.nan) for key in OSM_FEATURE_KEYS}

    except Exception as exc:
        # Deliberate best-effort: a failed OSM lookup keeps the GPS features and
        # leaves the OSM columns as NaN. tqdm.write keeps the progress bar intact.
        tqdm.write(f"Failed for tracking_id {tracking_id}: {exc}")

    features.append({
        "tracking_id": tracking_id,
        "num_points": num_points,
        "bbox_area": bbox_area,
        "point_density": point_density,
        "avg_segment_distance": avg_segment_distance,
        "num_stops": num_stops,
        **osm_features
    })

# --- Save results -----------------------------------------------------------
features_df = pd.DataFrame(features)
features_df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved enriched data to {OUTPUT_CSV}")


Extracting OSM features via Convex Hull:   0%|          | 0/850 [00:00<?, ?it/s]

Extracting OSM features via Convex Hull:  60%|█████▉    | 509/850 [29:34<14:59,  2.64s/it]  

Failed for tracking_id 64005: Graph contains no edges.


Extracting OSM features via Convex Hull:  76%|███████▌  | 642/850 [37:16<10:28,  3.02s/it]

Failed for tracking_id 73331: Graph contains no edges.


Extracting OSM features via Convex Hull: 100%|██████████| 850/850 [49:00<00:00,  3.46s/it]

Saved enriched data to tracking_osm_convexhull.csv





In [22]:
# Reload the per-track feature table written by the extraction cell, so the
# preview below reflects what is actually stored on disk.
features_df = pd.read_csv(filepath_or_buffer="tracking_osm_convexhull.csv")

# Bare last expression -> rich display of the first five rows.
features_df.head(n=5)


Unnamed: 0,tracking_id,num_points,bbox_area,point_density,avg_segment_distance,num_stops,street_length_total,intersection_count,street_density_km,edge_density_km,circuity_avg,intersection_density_km,node_density_km
0,59,72,0.001813,39681.685256,104.093826,72,1294.041558,15.0,360.547704,721.095407,1.013029,4.179321,5.85105
1,65,154,0.006207,24806.119875,103.039632,154,15229.619805,63.0,878.678609,1721.050812,1.04272,3.634809,4.557935
2,78,237,0.046257,5123.419281,153.782723,237,178054.574509,482.0,2284.233969,4550.94978,1.09498,6.183502,7.86408
3,87,14,0.002002,6988.110554,508.602238,14,8474.946438,35.0,2627.00857,5173.047202,1.054584,10.849072,14.878727
4,172,253,0.000831,304095.27277,50.290536,253,6552.387616,26.0,1869.210227,3738.420453,1.048376,7.417062,12.26668
