In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Point
from doSpatialJoin import joinCSVs

# Build the joined table from the two parquet inputs via the project helper.
# NOTE(review): the positional flags (True, ["geometry"], True, 100, False,
# False) are passed through unchanged; their meaning is defined by
# doSpatialJoin.joinCSVs and is not visible here -- confirm before editing.
first_input_path = "../../data/processed/dataset_v9.parquet"
second_input_path = "../../data/raw/texas_edges.parquet"
joinedDf = joinCSVs(
    first_input_path,
    second_input_path,
    True,
    ["geometry"],
    True,
    100,
    False,
    False,
)

# These two join by-products are not used by the rest of the script.
unused_join_columns = ["index_right", "dist_to_intersection"]
joinedDf = joinedDf.drop(columns=unused_join_columns)

# Result buffers: one slot per row of joinedDf, plus a list collecting the
# positional numbers of rows that need the fallback point selection.
row_count = len(joinedDf)
finalCurveList = np.zeros(row_count)
bearingList = np.zeros(row_count)
failureRows = []

# (x, y) of every row's `geometry`, stacked into one array.
crash_pts = np.array(
    [[rec.geometry.x, rec.geometry.y] for rec in joinedDf.itertuples(index=False)]
)

# One (k, 2) coordinate array per row, read from the items of that row's
# `dataset_geometry` (each item must expose .x and .y).
per_row_coords = [
    np.array([[pt.x, pt.y] for pt in rec.dataset_geometry])
    for rec in joinedDf.itertuples(index=False)
]

# Flatten into a single (total_points, 2) array; row i owns the slice
# all_coords[row_offsets[i]:row_offsets[i + 1]].
all_coords = np.vstack(per_row_coords)
row_offsets = np.cumsum([0] + [len(coords) for coords in per_row_coords])

# Per-row road shape features: mean turning angle and bearing near each crash.

# Road vertices closer than this to the crash point are used for the features.
# NOTE(review): the unit is whatever the geometry coordinates use (presumably
# metres in a projected CRS) -- confirm.
SEARCH_RADIUS = 300


def _select_road_points(road_coords, crash_pt, radius=SEARCH_RADIUS):
    """Choose the road vertices that describe the road around a crash point.

    Args:
        road_coords: (k, 2) array of road vertex coordinates, in road order.
        crash_pt: length-2 array with the crash coordinates.
        radius: vertices strictly closer than this are selected.

    Returns:
        ``(points, used_fallback)``. When at least three vertices lie within
        ``radius`` they are returned with ``used_fallback=False``. Otherwise
        ``used_fallback`` is True and ``points`` is the window of up to three
        consecutive vertices centred on the closest one (shifted inward at
        either end of the road). Roads with fewer than three vertices return
        all of their vertices; an empty road returns an empty array.
    """
    distances = np.linalg.norm(road_coords - crash_pt, axis=1)
    nearby = np.where(distances < radius)[0]
    if len(nearby) >= 3:
        return road_coords[nearby], False

    if len(road_coords) == 0:
        # np.argmin would raise on an empty array; nothing to select.
        return road_coords, True

    closest = int(np.argmin(distances))
    # Centre a 3-vertex window on the closest vertex, clamped so that it
    # never runs past the first or last vertex of the road.
    first = min(max(closest - 1, 0), max(len(road_coords) - 3, 0))
    return road_coords[first:first + 3], True


def _mean_turning_angle(points):
    """Return the mean absolute direction change (radians) along ``points``.

    The direction change is measured at every interior vertex and wrapped to
    [-pi, pi) before taking the absolute value. Two points form a single
    straight segment and yield 0.0; fewer than two points yield NaN.
    """
    if len(points) < 3:
        return 0.0 if len(points) == 2 else np.nan
    segments = np.diff(points, axis=0)
    headings = np.arctan2(segments[:, 1], segments[:, 0])
    turns = (np.diff(headings) + np.pi) % (2 * np.pi) - np.pi
    return np.mean(np.abs(turns))


def _bearing_degrees(points):
    """Return the bearing in degrees, in [0, 360), of the middle segment.

    The angle is measured clockwise from the +y axis (i.e. from north when y
    is a northing). The segment starts at the middle vertex; for a 2-point
    input the only segment is used. Fewer than two points yield NaN.
    """
    if len(points) < 2:
        return np.nan
    # The clamp only matters for 2 points, where len // 2 + 1 is out of range.
    mid = min(len(points) // 2, len(points) - 2)
    dx, dy = points[mid + 1] - points[mid]
    return (90 - np.degrees(np.arctan2(dy, dx))) % 360


for i in range(len(joinedDf)):
    # Row i owns the slice [row_offsets[i], row_offsets[i + 1]) of all_coords.
    road_coords = all_coords[row_offsets[i]:row_offsets[i + 1]]
    chosenPoints, used_fallback = _select_road_points(road_coords, crash_pts[i])
    if used_fallback:
        # Fewer than three vertices were within SEARCH_RADIUS of the crash.
        failureRows.append(i)

    finalCurveList[i] = _mean_turning_angle(chosenPoints)
    bearingList[i] = _bearing_degrees(chosenPoints)

# Attach the computed features. bearingList holds degrees; the stored
# "bearing" column is its conversion to radians.
bearing_in_radians = np.radians(bearingList)
joinedDf["curvature"] = finalCurveList
joinedDf["bearing"] = bearing_in_radians

# The geometry columns are not written out; rebuild the remainder as a plain
# pandas DataFrame before saving.
geometry_columns = ["geometry", "dataset_geometry"]
joinedDf = pd.DataFrame(joinedDf.drop(columns=geometry_columns))

# Write the feature table, then the positional numbers of the rows that
# needed the fallback point selection.
joinedDf.to_parquet("../../data/processed/dataset_v10.parquet")
failure_table = pd.DataFrame({"index": failureRows})
failure_table.to_csv(
    "../../data/processed/dataset_v10_pointFailures.csv",
    index=False,
)