In [None]:
from doSpatialJoin import joinCSVs
import math
import geopandas as gpd
from geopandas import clip

# --- Parameters ---
radius = 1000  # meters
area = math.pi * radius**2  # area (m^2) of the circular buffer around each crash

# EPSG:3857 (Web Mercator) stretches distances by roughly 1/cos(latitude),
# i.e. ~11-24% across Texas, which would distort both the buffer radius and the
# measured road lengths. EPSG:3083 (NAD83 / Texas Centric Albers Equal Area)
# is a meter-based projection designed for state-wide Texas data.
projected_epsg = 3083

# --- Load crash and road datasets ---
crashDf, roadsDf = joinCSVs(
    "../../data/processed/dataset_v6.parquet",
    "../../data/raw/texas_edges.parquet",
    datasetIsParquet=True,
    keptColumns=["geometry"],
    needCreateDataset=True,
    maxMeterDist=None,
    removeGeometry=False,
    onlyGetDf=True
)

# --- Ensure CRS is projected in meters ---
crashDf = crashDf.to_crs(epsg=projected_epsg)
roadsDf = roadsDf.to_crs(epsg=projected_epsg)

# --- Create one buffer polygon per crash ---
# Each buffer carries the *position* of its crash row (not the index label), so
# results can be mapped back unambiguously even if crashDf's index has
# duplicates or is not a plain RangeIndex.
crash_buffers = gpd.GeoDataFrame(
    {'crash_pos': range(len(crashDf))},
    geometry=crashDf.geometry.buffer(radius).values,
    crs=crashDf.crs,
)

# --- Intersect roads with every buffer they touch (many-to-many) ---
# geopandas.clip would cut the roads to the overall extent of the mask and
# lose which crash each piece belongs to. overlay(how='intersection') instead
# yields one row per (road, buffer) pair whose geometry is the part of the
# road inside that buffer, along with the buffer's 'crash_pos'.
# keep_geom_type=True discards point results where a road only touches a
# buffer boundary.
roads_clipped = gpd.overlay(
    roadsDf[[roadsDf.geometry.name]],
    crash_buffers,
    how='intersection',
    keep_geom_type=True,
)
roads_clipped['clipped_length_m'] = roads_clipped.geometry.length

# --- Total clipped road length and segment count per crash ---
# reindex() adds the crashes that have no road within the radius, with a
# length/count of 0, and guarantees row order matches crashDf.
road_density = (
    roads_clipped
    .groupby('crash_pos')['clipped_length_m']
    .agg(total_length_m='sum', num_segments='count')
    .reindex(range(len(crashDf)), fill_value=0)
    .rename_axis('crash_pos')
    .reset_index()
)

# --- Compute road density (meters of road per square meter of buffer) ---
road_density['road_density_1km'] = road_density['total_length_m'] / area

# --- Attach results to the crash dataframe ---
# Positional assignment keeps crashDf's original index and row order intact.
crashDf['road_density_1km'] = road_density['road_density_1km'].to_numpy()

# --- Drop the point geometry before saving ---
# Non-inplace drop: an inplace drop would leave a GeoDataFrame object without
# any geometry column, which may not be writable via GeoDataFrame.to_parquet.
crashDf = crashDf.drop(columns=['geometry'])

# --- Save final dataset ---
crashDf.to_parquet("../../data/processed/dataset_v7.parquet")

print("Road density calculation complete!")