# GeoPandas Spatial Join Crash Filter 

In [5]:
import os
import zipfile
import pandas as pd
import geopandas as gpd
import pyproj
from shapely.geometry import shape, LineString, MultiLineString, Point
from shapely.ops import transform
import folium

## 1️. Unzip and load files

In [None]:
# --- Input locations ---------------------------------------------------------
# Road-segment shapefile archive, obtained here:
# https://tn-tnmap.opendata.arcgis.com/datasets/37229399437446b9acd653f353f7decc_0/about
shapefile_zip = "Road_Segment_-1946548593740953053.zip"
crash_csv = "datasets/Overall geospatial crash data for Madison and Henderson counties.csv"

# Unpack the archive into a working folder (created if it does not exist yet).
extract_dir = "road_shapefile"
os.makedirs(extract_dir, exist_ok=True)
with zipfile.ZipFile(shapefile_zip, mode='r') as archive:
    archive.extractall(path=extract_dir)

shapefile_path = os.path.join(extract_dir, "Road_Segment.shp")

# Road segments as a GeoDataFrame.
roads_gdf = gpd.read_file(shapefile_path)

# Crash records: coerce both coordinate columns to numbers (values that cannot
# be parsed become NaN), discard rows lacking either coordinate, then build
# EPSG:4326 point geometries from longitude (x) and latitude (y).
crashes_df = pd.read_csv(crash_csv)
crashes_df = crashes_df.assign(
    Latitude=pd.to_numeric(crashes_df['Latitude'], errors='coerce'),
    Longitude=pd.to_numeric(crashes_df['Longitude'], errors='coerce'),
)
crashes_df = crashes_df.dropna(subset=['Latitude', 'Longitude'])
crashes_gdf = gpd.GeoDataFrame(
    crashes_df,
    geometry=gpd.points_from_xy(x=crashes_df['Longitude'], y=crashes_df['Latitude']),
    crs='EPSG:4326',
)

.  .


## 2️. Filter interstate segments (all interstate classes in ADM_SYS; treated as I-40 downstream)

In [9]:
# Keep only the road segments whose ADM_SYS value is one of the two interstate
# classes. Note that no route-number filter is applied here. The result is
# copied so it is independent of roads_gdf.
i40_gdf = roads_gdf.loc[
    roads_gdf['ADM_SYS'].isin([
        'Interstate, Rural',
        'Interstate, Urban',
    ])
].copy()

## 3️. Project both datasets to meters for buffering and spatial join

In [11]:
# EPSG:3857 (Web Mercator, meters)
i40_proj = i40_gdf.to_crs(epsg=3857)
crashes_proj = crashes_gdf.to_crs(epsg=3857)

## 4️. Apply buffer to I-40 segments (safe buffering after projection)

In [13]:
# Buffer every interstate segment by 100 m of true ground distance.
#
# i40_proj is in EPSG:3857 (Web Mercator), whose units are stretched by roughly
# 1/cos(latitude) away from the equator, so buffering by 100 directly in that
# CRS produces a corridor noticeably narrower than 100 m on the ground at
# mid-latitudes. Instead: buffer in a locally accurate UTM zone (estimated from
# the data's extent), then convert the polygons back to the CRS of i40_proj so
# every downstream step still receives geometries in the CRS it expects.
buffer_meters = 100
utm_crs = i40_proj.estimate_utm_crs()
i40_buffered = i40_proj.to_crs(utm_crs).buffer(buffer_meters).to_crs(i40_proj.crs)
i40_buffered_gdf = gpd.GeoDataFrame(geometry=i40_buffered, crs=i40_proj.crs)

## 5️. Spatial join to select crashes within 100m of I-40

In [15]:
# Match each crash point to every buffered interstate polygon it intersects.
# The result keeps the crash rows' index and adds an 'index_right' column
# identifying the matched polygon.
joined = gpd.sjoin(crashes_proj, i40_buffered_gdf, how='inner', predicate='intersects')

# Buffers of neighbouring segments can overlap, in which case a single crash
# matches several polygons and appears once per match in `joined`. Keep one row
# per crash (first match) so crashes are not double-counted in the output, then
# return to EPSG:4326 and drop the join bookkeeping column.
filtered_crashes = (
    joined.loc[~joined.index.duplicated(keep='first')]
    .to_crs(epsg=4326)
    .drop(columns=['index_right'])
)

## 6️. Save result

In [None]:
# Write the filtered crash records to CSV, omitting the index column.
filtered_crashes.to_csv(
    path_or_buf="datasets/Filtered_I40_Crashes_GEOSPATIAL_JOIN.csv",
    index=False,
)

## 7️. Map preview

In [19]:
# Interactive preview: one small red circle per filtered crash, drawn on a
# light basemap centred on [35.6, -88.8].
m = folium.Map(location=[35.6, -88.8], zoom_start=9, tiles='cartodb positron')

# Appearance shared by every crash marker.
marker_style = dict(radius=3, color='red', fill=True, fill_opacity=0.7)

# Walk the two coordinate columns in lockstep; folium expects [lat, lon] order.
for lat, lon in zip(filtered_crashes['Latitude'], filtered_crashes['Longitude']):
    folium.CircleMarker(location=[lat, lon], **marker_style).add_to(m)

m.save("I40_Crashes_Map_GEOSPATIAL_JOIN.html")
# Bare expression so the notebook renders the map as the cell output.
m