# Geospatial Feature Tagging for I-40 Crash Data


In [None]:
import pandas as pd
import re

# File Paths
# All three paths are relative, so they resolve against the current working
# directory of the notebook/kernel.
# Input: crash table, loaded with pd.read_csv.
crash_data_path = "datasets/Filtered_I40_Crashes_GEOSPATIAL_JOIN.csv"
# Input: manual feature annotations, loaded with pd.read_excel (one DataFrame per sheet).
manual_features_path = "datasets/Manual Geospatial Feature Classification by Section.csv"
# Output: the filtered and tagged crash table is written here as CSV.
output_path = "datasets/ALL_I40_crash_data_final.csv"

In [None]:
# Parameters
# Name of the crash-data column holding the longitude used for filtering and tagging.
longitude_col = "Longitude_adj"
# Inclusive longitude window: rows whose longitude falls outside
# [min_longitude, max_longitude] are dropped before tagging.
# NOTE(review): presumably decimal degrees for the annotated study section -- confirm.
min_longitude = -89.073096
max_longitude = -88.198390

In [None]:
# Data loading
# crash_data: the crash table, read as a single DataFrame.
# manual_features: sheet_name=None makes read_excel return a dict that maps
# each sheet name to that sheet's DataFrame.
# NOTE(review): manual_features_path ends in ".csv" but is opened with
# read_excel. This only works if the file is really an Excel workbook; a plain
# CSV would make read_excel fail. Confirm the file's actual format.
crash_data = pd.read_csv(crash_data_path)
manual_features = pd.read_excel(manual_features_path, sheet_name=None)

In [None]:
# Parse the manual annotations
#
# Builds feature_lookup with the shape
#     {feature name: {category: [(start_longitude, end_longitude), ...]}}
# from every sheet in manual_features. Within a sheet, the first column drives
# the parse:
#   * a non-empty cell without "="  -> starts a new feature (header row)
#   * a cell containing "="         -> a category row; the text before the
#                                      first "=" is the category name, and the
#                                      next five columns hold its ranges
#   * an empty cell                 -> ignored
# NOTE(review): with read_excel's default header handling the first row of each
# sheet becomes the column labels and is not visited by iterrows() -- confirm
# the sheets are laid out with that in mind.
feature_lookup = {}

# Matches "<decimal> to <decimal>" with an optional leading minus sign, e.g.
# "-89.0731 to -88.9502". Single backslashes are required inside the raw
# string: a doubled backslash would match a literal "\" character and no range
# would ever be found. Compiled once because it is reused for every cell.
range_pattern = re.compile(r"(-?\d+\.\d+)\s+to\s+(-?\d+\.\d+)")

for sheet_name, df in manual_features.items():
    df = df.fillna("")  # blank cells become "" so str() never yields "nan"
    feature = None  # feature header most recently seen in this sheet

    for _, row in df.iterrows():
        row_first_cell = str(row.iloc[0]).strip()
        if not row_first_cell:
            continue

        if "=" not in row_first_cell:
            # Header row: later category rows attach to this feature. The same
            # feature name on several sheets accumulates into one entry.
            feature = row_first_cell
            feature_lookup.setdefault(feature, {})
            continue

        if feature is None:
            # A category row with no header above it cannot be attributed to
            # any feature; fail with a message that points at the sheet.
            raise ValueError(
                f"Sheet {sheet_name!r}: category row {row_first_cell!r} "
                "appears before any feature header"
            )

        category = row_first_cell.split("=")[0].strip()
        ranges = feature_lookup[feature].setdefault(category, [])

        # Columns 1-5 carry the range text. Slicing (rather than indexing each
        # position) tolerates sheets that have fewer than six columns.
        for cell in row.iloc[1:6]:
            ranges.extend(
                (float(start), float(end))
                for start, end in range_pattern.findall(str(cell))
            )

In [None]:
# Keep only crashes whose longitude lies inside the inclusive window
# [min_longitude, max_longitude]; rows with a missing longitude are dropped.
# .copy() detaches the result so new columns can be added to it safely.
in_window = crash_data[longitude_col].between(min_longitude, max_longitude)
crash_data_filtered = crash_data.loc[in_window].copy()

In [None]:
# tagging step 
def tag_feature(lon, feature_ranges):
    """Return the category label for a longitude within one feature.

    Args:
        lon: Longitude value to classify.
        feature_ranges: Mapping of category name to a list of
            ``(start, end)`` longitude pairs. The endpoints may be given in
            either order and both are inclusive.

    Returns:
        ``"none"`` if no category contains ``lon``, the category name if
        exactly one does, and ``"MC"`` if more than one does.
    """
    # A category counts at most once, however many of its ranges contain lon.
    matched = [
        name
        for name, spans in feature_ranges.items()
        if any(min(a, b) <= lon <= max(a, b) for a, b in spans)
    ]
    if not matched:
        return "none"
    return matched[0] if len(matched) == 1 else "MC"

# Add one column per feature, named after the feature, holding the category
# that tag_feature assigns to each crash's longitude. A feature whose name
# matches an existing column overwrites that column.
for feature, category_ranges in feature_lookup.items():
    longitudes = crash_data_filtered[longitude_col]
    crash_data_filtered[feature] = longitudes.apply(
        tag_feature, args=(category_ranges,)
    )

In [None]:
# Export
# Write the filtered, tagged crashes to output_path as CSV. index=False leaves
# the DataFrame index out of the file. An existing file at that path is
# overwritten.
crash_data_filtered.to_csv(output_path, index=False)
print(f"File saved as: {output_path}")