In [None]:
import geopandas as gpd
import os

# === PATHS ===
# Every input and output shapefile lives in the same project folder.
base_dir = r"C:\Users\natda\OneDrive - Northeastern University\Desktop\NatDave\Academics\PhD_NU\RESEARCH\Traffic_Stress\Boston"
roads_path, bikes_path, output_path = (
    os.path.join(base_dir, shapefile_name)
    for shapefile_name in ("street_network.shp", "Bike_Network_Jan_2025.shp", "bikes_no_olap.shp")
)

# === PARAMETERS ===
BIKE_BUF = 3          # Buffer distance around bike geometries, in units of the roads CRS (for calc. only)
MIN_OVERLAP = 0.20    # Min fraction (0-1) of a road segment's length that must fall inside a bike buffer

# === LOAD AND PREPARE DATA ===
roads = gpd.read_file(roads_path)
bikes = gpd.read_file(bikes_path)

# Features without a geometry are discarded; bikes are reprojected into the
# roads CRS first so that both layers share one coordinate system.
roads = roads.dropna(subset=["geometry"])
bikes = bikes.to_crs(roads.crs).dropna(subset=["geometry"])

# Extra geometry column holding the buffered bike lanes used for overlap tests.
bikes["buffered_geom"] = bikes.geometry.buffer(BIKE_BUF)

# Spatial indexing for efficient overlap checks
roads_sindex, bikes_sindex = roads.sindex, bikes.sindex

In [2]:
# Derive ExisFacil2 from ExisFacil: the codes listed below are renamed,
# every other value is carried over unchanged.
facility_code_renames = {
    "SUPN": "SUP_NAT",
    "SUPM": "SUP_MINOR",
    "BL-PEAKBUS": "BL_PK_BUS_BL",
    "BFBL": "BL_BUF",
    "CFBL": "MIX_CONTRA",
    "CFSBL": "SBL_CONTRA",
    "SBLBL": "SBL_BL",
    "SBLSL": "SBL_MIX",
    "BLSL": "BL_MIX",
    "PED": "CARFREE",
    "WALK": "WALK_YR_BIKE",
}
bikes["ExisFacil2"] = bikes["ExisFacil"].replace(facility_code_renames)

In [3]:
# === FUNCTION TO CALCULATE OVERLAP PERCENTAGE ===
def get_road_overlap_percentage(road_geom, bike_buffer):
    """
    Return the share of a road segment's length that lies within a bike buffer.

    Despite the name, the result is a ratio (overlap length / road length),
    not a value scaled to 0-100; callers compare it against MIN_OVERLAP.

    Parameters
    ----------
    road_geom : geometry or None
        Road geometry; must expose `is_empty`, `length` and `intersection()`.
    bike_buffer : geometry or None
        Buffered bike geometry to intersect the road with.

    Returns
    -------
    float
        Overlap ratio, or 0.0 when either geometry is None or empty, or when
        the road has no measurable length.
    """
    if road_geom is None or bike_buffer is None:
        return 0.0
    if road_geom.is_empty or bike_buffer.is_empty:
        return 0.0

    road_length = road_geom.length
    # A non-empty geometry can still have zero length (a point, or a line whose
    # vertices coincide); dividing by it would raise ZeroDivisionError.
    # `not (x > 0)` also rejects NaN.
    if not road_length > 0:
        return 0.0

    overlap_length = road_geom.intersection(bike_buffer).length
    return overlap_length / road_length

In [4]:
# === DICTIONARY TO STORE BIKE-ROAD MAPPINGS ===
bike_road_overlap_dict = {}

def populate_bike_road_overlap_dict():
    """
    Fill `bike_road_overlap_dict` with bike unique_id -> list of road unique_ids.

    For every bike segment, each road whose overlap ratio with the bike's
    buffer is at least MIN_OVERLAP is kept, ordered from the largest overlap
    to the smallest. A road is not restricted to a single bike: it is listed
    under every bike segment for which it passes the threshold.
    """
    for _, bike_row in bikes.iterrows():
        buffer_geom = bike_row["buffered_geom"]

        # Cheap bounding-box prefilter: the spatial index yields positional
        # indices into `roads`, hence the .iloc lookup.
        candidate_positions = list(roads_sindex.intersection(buffer_geom.bounds))
        candidate_roads = roads.iloc[candidate_positions]

        # Exact test: score every candidate, then keep those above the threshold.
        scored_roads = [
            (road_row["unique_id"], get_road_overlap_percentage(road_row.geometry, buffer_geom))
            for _, road_row in candidate_roads.iterrows()
        ]
        qualifying_roads = [pair for pair in scored_roads if pair[1] >= MIN_OVERLAP]

        # Largest overlap first; only the road ids are stored.
        ranked_roads = sorted(qualifying_roads, key=lambda pair: pair[1], reverse=True)
        bike_road_overlap_dict[bike_row["unique_id"]] = [road_id for road_id, _ in ranked_roads]

populate_bike_road_overlap_dict()

In [5]:
# Spot check (NEU footbridge): road unique_ids matched to bike segment 3468
bike_road_overlap_dict[3468]

[24565]

In [6]:
# Spot check (Neponset River Trail): road unique_ids matched to bike segment 3007
bike_road_overlap_dict[3007]

[18472, 17581, 17580, 18480]

In [7]:
# Spot check (University Dr): road unique_ids matched to bike segment 3381
# (an empty list means no road reached the MIN_OVERLAP threshold)
bike_road_overlap_dict[3381]

[]

In [None]:
# === FUNCTION TO ASSIGN bike_type2 BASED ON DICT ===
def assign_best_bike_type_from_dict():
    """
    Set `bike_type2` on each matched road from the bike segment that overlaps it most.

    Reads the module-level `bike_road_overlap_dict` (bike unique_id -> list of
    road unique_ids) and updates the module-level `roads` frame in place.

    A road can be listed under several bike segments. Writing the values in
    dictionary order would let the last bike win regardless of overlap, so the
    overlap ratio of every (bike, road) pair is re-evaluated and only the bike
    with the largest ratio supplies the road's `ExisFacil2` value. On an exact
    tie the bike that comes later in the dictionary is kept.

    Roads that are not matched by any bike keep whatever `bike_type2` value
    they already had; the column is created (all null) when it is missing.
    Bike ids that are absent from `bikes` are skipped.
    """
    # One lookup per layer instead of a full-column scan per id.
    # setdefault keeps the first row for a repeated unique_id.
    bike_lookup = {}
    for bike_id, facility, buffer_geom in zip(
        bikes["unique_id"], bikes["ExisFacil2"], bikes["buffered_geom"]
    ):
        bike_lookup.setdefault(bike_id, (facility, buffer_geom))

    road_geom_by_id = {}
    for road_id, road_geom in zip(roads["unique_id"], roads.geometry):
        road_geom_by_id.setdefault(road_id, road_geom)

    # road unique_id -> (overlap ratio, facility type) of the best bike seen so far
    best_by_road = {}
    for bike_id, road_ids in bike_road_overlap_dict.items():
        if bike_id not in bike_lookup:
            continue  # Skip if the bike segment is not present in `bikes`
        facility, buffer_geom = bike_lookup[bike_id]

        for road_id in road_ids:
            overlap = get_road_overlap_percentage(road_geom_by_id.get(road_id), buffer_geom)
            current_best = best_by_road.get(road_id)
            if current_best is None or overlap >= current_best[0]:
                best_by_road[road_id] = (overlap, facility)

    # Guarantee the column exists even when nothing matched.
    if "bike_type2" not in roads.columns:
        roads["bike_type2"] = None

    if not best_by_road:
        return

    # Single vectorised write for all matched roads.
    facility_by_road = {road_id: facility for road_id, (_, facility) in best_by_road.items()}
    is_matched = roads["unique_id"].isin(list(facility_by_road))
    roads.loc[is_matched, "bike_type2"] = (
        roads.loc[is_matched, "unique_id"].map(facility_by_road).to_numpy()
    )

# === SAVE BIKE SEGMENTS WITH NO ROAD ASSOCIATION ===
def save_unassociated_bike_segments():
    """
    Write to `output_path` every bike segment whose entry in
    `bike_road_overlap_dict` is an empty list, i.e. no road reached the
    MIN_OVERLAP threshold for it, and print how many were saved.
    """
    # Collect the ids of bikes that ended up with no matched road.
    unmatched_bike_ids = []
    for bike_id, matched_road_ids in bike_road_overlap_dict.items():
        if not matched_road_ids:
            unmatched_bike_ids.append(bike_id)

    # The helper buffer column is dropped so only the original geometry is written.
    is_unmatched = bikes["unique_id"].isin(unmatched_bike_ids)
    unassociated_bikes = bikes.loc[is_unmatched].drop(columns=["buffered_geom"])

    unassociated_bikes.to_file(output_path, driver="ESRI Shapefile")
    print(f"Saved {len(unassociated_bikes)} bike segments with < {MIN_OVERLAP * 100}% overlap.")


# === INITIALIZE bike_type2 COLUMN AND ASSIGN VALUES ===
# Create the column only when it is missing: values already present in the
# loaded shapefile are kept, and the summary below cannot fail with a KeyError
# when no road was matched to any bike segment.
if "bike_type2" not in roads.columns:
    roads["bike_type2"] = None
assign_best_bike_type_from_dict()

# Save unassociated bike segments
save_unassociated_bike_segments()

# === PRINT OUTPUT ===
non_null_bike_type2_count = roads["bike_type2"].notna().sum()
print(f"Number of road segments with assigned 'bike_type2': {non_null_bike_type2_count}")

Saved 147 bike segments with < 20.0% overlap.
Number of road segments with assigned 'bike_type2': 6144


In [9]:
# === SAVE THE UPDATED ROADS SHAPEFILE ===
# NOTE: roads_path is the same file the roads were loaded from, so this
# overwrites the source shapefile; a later run will read this output as input.
roads.to_file(roads_path, driver="ESRI Shapefile")
print(f"Updated road segments with best bike type based on {MIN_OVERLAP * 100}% minimum overlap.")

Updated road segments with best bike type based on 20.0% minimum overlap.


In [10]:
# Roads that carry a bike_type value but received no bike_type2
# (bike_type2 is null or an empty string).
bike_type2_missing = roads["bike_type2"].isna() | roads["bike_type2"].eq("")
has_bike_type = roads["bike_type"].notna()
filtered_roads = roads[bike_type2_missing & has_bike_type]

filtered_roads["bike_type"].value_counts()

bike_type
0                               15010
Off-Road Path                    3909
Proposed                         1864
Bike Lane                         265
Buffered Bike                     147
Sharrow                           111
Walk only                          19
Bus bike lane                       5
Cycle track                         1
Cycletrack- bike lane hybrid        1
Priority share                      1
Name: count, dtype: int64