In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely import wkb
import os
from shapely.ops import substring
from shapely.geometry import LineString, MultiLineString

fdot_manifest = pd.read_parquet("bronze_fdot_manifest.parquet")
roadway_path = fdot_manifest[fdot_manifest['layer'] == 'roadway']['path'].values[0]
osm_nodes = gpd.read_parquet("bronze_osm_nodes.parquet")
osm_edges = gpd.read_parquet("bronze_osm_network.parquet")

if isinstance(osm_edges.index, pd.MultiIndex) or osm_edges.index.name is not None:
    osm_edges = osm_edges.reset_index()

fdot_master = pd.read_parquet(roadway_path)
if isinstance(fdot_master['geometry'].iloc[0], (bytes, bytearray)):
    fdot_master['geometry'] = fdot_master['geometry'].apply(wkb.loads) # parquet quirk, since it becomes binary we need to deserialize it

fdot_gdf = gpd.GeoDataFrame(fdot_master, geometry='geometry', crs="EPSG:26917")


In [2]:
exclude = {'traffic_signal_locations', 'intersection', 'railcross', 'interchange', 'roadway', 'weigh_in_motion'}
for _, row in fdot_manifest.iterrows():
    if row['layer'] in exclude: continue
    try:
        attr_df = pd.read_parquet(row['path']).drop(columns=['geometry'], errors='ignore')
        attr_unique = attr_df.groupby('ROADWAY').first().reset_index()
        fdot_gdf = fdot_gdf.merge(attr_unique, on='ROADWAY', how='left', suffixes=('', f"_{row['layer']}"))
    except Exception as e:
        print(f"! Error merging {row['layer']}: {e}")

bloat = ['OBJECTID', 'Shape_Length', 'Shape_Leng', 'DISTRICT', 'COUNTYDOT']
fdot_gdf = fdot_gdf.drop(columns=[c for c in fdot_gdf.columns if any(x in c for x in bloat)], errors='ignore')

In [3]:
pd.set_option('display.max_seq_items', None)

print(f"Total Columns: {len(fdot_gdf.columns)}")
print("-" * 30)
print(fdot_gdf.columns.tolist())

Total Columns: 317
------------------------------
['ROADWAY', 'DESCR', 'ROAD_DIREC', 'COUNTY', 'MNG_DIST', 'BEGIN_POST', 'END_POST', 'geometry', 'YEAR_', 'COSITE', 'DESC_FRM', 'DESC_TO', 'AADT', 'AADTFLG', 'KFLG', 'K100FLG', 'DFLG', 'TFLG', 'COUNTY_aadt', 'MNG_DIST_aadt', 'BEGIN_POST_aadt', 'END_POST_aadt', 'KFCTR', 'K100FCTR', 'DFCTR', 'TFCTR', 'ACCESS_CLA', 'COUNTY_access_management', 'MNG_DIST_access_management', 'BEGIN_POST_access_management', 'END_POST_access_management', 'ROAD_ID', 'RD_STATUS', 'NUMSECTION', 'COUNTY_basemap_route_road', 'SECTION_', 'COUNTYNM', 'MNG_DIST_basemap_route_road', 'BEGIN_POST_basemap_route_road', 'END_POST_basemap_route_road', 'RTLENGTH', 'HIGHMEASUR', 'SHAPE_Leng', 'ROAD_SIDE', 'LNCD', 'DESCR_bike_lane', 'COUNTY_bike_lane', 'MNG_DIST_bike_lane', 'BEGIN_POST_bike_lane', 'END_POST_bike_lane', 'ROAD_SIDE_bike_slot', 'LNCD_bike_slot', 'DESCR_bike_slot', 'COUNTY_bike_slot', 'MNG_DIST_bike_slot', 'BEGIN_POST_bike_slot', 'END_POST_bike_slot', 'ROAD_SIDE_box_c

In [4]:
def get_bearing_vectorized(gdf):
    """Calculate bearing (0-180) for linestrings."""
    if gdf.empty: 
        return pd.Series(dtype=float)
    
    exploded = gdf.explode(index_parts=False)
    exploded = exploded[exploded.geometry.type.isin(['LineString', 'LinearRing'])]
    
    coords = exploded.geometry.apply(
        lambda g: (g.coords[0], g.coords[-1]) if g and not g.is_empty else ((None, None), (None, None))
    )
    coords_array = np.array(coords.tolist())
    
    dx = coords_array[:, 1, 0] - coords_array[:, 0, 0]
    dy = coords_array[:, 1, 1] - coords_array[:, 0, 1]
    bearings = np.degrees(np.arctan2(dx, dy)) % 180
    
    return pd.Series(bearings, index=exploded.index).groupby(level=0).first()

def clip_endpoints(geom, offset=20):
    """Trim ends to avoid intersection false positives."""
    if geom is None or geom.is_empty: 
        return geom
    
    if isinstance(geom, LineString):
        if geom.length <= (offset * 2): 
            return geom
        return substring(geom, offset, geom.length - offset)
    
    if isinstance(geom, MultiLineString):
        parts = [substring(p, offset, p.length - offset) if p.length > (offset * 2) else p 
                 for p in geom.geoms]
        return MultiLineString(parts)
    
    return geom

def run_conflation(source_gdf, target_gdf, target_cols, 
                   max_dist=10, angle_tol=40, 
                   clip_target=True, target_offset=20,
                   min_vote_ratio=0.5,
                   check_vertical_separation=True):
    
    # Make sure we have an index to work with
    if source_gdf.index.name != 'fid':
        source_gdf = source_gdf.copy()
        source_gdf.index.name = 'fid'
    
    print(f"  Prepping target ({len(target_gdf):,} features)...")
    target = target_gdf.copy()
    target['target_bearing'] = get_bearing_vectorized(target)
    
    if clip_target:
        print(f"  Clipping {target_offset}m from ends...")
        target['geometry'] = target.geometry.apply(lambda x: clip_endpoints(x, offset=target_offset))
    
    print("  Calculating source bearings...")
    source_bearings = get_bearing_vectorized(source_gdf)
    
    # Extract coordinate points from each line
    print("  Extracting voter points...")
    
    def extract_points(geom):
        if geom is None or geom.is_empty:
            return []
        if hasattr(geom, 'coords'):
            return list(geom.coords)
        return []
    
    source_point_counts = {}
    voter_data = []
    
    for idx, geom in source_gdf.geometry.items():
        coords = extract_points(geom)
        source_point_counts[idx] = len(coords)
        bearing = source_bearings.get(idx)
        
        for coord in coords:
            voter_data.append({
                'fid': idx,
                'x': coord[0],
                'y': coord[1],
                'source_bearing': bearing
            })
    
    if not voter_data:
        return pd.DataFrame(index=source_gdf.index)
    
    voters_df = pd.DataFrame(voter_data)
    voters = gpd.GeoDataFrame(
        voters_df[['fid', 'source_bearing']], 
        geometry=gpd.points_from_xy(voters_df['x'], voters_df['y']), 
        crs=source_gdf.crs
    )
    
    # Spatial join
    print(f"  Spatial join (max {max_dist}m)...")
    joined = gpd.sjoin_nearest(
        voters, 
        target[['target_bearing', 'geometry'] + target_cols], 
        max_distance=max_dist, 
        distance_col="snap_dist"
    )
    
    if joined.empty: 
        print("  No matches found")
        return pd.DataFrame(index=source_gdf.index)
    
    # Filter by bearing
    angle_diff = np.abs(joined['source_bearing'] - joined['target_bearing']) % 180
    parallel_mask = (angle_diff < angle_tol) | (angle_diff > (180 - angle_tol))
    valid = joined[parallel_mask].copy()
    
    if valid.empty: 
        print("  No parallel matches")
        return pd.DataFrame(index=source_gdf.index)
    
    # Check for overpasses
    if check_vertical_separation:
        print("  Filtering overpasses...")
        
        source_has_bridge = source_gdf['bridge'].notna() if 'bridge' in source_gdf.columns else pd.Series(False, index=source_gdf.index)
        source_has_tunnel = source_gdf['tunnel'].notna() if 'tunnel' in source_gdf.columns else pd.Series(False, index=source_gdf.index)
        source_layer = source_gdf['layer'].fillna(0) if 'layer' in source_gdf.columns else pd.Series(0, index=source_gdf.index)
        
        target_has_bridge = target['bridge'].notna() if 'bridge' in target.columns else pd.Series(False, index=target.index)
        target_has_tunnel = target['tunnel'].notna() if 'tunnel' in target.columns else pd.Series(False, index=target.index)
        target_layer = target['layer'].fillna(0) if 'layer' in target.columns else pd.Series(0, index=target.index)
        
        valid['source_bridge'] = valid['fid'].map(source_has_bridge)
        valid['source_tunnel'] = valid['fid'].map(source_has_tunnel)
        valid['source_layer'] = valid['fid'].map(source_layer)
        
        valid['target_bridge'] = valid['index_right'].map(target_has_bridge)
        valid['target_tunnel'] = valid['index_right'].map(target_has_tunnel)
        valid['target_layer'] = valid['index_right'].map(target_layer)
        
        vertical_separation = (
            (valid['source_layer'] != valid['target_layer']) |
            (valid['source_bridge'] != valid['target_bridge']) |
            (valid['source_tunnel'] != valid['target_tunnel'])
        )
        
        before = len(valid)
        valid = valid[~vertical_separation]
        rejected = before - len(valid)
        
        if rejected > 0:
            print(f"    Rejected {rejected:,} overpass matches")
        
        valid = valid.drop(columns=['source_bridge', 'source_tunnel', 'source_layer', 
                                   'target_bridge', 'target_tunnel', 'target_layer'], errors='ignore')
    
    if valid.empty:
        print("  No matches after filtering")
        return pd.DataFrame(index=source_gdf.index)
    
    print(f"  Found {len(valid):,} voter-target pairs")
    
    # Count votes
    print("  Tallying votes...")
    
    vote_summary = (
        valid.groupby(['fid', 'index_right'])
        .agg(
            vote_count=('snap_dist', 'count'), 
            avg_snap_dist=('snap_dist', 'mean'),
            min_snap_dist=('snap_dist', 'min'),
            max_snap_dist=('snap_dist', 'max')
        )
        .reset_index()
    )
    
    vote_summary['total_points'] = vote_summary['fid'].map(source_point_counts)
    vote_summary['vote_ratio'] = vote_summary['vote_count'] / vote_summary['total_points']
    
    # Require minimum consensus
    quality_mask = vote_summary['vote_ratio'] >= min_vote_ratio
    vote_summary = vote_summary[quality_mask]
    
    if vote_summary.empty:
        print(f"  No matches met {min_vote_ratio*100:.0f}% threshold")
        return pd.DataFrame(index=source_gdf.index)
    
    # Pick best match per source
    winners = (
        vote_summary
        .sort_values(['fid', 'vote_count', 'avg_snap_dist'], ascending=[True, False, True])
        .drop_duplicates('fid')
        .set_index('fid')
    )
    
    print(f"  {len(winners):,} matches above threshold")
    
    result = winners.join(target[target_cols], on='index_right')
    return result

# Run conflation
print("="*60)
print("OSM + FDOT + County Conflation")
print("="*60)

print("\nPrepping OSM network...")
enriched_network = osm_edges.copy()

if 'fid' not in enriched_network.columns:
    enriched_network = enriched_network.reset_index()
    if 'index' in enriched_network.columns:
        enriched_network = enriched_network.rename(columns={'index': 'fid'})
    else:
        enriched_network['fid'] = range(len(enriched_network))

enriched_network = enriched_network.set_index('fid')

# Exclude service roads from voting
voter_mask = enriched_network['highway'] != 'service'
voter_network = enriched_network[voter_mask].copy()
print(f"  {len(enriched_network):,} total segments")
print(f"  {len(voter_network):,} voters (excluded {len(enriched_network) - len(voter_network):,} service roads)")

# FDOT conflation
print("\nRunning FDOT conflation...")

fdot_rename = {
    'ROADWAY': 'FDOT_ROADWAY',
    'DESCR': 'FDOT_DESCR',
    'FUNCLASS': 'FDOT_FUNCTIONAL_CLASS',
    'SPEED': 'FDOT_SPEED',
    'LANE_CNT': 'FDOT_LANE_COUNT',
    'AADT': 'FDOT_AADT',
    'TruckAADT': 'FDOT_TruckAADT',
    'PFC': 'FDOT_RAMP_TYPE',
    'ROAD_TYPE': 'FDOT_ROAD_TYPE',
    'MEDIAN_TYP': 'FDOT_MEDIAN_TYPE',
    'ACCESS_CLA': 'FDOT_ACCESS_CLASS',
    'ROAD_STATU': 'FDOT_ROAD_STATUS',
    'ON_OFF_SYS': 'FDOT_ON_OFF_SYSTEM',
    'STRUCTURE__bridges': 'FDOT_BRIDGES',
}

fdot_cols = list(fdot_rename.values())

fdot_matches = run_conflation(
    voter_network, 
    fdot_gdf.rename(columns=fdot_rename), 
    fdot_cols,
    max_dist=15,
    angle_tol=40,
    clip_target=True,
    target_offset=20,
    min_vote_ratio=0.4,
    check_vertical_separation=True
)

# Merge back
enriched_network = enriched_network.drop(columns=fdot_cols + ['fdot_snap_dist'], errors='ignore')
fdot_results = fdot_matches.drop(columns=['vote_count', 'index_right', 'total_points', 'vote_ratio', 'min_snap_dist', 'max_snap_dist'], errors='ignore')
if 'avg_snap_dist' in fdot_results.columns:
    fdot_results = fdot_results.rename(columns={'avg_snap_dist': 'fdot_snap_dist'})
enriched_network = enriched_network.join(fdot_results)

print(f"  Matched {fdot_matches.index.nunique():,} segments")

# County conflation
print("\nLoading county data...")
pbc_gdf = gpd.read_file("Road_Centerlines.geojson").to_crs(enriched_network.crs)
pbc_gdf = pbc_gdf.rename(columns={
    'SPEED_LIM': 'COUNTY_SPEED',
    'LANES': 'COUNTY_LANES',
    'RESP_AUTH': 'OWNER',
    'NAME': 'COUNTY_NAME',
    'FUNC_CLASS': 'COUNTY_FUNCTIONAL_CLASS',
})

pbc_cols = ['COUNTY_NAME', 'COUNTY_SPEED', 'COUNTY_LANES', 'OWNER', 'COUNTY_FUNCTIONAL_CLASS']

print("\nRunning county conflation...")
pbc_matches = run_conflation(
    voter_network, 
    pbc_gdf, 
    pbc_cols,
    max_dist=15,
    angle_tol=30,
    clip_target=True,
    target_offset=5,
    min_vote_ratio=0.6,
    check_vertical_separation=True
)

# Merge back
enriched_network = enriched_network.drop(columns=pbc_cols + ['pbc_snap_dist'], errors='ignore')
pbc_results = pbc_matches.drop(columns=['vote_count', 'index_right', 'total_points', 'vote_ratio', 'min_snap_dist', 'max_snap_dist'], errors='ignore')
if 'avg_snap_dist' in pbc_results.columns:
    pbc_results = pbc_results.rename(columns={'avg_snap_dist': 'pbc_snap_dist'})
enriched_network = enriched_network.join(pbc_results)

print(f"  Matched {pbc_matches.index.nunique():,} segments")

# Cleanup
print("\nFinalizing...")

if 'maxspeed' in enriched_network.columns:
    enriched_network['osm_maxspeed'] = (
        enriched_network['maxspeed']
        .astype(str)
        .str.extract(r'(\d+)', expand=False)
        .astype(float)
    )

enriched_network["is_roundabout"] = (enriched_network["junction"] == "roundabout")
enriched_network["is_connector"] = enriched_network['highway'].str.contains('link', na=False)

print("\n" + "="*60)
print(f"Total: {len(enriched_network):,} segments")
print(f"FDOT: {enriched_network['FDOT_ROADWAY'].notna().sum():,} matches")
print(f"County: {enriched_network['COUNTY_NAME'].notna().sum():,} matches")
print(f"OSM speed: {enriched_network['osm_maxspeed'].notna().sum():,}")
print(f"Connectors: {enriched_network['is_connector'].sum():,}")
print("="*60)

enriched_network = enriched_network.drop(columns=["maxspeed", "key"], errors='ignore')


OSM + FDOT + County Conflation

Prepping OSM network...
  947,921 total segments
  511,011 voters (excluded 436,910 service roads)

Running FDOT conflation...
  Prepping target (1,197 features)...
  Clipping 20m from ends...
  Calculating source bearings...
  Extracting voter points...
  Spatial join (max 15m)...
  Filtering overpasses...
    Rejected 2,415 overpass matches
  Found 174,846 voter-target pairs
  Tallying votes...
  90,985 matches above threshold
  Matched 90,985 segments

Loading county data...

Running county conflation...
  Prepping target (64,276 features)...
  Clipping 5m from ends...
  Calculating source bearings...
  Extracting voter points...
  Spatial join (max 15m)...
  Filtering overpasses...
    Rejected 3,404 overpass matches
  Found 724,336 voter-target pairs
  Tallying votes...
  294,441 matches above threshold
  Matched 294,441 segments

Finalizing...

Total: 947,921 segments
FDOT: 90,985 matches
County: 294,441 matches
OSM speed: 69,101
Connectors: 10,889

In [5]:
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx

# Graph-based connector speed resolution
# Runs after conflation, fills missing speeds on ramps/links by walking the network
# to find what roads they connect

try:
    df = enriched_network.copy()
    if isinstance(df.index, pd.MultiIndex) or df.index.name == 'fid':
        df = df.reset_index()
    print("Using enriched_network from previous cell")
except NameError:
    print("Loading from parquet (standalone mode)...")
    df = gpd.read_parquet("bronze_osm_network.parquet")
    if isinstance(df.index, pd.MultiIndex) or df.index.name is not None:
        df = df.reset_index()

# Clear previous run if this is being re-executed
reset_cols = ['connector_transition','connector_upstream','connector_downstream']
if 'connector_transition' in df.columns:
    prev_filled = df['connector_transition'].notna()
    n_reset = prev_filled.sum()
    if n_reset > 0:
        df.loc[prev_filled, 'osm_maxspeed'] = np.nan
        print(f"  Reset {n_reset:,} previously filled speeds")
for col in reset_cols:
    if col in df.columns:
        df[col] = np.nan

# Handle maxspeed column
if 'osm_maxspeed' not in df.columns:
    if 'maxspeed' in df.columns:
        print("  Converting maxspeed...")
        df['osm_maxspeed'] = (
            df['maxspeed'].astype(str)
            .str.extract(r'(\d+)', expand=False)
            .astype(float)
        )
    else:
        print("  No maxspeed found")
        df['osm_maxspeed'] = np.nan

total = len(df)
link_types = ['motorway_link','trunk_link','primary_link','secondary_link','tertiary_link']
link_mask  = df['highway'].isin(link_types)
print(f"  {total:,} total segments")
print(f"  {link_mask.sum():,} link segments")

# Default speeds by road type
mainline_speed = {
    'motorway':     70,
    'trunk':        55,
    'primary':      45,
    'secondary':    35,
    'tertiary':     30,
    'unclassified': 25,
    'residential':  25,
    'living_street':15,
    'service':      15,
}

link_to_parent = {
    'motorway_link':  'motorway',
    'trunk_link':     'trunk',
    'primary_link':   'primary',
    'secondary_link': 'secondary',
    'tertiary_link':  'tertiary',
}

# Build graph
print("\nBuilding graph...")

df['row_idx'] = df.index

edge_data = df[['u','v','row_idx','highway','osm_maxspeed']].copy()
if 'target_speed' in df.columns:
    edge_data['speed'] = df['target_speed'].combine_first(df['osm_maxspeed'])
else:
    edge_data['speed'] = df['osm_maxspeed']

G = nx.DiGraph()
G.add_nodes_from(pd.concat([df['u'], df['v']]).unique())

for row in edge_data.itertuples(index=False):
    G.add_edge(
        row.u, row.v,
        idx=row.row_idx,
        highway=row.highway,
        speed=row.speed,
    )

print(f"  {G.number_of_nodes():,} nodes, {G.number_of_edges():,} edges")

# BFS to find mainline roads connected to each ramp
def find_mainline(G, start_node, direction='out', max_depth=50):
    """Walk graph to find first non-link mainline road."""
    visited = {start_node}
    queue = [(start_node, 0)]

    while queue:
        node, depth = queue.pop(0)
        if depth >= max_depth:
            continue

        if direction == 'out':
            edges = G.out_edges(node, data=True)
        else:
            edges = G.in_edges(node, data=True)

        for u, v, data in edges:
            neighbour = v if direction == 'out' else u
            if neighbour in visited:
                continue
            visited.add(neighbour)

            hw = data.get('highway', '')
            speed = data.get('speed')

            # Skip service/residential - they're noisy at ramp endpoints
            if hw in mainline_speed:
                if hw in ('service', 'residential', 'living_street'):
                    queue.append((neighbour, depth + 1))
                else:
                    return hw, speed
                continue

            queue.append((neighbour, depth + 1))

    return None, None

# Process each link
print("\nResolving ramp speeds...")

links_df = df[link_mask].copy()
n_links = len(links_df)

records = []
for i, (idx, row) in enumerate(links_df.iterrows()):
    if i % 1000 == 0:
        print(f"  {i:>6,} / {n_links:,}", end='\r')

    link_hw = row['highway']
    u_node = row['u']
    v_node = row['v']
    parent_hw = link_to_parent.get(link_hw)
    parent_spd = mainline_speed.get(parent_hw, 45)

    # Walk upstream and downstream
    upstream_hw, upstream_spd_raw = find_mainline(G, u_node, direction='in')
    downstream_hw, downstream_spd_raw = find_mainline(G, v_node, direction='out')

    # Use actual speeds if available, otherwise defaults
    up_spd = upstream_spd_raw if pd.notna(upstream_spd_raw) else mainline_speed.get(upstream_hw)
    down_spd = downstream_spd_raw if pd.notna(downstream_spd_raw) else mainline_speed.get(downstream_hw)

    up_valid = pd.notna(up_spd)
    down_valid = pd.notna(down_spd)

    # Only trust direction if road is actually oneway
    is_oneway = str(row.get('oneway')).strip() in ('True', 'yes', '1')

    if up_valid and down_valid:
        delta = down_spd - up_spd
        if not is_oneway:
            transition = 'lateral'
            inferred_spd = int(round((up_spd + down_spd) / 2))
        elif delta > 5:
            transition = 'acceleration'
            inferred_spd = int(round(up_spd + (delta * 0.65)))
        elif delta < -5:
            transition = 'deceleration'
            inferred_spd = int(round(up_spd + (delta * 0.65)))
        else:
            transition = 'lateral'
            inferred_spd = int(round((up_spd + down_spd) / 2))

        inferred_spd = min(inferred_spd, parent_spd)
        inferred_spd = max(inferred_spd, 15)

    elif down_valid:
        transition = 'entry_only' if is_oneway else 'lateral'
        inferred_spd = int(min(down_spd, parent_spd))
    elif up_valid:
        transition = 'exit_only' if is_oneway else 'lateral'
        inferred_spd = int(min(up_spd, parent_spd))
    else:
        transition = 'unknown'
        inferred_spd = None

    records.append({
        'idx': idx,
        'link_hw': link_hw,
        'upstream_hw': upstream_hw,
        'downstream_hw': downstream_hw,
        'upstream_spd': up_spd,
        'downstream_spd': down_spd,
        'transition': transition,
        'inferred_spd': inferred_spd,
        'current_spd': row.get('osm_maxspeed'),
    })

print(f"\n  Done")
results = pd.DataFrame(records).set_index('idx')

# Print summary
print("\n" + "=" * 55)
print("CONNECTOR RESULTS")
print("=" * 55)

print(f"\nTransition types:")
for t, cnt in results['transition'].value_counts().items():
    pct = cnt / len(results) * 100
    bar = "█" * int(pct / 2)
    print(f"  {t:<15} {cnt:>6,}  ({pct:>5.1f}%)  {bar}")

print(f"\nUpstream roads found:")
for hw, cnt in results['upstream_hw'].value_counts().items():
    print(f"  {str(hw):<22} {cnt:>6,}")
print(f"  {'(none)':<22} {results['upstream_hw'].isna().sum():>6,}")

print(f"\nDownstream roads found:")
for hw, cnt in results['downstream_hw'].value_counts().items():
    print(f"  {str(hw):<22} {cnt:>6,}")
print(f"  {'(none)':<22} {results['downstream_hw'].isna().sum():>6,}")

resolved = results[results['inferred_spd'].notna()].copy()
no_current = resolved[resolved['current_spd'].isna()]
has_current = resolved[resolved['current_spd'].notna()]

print(f"\nSpeed resolution:")
print(f"  Inferred: {len(resolved):,} / {len(results):,}")
print(f"  Will fill: {len(no_current):,}")
print(f"  Already have speed: {len(has_current):,}")

if len(no_current) > 0:
    print(f"\n  Speed distribution (new fills):")
    for spd, cnt in no_current['inferred_spd'].value_counts().sort_index().items():
        print(f"    {int(spd):>3} mph  {cnt:>5,}")

print(f"\nAcceleration examples:")
accel = results[results['transition'] == 'acceleration'].head(6)
print(accel[['link_hw','upstream_hw','downstream_hw',
             'upstream_spd','downstream_spd','inferred_spd','current_spd']].to_string())

print(f"\nDeceleration examples:")
decel = results[results['transition'] == 'deceleration'].head(6)
print(decel[['link_hw','upstream_hw','downstream_hw',
             'upstream_spd','downstream_spd','inferred_spd','current_spd']].to_string())

# Apply results
print("\nApplying speeds...")

for _col in ['connector_transition', 'connector_upstream', 'connector_downstream']:
    if _col not in df.columns:
        df[_col] = pd.Series(pd.NA, index=df.index, dtype=object)
    else:
        df[_col] = df[_col].astype(object)

filled = 0
skipped = 0
for idx, row in results.iterrows():
    inferred = row['inferred_spd']
    if pd.isna(inferred) or row['transition'] == 'unknown':
        continue
    if pd.isna(df.at[idx, 'osm_maxspeed']):
        df.at[idx, 'osm_maxspeed'] = float(inferred)
        df.at[idx, 'connector_transition'] = row['transition']
        df.at[idx, 'connector_upstream'] = row['upstream_hw']
        df.at[idx, 'connector_downstream'] = row['downstream_hw']
        filled += 1
    else:
        skipped += 1

print(f"  Filled {filled:,} links")
print(f"  Skipped {skipped:,} (already had speed)")
print(f"  Unknown {(results['transition'] == 'unknown').sum():,} (left blank)")

filled_df = df[df['connector_transition'].notna()] if 'connector_transition' in df.columns else pd.DataFrame()
if len(filled_df) > 0:
    print(f"\n  Breakdown:")
    for t, cnt in filled_df['connector_transition'].value_counts().items():
        print(f"    {t:<15} {cnt:>5,}")

# Write back
new_cols = ['osm_maxspeed','connector_transition','connector_upstream','connector_downstream']
for col in new_cols:
    if col in df.columns:
        enriched_network[col] = df[col].values

enriched_network.to_parquet("bronze_osm_network.parquet")
print(f"\nSaved to bronze_osm_network.parquet")
print(f"New columns: {', '.join(reset_cols)}")

Using enriched_network from previous cell
  947,921 total segments
  10,887 link segments

Building graph...
  493,737 nodes, 947,921 edges

Resolving ramp speeds...
  10,000 / 10,887
  Done

CONNECTOR RESULTS

Transition types:
  lateral          4,765  ( 43.8%)  █████████████████████
  acceleration     2,822  ( 25.9%)  ████████████
  deceleration     2,541  ( 23.3%)  ███████████
  exit_only          409  (  3.8%)  █
  entry_only         323  (  3.0%)  █
  unknown             27  (  0.2%)  

Upstream roads found:
  primary                 4,591
  motorway                2,135
  secondary               2,108
  tertiary                1,166
  trunk                     510
  unclassified               27
  (none)                    350

Downstream roads found:
  primary                 3,924
  motorway                2,544
  secondary               2,067
  tertiary                1,233
  trunk                     532
  unclassified              151
  (none)                    436

Speed 

In [6]:
landuse_polys = gpd.read_parquet("bronze_osm_landuse.parquet")

def add_landuse_context(enriched_network, landuse_polys):
    """Spatial join landuse polygons to roads."""
    print("\nAdding landuse context...")

    # Find the landuse column (OSM naming varies)
    lu_cols = [c for c in landuse_polys.columns if 'landuse' in c.lower()]
    if not lu_cols:
        print("  No landuse column found, skipping")
        return enriched_network
    
    lu_col = lu_cols[0]
    if lu_col != 'landuse':
        landuse_polys = landuse_polys.rename(columns={lu_col: 'landuse'})

    # Match CRS
    if landuse_polys.crs != enriched_network.crs:
        landuse_polys = landuse_polys.to_crs(enriched_network.crs)

    roads = enriched_network.reset_index()
    id_col = 'fid' if 'fid' in roads.columns else roads.columns[0]

    roads_with_landuse = gpd.sjoin(
        roads[[id_col, 'geometry']],
        landuse_polys[['geometry', 'landuse']],
        how='left',
        predicate='intersects'
    )

    if 'landuse' not in roads_with_landuse.columns:
        print("  sjoin failed, skipping")
        return enriched_network

    # Take most common landuse per road segment
    landuse_mode = (
        roads_with_landuse.groupby(id_col)['landuse']
        .apply(lambda x: x.mode()[0] if len(x.mode()) > 0 else None)
    )

    enriched_network['landuse'] = landuse_mode
    print(f"  Added landuse to {enriched_network['landuse'].notna().sum():,} segments")

    return enriched_network


def speed_arbitration(df):
    """Hierarchical speed assignment: OSM → FDOT/County → Mode → Defaults."""
    print("Running speed arbitration...")
    
    target_speed = df['osm_maxspeed'].copy()
    
    # FDOT-owned roads: use FDOT first
    fdot_owned = (
        target_speed.isna() &
        (df['OWNER'] == 'FDOT') &
        df['FDOT_SPEED'].notna()
    )
    target_speed.loc[fdot_owned] = df.loc[fdot_owned, 'FDOT_SPEED']
    
    # County-owned: use county first (but skip suspicious 35 mph defaults)
    county_owned = (
        target_speed.isna() &
        (df['OWNER'].isin(['COUNTY', 'MUN', 'CITY'])) &
        (df['COUNTY_SPEED'].notna()) &
        (df['COUNTY_SPEED'] != 35)
    )
    target_speed.loc[county_owned] = df.loc[county_owned, 'COUNTY_SPEED']
    
    # Fallbacks: try opposite source
    fdot_county_fallback = (
        target_speed.isna() &
        (df['OWNER'] == 'FDOT') &
        (df['COUNTY_SPEED'].notna()) &
        (df['COUNTY_SPEED'] != 35)
    )
    target_speed.loc[fdot_county_fallback] = df.loc[fdot_county_fallback, 'COUNTY_SPEED']
    
    county_fdot_fallback = (
        target_speed.isna() &
        (df['OWNER'].isin(['COUNTY', 'MUN', 'CITY'])) &
        df['FDOT_SPEED'].notna()
    )
    target_speed.loc[county_fdot_fallback] = df.loc[county_fdot_fallback, 'FDOT_SPEED']
    
    # Mode fallback: use most common speed for this road name
    print("  Applying mode fallback...")
    temp_df = pd.DataFrame({'name': df['name'], 'target_speed': target_speed})
    road_mode = temp_df.groupby('name')['target_speed'].transform(
        lambda x: x.mode()[0] if len(x.mode()) > 0 else np.nan
    )
    target_speed = target_speed.fillna(road_mode)
    
    # Accept suspicious county 35 mph as last resort
    print("  Filling suspect county speeds...")
    county_suspect = (
        target_speed.isna() &
        (~df['OWNER'].isin(['PRI'])) &
        (df['COUNTY_SPEED'] == 35)
    )
    target_speed.loc[county_suspect] = 35
    
    # Track where each speed came from
    print("  Tracking sources...")
    speed_source = pd.Series('none', index=df.index)
    
    speed_source[df['osm_maxspeed'].notna()] = 'osm'
    
    fdot_primary = (
        (target_speed == df['FDOT_SPEED']) &
        (df['OWNER'] == 'FDOT') &
        df['FDOT_SPEED'].notna() &
        (speed_source == 'none')
    )
    speed_source[fdot_primary] = 'fdot_primary'
    
    fdot_fallback = (
        (target_speed == df['FDOT_SPEED']) &
        (df['OWNER'].isin(['COUNTY', 'MUN', 'CITY'])) &
        df['FDOT_SPEED'].notna() &
        (speed_source == 'none')
    )
    speed_source[fdot_fallback] = 'fdot_fallback'
    
    county_primary = (
        (target_speed == df['COUNTY_SPEED']) &
        (df['OWNER'].isin(['COUNTY', 'MUN', 'CITY'])) &
        (df['COUNTY_SPEED'] != 35) &
        df['COUNTY_SPEED'].notna() &
        (speed_source == 'none')
    )
    speed_source[county_primary] = 'county_primary'
    
    county_fallback = (
        (target_speed == df['COUNTY_SPEED']) &
        (df['OWNER'] == 'FDOT') &
        (df['COUNTY_SPEED'] != 35) &
        df['COUNTY_SPEED'].notna() &
        (speed_source == 'none')
    )
    speed_source[county_fallback] = 'county_fallback'
    
    county_suspect_source = (
        (target_speed == 35) &
        (df['COUNTY_SPEED'] == 35) &
        (~df['OWNER'].isin(['PRI'])) &
        (speed_source == 'none')
    )
    speed_source[county_suspect_source] = 'county_suspect'
    
    mode_source = (
        (target_speed == road_mode) &
        target_speed.notna() &
        (speed_source == 'none')
    )
    speed_source[mode_source] = 'mode'
    
    # Relabel graph-inferred connector speeds
    if 'connector_transition' in df.columns:
        connector_filled = (
            df['connector_transition'].notna() &
            (speed_source == 'osm')
        )
        speed_source[connector_filled & (df['connector_transition'] == 'acceleration')] = 'graph_acceleration'
        speed_source[connector_filled & (df['connector_transition'] == 'deceleration')] = 'graph_deceleration'
        speed_source[connector_filled & (df['connector_transition'] == 'lateral')] = 'graph_lateral'
        speed_source[connector_filled & (df['connector_transition'] == 'entry_only')] = 'graph_entry_only'
        speed_source[connector_filled & (df['connector_transition'] == 'exit_only')] = 'graph_exit_only'
        print(f"  Relabeled {connector_filled.sum():,} graph-inferred connectors")

    # Functional class defaults
    print("  Applying defaults...")
    highway_defaults = {
        'motorway': 70, 'motorway_link': 45, 'trunk': 55, 'trunk_link': 45,
        'primary': 45, 'primary_link': 35, 'secondary': 35, 'secondary_link': 30,
        'tertiary': 30, 'tertiary_link': 25, 'residential': 25, 'unclassified': 25,
        'living_street': 15, 'service': 15
    }
    
    for highway_type, default_speed in highway_defaults.items():
        mask = (target_speed.isna()) & (df['highway'] == highway_type)
        target_speed.loc[mask] = default_speed
        speed_source.loc[mask] = 'functional_class'
    
    # Roundabouts
    print("  Handling roundabouts...")
    roundabout_mask = df['junction'] == 'roundabout'
    roundabout_count = roundabout_mask.sum()
    
    if roundabout_count > 0:
        roundabout_needs_speed = roundabout_mask & target_speed.isna()
        target_speed.loc[roundabout_needs_speed] = 20
        speed_source.loc[roundabout_needs_speed] = 'roundabout_default'
        
        roundabout_too_fast = roundabout_mask & (target_speed > 30)
        if roundabout_too_fast.sum() > 0:
            print(f"    Capping {roundabout_too_fast.sum()} roundabouts at 25 mph")
            target_speed.loc[roundabout_too_fast] = 25
            speed_source.loc[roundabout_too_fast] = 'roundabout_capped'
        
        print(f"    Processed {roundabout_count:,} roundabouts")
    
    df['target_speed'] = target_speed
    
    # Name validation
    print("  Checking name matches...")
    
    osm_name_norm = df['name'].fillna('').astype(str).str.lower().str.strip()
    county_name_norm = df['COUNTY_NAME'].fillna('').astype(str).str.lower().str.strip()
    fdot_descr_norm = df['FDOT_DESCR'].fillna('').astype(str).str.lower().str.strip()
    
    name_match_osm_county = pd.Series(False, index=df.index)
    name_match_fdot_county = pd.Series(False, index=df.index)
    
    for idx in df.index:
        county_name = county_name_norm.loc[idx]
        if not county_name or county_name == 'nan':
            continue
        osm_name = osm_name_norm.loc[idx]
        if osm_name and osm_name != 'nan':
            if county_name in osm_name:
                name_match_osm_county.loc[idx] = True
        fdot_name = fdot_descr_norm.loc[idx]
        if fdot_name and fdot_name != 'nan':
            if county_name in fdot_name:
                name_match_fdot_county.loc[idx] = True
    
    name_matches = name_match_osm_county.sum() + name_match_fdot_county.sum()
    print(f"    Found {name_matches:,} matches")
    
    # Confidence scoring
    print("  Calculating confidence...")
    
    confidence_score = np.zeros(len(df))
    
    # Source quality weights
    confidence_score[speed_source == 'osm'] += 0.35
    confidence_score[speed_source == 'fdot_primary'] += 0.28
    confidence_score[speed_source == 'county_primary'] += 0.28
    confidence_score[speed_source == 'county_fallback'] += 0.18
    confidence_score[speed_source == 'fdot_fallback'] += 0.18
    confidence_score[speed_source == 'mode'] += 0.13
    confidence_score[speed_source == 'county_suspect'] += 0.07
    confidence_score[speed_source == 'functional_class'] += 0.04
    confidence_score[speed_source == 'roundabout_default'] += 0.25
    confidence_score[speed_source == 'roundabout_capped'] += 0.22
    confidence_score[speed_source == 'graph_lateral'] += 0.22
    confidence_score[speed_source == 'graph_acceleration'] += 0.18
    confidence_score[speed_source == 'graph_deceleration'] += 0.18
    confidence_score[speed_source == 'graph_entry_only'] += 0.12
    confidence_score[speed_source == 'graph_exit_only'] += 0.12
    
    # Name validation bonus
    county_data_used = speed_source.isin(['county_primary', 'county_fallback', 'county_suspect'])
    confidence_score[county_data_used & (name_match_osm_county | name_match_fdot_county)] += 0.12
    
    fdot_data_used = speed_source.isin(['fdot_primary', 'fdot_fallback'])
    confidence_score[fdot_data_used & name_match_fdot_county] += 0.08
    
    # Landuse validation
    landuse_valid = pd.Series(False, index=df.index)
    
    if 'landuse' in df.columns:
        print("    Validating against landuse...")
        
        landuse_speed_ranges = {
            'residential': (15, 35),
            'retail': (20, 35),
            'commercial': (25, 45),
            'industrial': (25, 45),
            'construction': (15, 25),
            'education': (15, 25),
            'institutional': (20, 35),
            'forest': (25, 55)
        }
        
        for landuse_type, (min_speed, max_speed) in landuse_speed_ranges.items():
            mask = (df['landuse'] == landuse_type) & df['target_speed'].notna()
            in_range = (df['target_speed'] >= min_speed) & (df['target_speed'] <= max_speed)
            landuse_valid[mask & in_range] = True
        
        confidence_score += landuse_valid * 0.10
        
        severe_mismatch = (df['landuse'] == 'residential') & (df['target_speed'] > 45)
        confidence_score[severe_mismatch] -= 0.1
    
    # Mode consistency
    mode_match = (df['target_speed'] == road_mode) & df['target_speed'].notna()
    confidence_score += mode_match * 0.10
    
    # Multi-source agreement
    osm_fdot_agree = (
        (np.abs(df['osm_maxspeed'].fillna(-999) - df['FDOT_SPEED'].fillna(-999)) <= 5) &
        df['FDOT_SPEED'].notna() &
        df['osm_maxspeed'].notna()
    )
    osm_county_agree = (
        (np.abs(df['osm_maxspeed'].fillna(-999) - df['COUNTY_SPEED'].fillna(-999)) <= 5) &
        df['COUNTY_SPEED'].notna() &
        df['osm_maxspeed'].notna() &
        (df['COUNTY_SPEED'] != 35)
    )
    triple_agree = (
        osm_fdot_agree & osm_county_agree &
        (np.abs(df['FDOT_SPEED'].fillna(-999) - df['COUNTY_SPEED'].fillna(-999)) <= 5)
    )
    two_agree = ((osm_fdot_agree | osm_county_agree) & ~triple_agree)
    
    confidence_score[two_agree] += 0.10
    confidence_score[triple_agree] += 0.20
    
    df['confidence_score'] = confidence_score.clip(0, 1)
    df['speed_source'] = speed_source
    
    # Summary
    print(f"\nDone: {len(df):,} segments")
    print(f"  Coverage: {df['target_speed'].notna().sum():,} ({df['target_speed'].notna().mean()*100:.1f}%)")
    print(f"\nSource breakdown:")
    print(f"  OSM: {(speed_source == 'osm').sum():,}")
    print(f"  FDOT (primary): {(speed_source == 'fdot_primary').sum():,}")
    print(f"  County (primary): {(speed_source == 'county_primary').sum():,}")
    print(f"  FDOT (fallback): {(speed_source == 'fdot_fallback').sum():,}")
    print(f"  County (fallback): {(speed_source == 'county_fallback').sum():,}")
    print(f"  Mode: {(speed_source == 'mode').sum():,}")
    print(f"  County (suspect): {(speed_source == 'county_suspect').sum():,}")
    print(f"  Functional class: {(speed_source == 'functional_class').sum():,}")
    
    if 'connector_transition' in df.columns:
        graph_total = speed_source.str.startswith('graph_').sum()
        if graph_total > 0:
            print(f"  Graph connectors: {graph_total:,}")
    
    print(f"\nConfidence:")
    print(f"  High (≥0.6): {(df['confidence_score'] >= 0.6).sum():,}")
    print(f"  Medium (0.3-0.6): {((df['confidence_score'] >= 0.3) & (df['confidence_score'] < 0.6)).sum():,}")
    print(f"  Low (<0.3): {(df['confidence_score'] < 0.3).sum():,}")
    
    if 'landuse' in df.columns:
        print(f"\nLanduse: {df['landuse'].notna().sum():,} segments tagged")
    
    return df


# Run
enriched_network = add_landuse_context(enriched_network, landuse_polys)
enriched_network = speed_arbitration(enriched_network)


Adding landuse context...
  Added landuse to 251,654 segments
Running speed arbitration...
  Applying mode fallback...
  Filling suspect county speeds...
  Tracking sources...
  Relabeled 10,236 graph-inferred connectors
  Applying defaults...
  Handling roundabouts...
    Capping 165 roundabouts at 25 mph
    Processed 5,470 roundabouts
  Checking name matches...
    Found 306,255 matches
  Calculating confidence...
    Validating against landuse...

Done: 947,921 segments
  Coverage: 947,530 (100.0%)

Source breakdown:
  OSM: 69,101
  FDOT (primary): 260
  County (primary): 120,947
  FDOT (fallback): 4,427
  County (fallback): 131
  Mode: 112,627
  County (suspect): 12,603
  Functional class: 617,033
  Graph connectors: 10,236

Confidence:
  High (≥0.6): 28,866
  Medium (0.3-0.6): 189,041
  Low (<0.3): 730,014

Landuse: 251,654 segments tagged


In [7]:
enriched_network.to_parquet("silver_enriched_network.parquet")

In [None]:
import pandas as pd

search_terms = ["northwest broken sound boulevard"]

pd.set_option('display.max_rows', None)  
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 1000)

pattern = r'\b(?:' + '|'.join(search_terms) + r')\b'

candidate_results = enriched_network[
    enriched_network['name'].str.contains(pattern, case=False, na=False)
].copy()

if candidate_results.empty:
    print(f"No results found for terms: {search_terms}")
else:
    print(f"Displaying all {len(candidate_results)} segments for Jog Road...")
    display(candidate_results.sort_values(['COUNTY_NAME']))

pd.reset_option('display.max_rows')

Displaying all 97 segments for Jog Road...


Unnamed: 0_level_0,u,v,osmid,highway,name,ref,lanes,oneway,surface,reversed,length,turn:lanes,lit,bridge,layer,junction,width,tunnel,geometry,OSM_has_stop_u,OSM_has_signal_u,OSM_has_crossing_u,OSM_has_give_way_u,OSM_has_stop_v,OSM_has_signal_v,OSM_has_crossing_v,OSM_has_give_way_v,has_stop_at_ends,has_signal_at_ends,has_crossing_at_ends,has_give_way_at_ends,fdot_snap_dist,FDOT_ROADWAY,FDOT_DESCR,FDOT_FUNCTIONAL_CLASS,FDOT_SPEED,FDOT_LANE_COUNT,FDOT_AADT,FDOT_TruckAADT,FDOT_RAMP_TYPE,FDOT_ROAD_TYPE,FDOT_MEDIAN_TYPE,FDOT_ACCESS_CLASS,FDOT_ROAD_STATUS,FDOT_ON_OFF_SYSTEM,FDOT_BRIDGES,pbc_snap_dist,COUNTY_NAME,COUNTY_SPEED,COUNTY_LANES,OWNER,COUNTY_FUNCTIONAL_CLASS,osm_maxspeed,is_roundabout,is_connector,connector_transition,connector_upstream,connector_downstream,landuse,target_speed,confidence_score,speed_source,name_match
fid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1
86485,99717798,99717802,261776025,tertiary,Northwest Broken Sound Boulevard,,4,True,asphalt,False,18.237305049957996,left|left|through|right,,,,,,,"LINESTRING (588396.517 2919570.091, 588394.816 2919588.178)",0,0,0,0,0,0,0,0,0,0,0,0,4.027472,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,4.488366,Broken Sound,25.0,2.0,PRI,,,False,False,,,,,25.0,0.23,mode,True
86486,99717802,9902109862,261776025,tertiary,Northwest Broken Sound Boulevard,,4,True,asphalt,False,2.012852824479116,left|left|through|right,,,,,,,"LINESTRING (588394.816 2919588.178, 588394.773 2919590.183)",0,0,0,0,0,0,0,0,0,0,0,0,3.87446,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,5.247181,Broken Sound,25.0,2.0,PRI,,,False,False,,,,,25.0,0.23,mode,True
86487,9902109862,962332419,261776025,tertiary,Northwest Broken Sound Boulevard,,4,True,asphalt,False,14.290737874572892,left|left|through|right,,,,,,,"LINESTRING (588394.773 2919590.183, 588394.425 2919604.414)",0,0,0,0,0,1,0,0,0,1,0,0,3.910657,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,5.51114,Broken Sound,25.0,2.0,PRI,,,False,False,,,,,25.0,0.23,mode,True
207272,11466083419,11466083418,135935648,tertiary,Northwest Broken Sound Boulevard,,2,True,asphalt,False,4.318966403172058,,,,,,,,"LINESTRING (588739.263 2918814.114, 588739.034 2918818.41)",0,0,0,0,0,0,0,0,0,0,0,0,8.105262,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,4.600552,Broken Sound,30.0,2.0,MUN,,25.0,False,False,,,,,25.0,0.55,osm,True
207301,1492076840,5549681397,579489158,tertiary,Northwest Broken Sound Boulevard,,1,True,asphalt,False,4.97192146834158,,,,,,,,"LINESTRING (588702.18 2918976.403, 588700.323 2918980.998)",0,0,0,0,0,0,0,0,0,0,0,0,5.02604,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,5.565542,Broken Sound,30.0,2.0,MUN,,25.0,False,False,,,,commercial,25.0,0.65,osm,True
207302,5549681397,2673916956,579489158,tertiary,Northwest Broken Sound Boulevard,,1,True,asphalt,False,5.307663899993868,,,,,,,,"LINESTRING (588700.323 2918980.998, 588698.006 2918985.756)",0,0,0,0,0,0,0,0,0,0,0,0,5.15732,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,5.748323,Broken Sound,30.0,2.0,MUN,,25.0,False,False,,,,commercial,25.0,0.65,osm,True
207303,1492076842,1492076858,579489158,tertiary,Northwest Broken Sound Boulevard,,1,True,asphalt,False,32.88336749728864,,,,,,,,"LINESTRING (588694.787 2918991.061, 588672.117 2919014.819)",0,0,0,0,0,0,0,0,0,0,0,0,2.411784,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,3.028486,Broken Sound,30.0,2.0,MUN,,25.0,False,False,,,,commercial,25.0,0.65,osm,True
207304,1492076858,5503877727,321873813,tertiary,Northwest Broken Sound Boulevard,,2,False,asphalt,False,81.37759713048389,,,,,,,,"LINESTRING (588672.117 2919014.819, 588615.432 2919073.055)",0,0,0,0,0,0,0,0,0,0,0,0,0.27503,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,0.252392,Broken Sound,30.0,2.0,MUN,,25.0,False,False,,,,commercial,25.0,0.65,osm,True
207305,1492076858,5549681419,579489157,tertiary,Northwest Broken Sound Boulevard,,1,True,asphalt,False,16.069555499243815,,,,,,,,"LINESTRING (588672.117 2919014.819, 588679.269 2919000.48)",0,0,0,0,0,0,0,0,0,0,0,0,1.855763,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,1.705002,Broken Sound,30.0,2.0,MUN,,25.0,False,False,,,,commercial,25.0,0.65,osm,True
207307,5503877727,1492076860,321873813,tertiary,Northwest Broken Sound Boulevard,,2,False,asphalt,False,100.91019906613325,,,,,,,,"LINESTRING (588615.432 2919073.055, 588545.897 2919145.989)",0,0,0,0,0,0,0,0,0,0,0,0,0.364153,93000288.0,BROKEN SOUND BLVD,18.0,,1.0,28500.0,1340.0,,,17.0,,9.0,,,0.373966,Broken Sound,30.0,2.0,MUN,,25.0,False,False,,,,commercial,25.0,0.65,osm,True


In [10]:
fmm_columns = ['fid', 'u', 'v', 'geometry']

fmm_export = enriched_network.reset_index()[fmm_columns].copy()

fmm_export['u'] = pd.to_numeric(fmm_export['u']).astype(np.int64)
fmm_export['v'] = pd.to_numeric(fmm_export['v']).astype(np.int64)

fmm_export = fmm_export.to_crs(epsg=4326)

os.makedirs("fmm_input", exist_ok=True)
fmm_export.to_file("fmm_input/edges.shp", index=False)

enriched_network.reset_index().to_csv("palm_beach_roads.csv", index=False)


In [11]:
enriched_network.to_csv("palm_beach_roads.csv", index=False)

print("Export complete: silver_enriched_network.csv")

Export complete: silver_enriched_network.csv


In [9]:
import pandas as pd
import geopandas as gpd
import numpy as np

print("Loading enriched network...")
df = gpd.read_parquet("silver_enriched_network.parquet")
if isinstance(df.index, pd.MultiIndex) or df.index.name is not None:
    df = df.reset_index()

total = len(df)
print(f"Total segments: {total:,}\n")

tier1 = ['motorway','motorway_link','trunk','trunk_link']
tier2 = ['primary','primary_link','secondary','secondary_link']
tier3 = ['tertiary','tertiary_link','unclassified']
tier4 = ['residential','living_street']
tier5 = ['service']

# Overall coverage
print("=" * 60)
print("Overall Coverage")
print("=" * 60)

has_speed = df['target_speed'].notna()
has_owner = df['OWNER'].notna()
has_fdot = df['FDOT_ROADWAY'].notna()
has_county = df['COUNTY_NAME'].notna()
has_osm_speed = df['osm_maxspeed'].notna()
osm_lanes = pd.to_numeric(df['lanes'], errors='coerce')
has_lanes = df['FDOT_LANE_COUNT'].notna() | osm_lanes.notna()
has_aadt = df['FDOT_AADT'].notna()
has_func = df['FDOT_FUNCTIONAL_CLASS'].notna() | df['COUNTY_FUNCTIONAL_CLASS'].notna()

print(f"  Target speed:     {has_speed.sum():>8,}  ({has_speed.mean()*100:.1f}%)")
print(f"  FDOT match:       {has_fdot.sum():>8,}  ({has_fdot.mean()*100:.1f}%)")
print(f"  County match:     {has_county.sum():>8,}  ({has_county.mean()*100:.1f}%)")
print(f"  OSM maxspeed:     {has_osm_speed.sum():>8,}  ({has_osm_speed.mean()*100:.1f}%)")
print(f"  Owner tag:        {has_owner.sum():>8,}  ({has_owner.mean()*100:.1f}%)")
print(f"  Lane count:       {has_lanes.sum():>8,}  ({has_lanes.mean()*100:.1f}%)")
print(f"  AADT:             {has_aadt.sum():>8,}  ({has_aadt.mean()*100:.1f}%)")
print(f"  Functional class: {has_func.sum():>8,}  ({has_func.mean()*100:.1f}%)")

# Speed sources
print("\n" + "=" * 60)
print("Speed Source Breakdown")
print("=" * 60)

source_counts = df['speed_source'].value_counts()
for src, cnt in source_counts.items():
    pct = cnt / total * 100
    bar = "█" * int(pct / 2)
    print(f"  {src:<25} {cnt:>8,}  ({pct:>5.1f}%)  {bar}")

# Confidence distribution
print("\n" + "=" * 60)
print("Confidence Score Distribution")
print("=" * 60)

cs = df['confidence_score']
print(f"  Mean:    {cs.mean():.3f}")
print(f"  Median:  {cs.median():.3f}")
print(f"  Std:     {cs.std():.3f}")
print(f"  Min:     {cs.min():.3f}")
print(f"  Max:     {cs.max():.3f}")
print()
bins = [0, 0.2, 0.4, 0.6, 0.8, 1.01]
conf_labels = ["Very Low (0.0-0.2)", "Low (0.2-0.4)", "Medium (0.4-0.6)", "High (0.6-0.8)", "Very High (0.8-1.0)"]
for label, (lo, hi) in zip(conf_labels, zip(bins, bins[1:])):
    mask = (cs >= lo) & (cs < hi)
    cnt = mask.sum()
    pct = cnt / total * 100
    bar = "█" * int(pct / 2)
    print(f"  {label:<22} {cnt:>8,}  ({pct:>5.1f}%)  {bar}")

# Coverage by highway type
print("\n" + "=" * 60)
print("Coverage by Highway Type")
print("=" * 60)

hw_group = df.groupby('highway').agg(
    count=('target_speed', 'count'),
    speed_coverage=('target_speed', lambda x: x.notna().mean()),
    avg_confidence=('confidence_score', 'mean'),
    osm_speed_pct=('osm_maxspeed', lambda x: x.notna().mean()),
    fdot_pct=('FDOT_ROADWAY', lambda x: x.notna().mean()),
    county_pct=('COUNTY_NAME', lambda x: x.notna().mean()),
).sort_values('count', ascending=False)

print(f"\n  {'Highway':<22} {'Count':>7}  {'Speed%':>7}  {'AvgConf':>8}  {'OSM%':>6}  {'FDOT%':>6}  {'Cnty%':>6}")
print("  " + "-"*72)
for hw, row in hw_group.iterrows():
    tier_note = ""
    if hw in tier1: tier_note = " ★★"
    elif hw in tier2: tier_note = " ★"
    elif hw in tier4+tier5: tier_note = " (low expected)"
    print(f"  {hw:<22} {int(row['count']):>7,}  {row['speed_coverage']*100:>6.1f}%  {row['avg_confidence']:>8.3f}  "
          f"{row['osm_speed_pct']*100:>5.1f}%  {row['fdot_pct']*100:>5.1f}%  {row['county_pct']*100:>5.1f}%{tier_note}")

# Connectors
print("\n" + "=" * 60)
print("Connector & Link Analysis")
print("=" * 60)

connectors = df[df['is_connector'] == True].copy()
mainlines = df[df['is_connector'] == False].copy()
link_types = df[df['highway'].str.contains('_link', na=False)].copy()

print(f"\n  Connectors:   {len(connectors):>8,}  ({len(connectors)/total*100:.1f}%)")
print(f"  Mainlines:    {len(mainlines):>8,}  ({len(mainlines)/total*100:.1f}%)")
print(f"  Link types:   {len(link_types):>8,}  ({len(link_types)/total*100:.1f}%)")

print(f"\n  {'Type':<22} {'Count':>7}  {'Speed%':>7}  {'AvgConf':>8}  {'OSM%':>6}  {'FDOT%':>6}  {'Cnty%':>6}")
print("  " + "-"*72)
for label, subset in [("Connectors", connectors), ("Mainlines", mainlines)]:
    if len(subset) == 0: continue
    print(f"  {label:<22} {len(subset):>7,}  "
          f"{subset['target_speed'].notna().mean()*100:>6.1f}%  "
          f"{subset['confidence_score'].mean():>8.3f}  "
          f"{subset['osm_maxspeed'].notna().mean()*100:>5.1f}%  "
          f"{subset['FDOT_ROADWAY'].notna().mean()*100:>5.1f}%  "
          f"{subset['COUNTY_NAME'].notna().mean()*100:>5.1f}%")

print(f"\n  Link breakdown:")
print(f"  {'Highway':<22} {'Count':>7}  {'AvgConf':>8}  {'OSM%':>6}  {'FDOT%':>6}")
print("  " + "-"*52)
for hw in ['motorway_link','trunk_link','primary_link','secondary_link','tertiary_link']:
    sub = df[df['highway'] == hw]
    if len(sub) == 0: continue
    print(f"  {hw:<22} {len(sub):>7,}  {sub['confidence_score'].mean():>8.3f}  "
          f"{sub['osm_maxspeed'].notna().mean()*100:>5.1f}%  "
          f"{sub['FDOT_ROADWAY'].notna().mean()*100:>5.1f}%")

# Roundabouts
print("\n" + "=" * 60)
print("Roundabout Analysis")
print("=" * 60)

roundabouts = df[df['is_roundabout'] == True].copy()
print(f"\n  Total: {len(roundabouts):,} ({len(roundabouts)/total*100:.1f}%)")
print(f"  Speed coverage: {roundabouts['target_speed'].notna().mean()*100:.1f}%")
print(f"  Avg confidence: {roundabouts['confidence_score'].mean():.3f}")

print(f"\n  Sources:")
for src, cnt in roundabouts['speed_source'].value_counts().items():
    print(f"    {src:<25} {cnt:>6,}  ({cnt/len(roundabouts)*100:.1f}%)")

print(f"\n  Speed distribution:")
for spd, cnt in roundabouts['target_speed'].dropna().astype(int).value_counts().sort_index().items():
    print(f"    {spd:>3} mph  {cnt:>5,}")

# Traffic controls
print("\n" + "=" * 60)
print("Traffic Controls")
print("=" * 60)

print(f"  Stop at u-end:    {df['OSM_has_stop_u'].sum():>8,}")
print(f"  Stop at v-end:    {df['OSM_has_stop_v'].sum():>8,}")
print(f"  Stop (either):    {df['has_stop_at_ends'].sum():>8,}  ({df['has_stop_at_ends'].mean()*100:.1f}%)")
print(f"  Signal (either):  {df['has_signal_at_ends'].sum():>8,}  ({df['has_signal_at_ends'].mean()*100:.1f}%)")
print(f"  Give way:         {df['has_give_way_at_ends'].sum():>8,}  ({df['has_give_way_at_ends'].mean()*100:.1f}%)")
print(f"  Roundabouts:      {df['is_roundabout'].sum():>8,}  ({df['is_roundabout'].mean()*100:.1f}%)")

print(f"\n  Stop/signal rates by type (study roads):")
study_ctrl = df[df['highway'].isin(tier1+tier2+tier3)]
ctrl_by_hw = study_ctrl.groupby('highway').agg(
    count=('has_stop_at_ends','count'),
    stop_pct=('has_stop_at_ends','mean'),
    signal_pct=('has_signal_at_ends','mean'),
    give_way_pct=('has_give_way_at_ends','mean'),
).sort_values('count', ascending=False)

print(f"\n  {'Highway':<22} {'Count':>7}  {'Stop%':>7}  {'Signal%':>8}  {'GiveWay%':>9}")
print("  " + "-"*58)
for hw, row in ctrl_by_hw.iterrows():
    print(f"  {hw:<22} {int(row['count']):>7,}  {row['stop_pct']*100:>6.1f}%  "
          f"{row['signal_pct']*100:>7.1f}%  {row['give_way_pct']*100:>8.1f}%")

# Lane counts
print("\n" + "=" * 60)
print("Lane Count Analysis")
print("=" * 60)

df['resolved_lanes'] = pd.to_numeric(df['FDOT_LANE_COUNT'], errors='coerce')
osm_lanes_num = pd.to_numeric(df['lanes'], errors='coerce')
df['resolved_lanes'] = df['resolved_lanes'].fillna(osm_lanes_num)

has_lanes_r = df['resolved_lanes'].notna()
print(f"  With lane data: {has_lanes_r.sum():,} ({has_lanes_r.mean()*100:.1f}%)")
print(f"    From FDOT:    {df['FDOT_LANE_COUNT'].notna().sum():,}")
print(f"    From OSM:     {(df['FDOT_LANE_COUNT'].isna() & osm_lanes_num.notna()).sum():,}")

print(f"\n  Distribution:")
lane_counts = df['resolved_lanes'].dropna().astype(int).value_counts().sort_index()
for lanes_n, cnt in lane_counts.items():
    pct = cnt / has_lanes_r.sum() * 100
    bar = "█" * int(pct / 2)
    print(f"    {lanes_n:>2} lanes  {cnt:>8,}  ({pct:>5.1f}%)  {bar}")

print(f"\n  By highway type (study roads):")
study_lanes = df[df['highway'].isin(tier1+tier2+tier3) & df['resolved_lanes'].notna()]
lane_by_hw = study_lanes.groupby('highway')['resolved_lanes'].agg(['mean','median','count'])
for hw, row in lane_by_hw.sort_values('count', ascending=False).iterrows():
    print(f"    {hw:<22} avg={row['mean']:.1f}  median={row['median']:.0f}  n={int(row['count']):,}")

# AADT
print("\n" + "=" * 60)
print("Traffic Volume (AADT)")
print("=" * 60)

aadt = df['FDOT_AADT'].dropna()
print(f"  Coverage: {len(aadt):,} ({len(aadt)/total*100:.1f}%)")
print(f"  Mean:     {aadt.mean():>10,.0f}")
print(f"  Median:   {aadt.median():>10,.0f}")
print(f"  Max:      {aadt.max():>10,.0f}")

print(f"\n  Percentiles:")
for p in [10, 25, 50, 75, 90, 95, 99]:
    print(f"    p{p:<3}: {np.percentile(aadt, p):>10,.0f}")

print(f"\n  By highway type:")
aadt_by_hw = df[df['FDOT_AADT'].notna()].groupby('highway')['FDOT_AADT'].agg(
    ['mean','median','count']
).sort_values('median', ascending=False)
for hw, row in aadt_by_hw.iterrows():
    print(f"    {hw:<22} median={row['median']:>9,.0f}  mean={row['mean']:>9,.0f}  n={int(row['count']):,}")

print(f"\n  Top 10 highest:")
top_aadt = df[df['FDOT_AADT'].notna()].nlargest(10, 'FDOT_AADT')[['name','highway','FDOT_AADT','target_speed','OWNER']]
print(top_aadt.to_string(index=False))

# Ownership
print("\n" + "=" * 60)
print("Ownership & Confidence by Owner")
print("=" * 60)

owner_stats = df.groupby('OWNER').agg(
    count=('target_speed', 'count'),
    speed_pct=('target_speed', lambda x: x.notna().mean()),
    avg_conf=('confidence_score', 'mean'),
    high_conf_pct=('confidence_score', lambda x: (x >= 0.6).mean()),
    fdot_pct=('FDOT_ROADWAY', lambda x: x.notna().mean()),
    osm_speed_pct=('osm_maxspeed', lambda x: x.notna().mean()),
).sort_values('count', ascending=False)

print(f"\n  {'Owner':<10} {'Count':>7}  {'Speed%':>7}  {'AvgConf':>8}  {'High%':>7}  {'FDOT%':>6}  {'OSM%':>6}")
print("  " + "-"*62)
for owner, row in owner_stats.iterrows():
    print(f"  {str(owner):<10} {int(row['count']):>7,}  {row['speed_pct']*100:>6.1f}%  "
          f"{row['avg_conf']:>8.3f}  {row['high_conf_pct']*100:>6.1f}%  "
          f"{row['fdot_pct']*100:>5.1f}%  {row['osm_speed_pct']*100:>5.1f}%")

# Speed distribution
print("\n" + "=" * 60)
print("Speed Limit Distribution")
print("=" * 60)

speed_counts = df['target_speed'].dropna().astype(int).value_counts().sort_index()
for spd, cnt in speed_counts.items():
    pct = cnt / has_speed.sum() * 100
    bar = "█" * int(pct / 1.5)
    print(f"  {spd:>3} mph   {cnt:>8,}  ({pct:>5.1f}%)  {bar}")

suspicious = df[(df['target_speed'] < 10) | (df['target_speed'] > 80)]
if len(suspicious) > 0:
    print(f"\n  Suspicious (<10 or >80 mph): {len(suspicious):,}")
    print(suspicious[['name','highway','target_speed','speed_source']].head(10).to_string(index=False))

# Multi-source agreement
print("\n" + "=" * 60)
print("Multi-Source Agreement")
print("=" * 60)

osm_fdot = (
    df['osm_maxspeed'].notna() & df['FDOT_SPEED'].notna() &
    (np.abs(df['osm_maxspeed'] - df['FDOT_SPEED']) <= 5)
)
osm_county = (
    df['osm_maxspeed'].notna() & df['COUNTY_SPEED'].notna() &
    (df['COUNTY_SPEED'] != 35) &
    (np.abs(df['osm_maxspeed'] - df['COUNTY_SPEED']) <= 5)
)
fdot_county = (
    df['FDOT_SPEED'].notna() & df['COUNTY_SPEED'].notna() &
    (df['COUNTY_SPEED'] != 35) &
    (np.abs(df['FDOT_SPEED'] - df['COUNTY_SPEED']) <= 5)
)
triple = osm_fdot & osm_county & fdot_county

print(f"  OSM ↔ FDOT:    {osm_fdot.sum():>7,}  ({osm_fdot.mean()*100:.1f}%)")
print(f"  OSM ↔ County:  {osm_county.sum():>7,}  ({osm_county.mean()*100:.1f}%)")
print(f"  FDOT ↔ County: {fdot_county.sum():>7,}  ({fdot_county.mean()*100:.1f}%)")
print(f"  All three:     {triple.sum():>7,}  ({triple.mean()*100:.1f}%)")

critical = df[df['highway'].isin(tier1 + tier2)].copy()
critical_disagree = critical[
    critical['osm_maxspeed'].notna() & critical['FDOT_SPEED'].notna() &
    (np.abs(critical['osm_maxspeed'] - critical['FDOT_SPEED']) > 5)
]
print(f"\n  Critical roads where OSM != FDOT: {len(critical_disagree):>6,}")
if len(critical_disagree) > 0:
    print(critical_disagree[['name','highway','osm_maxspeed','FDOT_SPEED','COUNTY_SPEED']].head(8).to_string(index=False))

# Landuse
print("\n" + "=" * 60)
print("Landuse Context")
print("=" * 60)

lu_stats = df.groupby('landuse').agg(
    count=('target_speed', 'count'),
    avg_conf=('confidence_score', 'mean'),
    speed_pct=('target_speed', lambda x: x.notna().mean()),
    avg_speed=('target_speed', 'mean'),
).sort_values('count', ascending=False)

print(f"\n  {'Landuse':<18} {'Count':>7}  {'Speed%':>7}  {'AvgConf':>8}  {'AvgSpd':>8}")
print("  " + "-"*54)
for lu, row in lu_stats.iterrows():
    print(f"  {str(lu):<18} {int(row['count']):>7,}  {row['speed_pct']*100:>6.1f}%  "
          f"{row['avg_conf']:>8.3f}  {row['avg_speed']:>7.1f}")
print(f"\n  No landuse: {df['landuse'].isna().sum():,} ({df['landuse'].isna().mean()*100:.1f}%)")

# Road complexity
print("\n" + "=" * 60)
print("Road Complexity (Median & Access Class)")
print("=" * 60)

if 'FDOT_MEDIAN_TYPE' in df.columns:
    med_counts = df['FDOT_MEDIAN_TYPE'].value_counts().head(12)
    print(f"\n  Median type:")
    for mt, cnt in med_counts.items():
        print(f"    {str(mt):<40} {cnt:>8,}  ({cnt/total*100:.1f}%)")
    print(f"    No data: {df['FDOT_MEDIAN_TYPE'].isna().sum():,}")

if 'FDOT_ACCESS_CLASS' in df.columns:
    acc_counts = df['FDOT_ACCESS_CLASS'].value_counts().head(10)
    print(f"\n  Access class:")
    for ac, cnt in acc_counts.items():
        print(f"    {str(ac):<40} {cnt:>8,}  ({cnt/total*100:.1f}%)")
    print(f"    No data: {df['FDOT_ACCESS_CLASS'].isna().sum():,}")

# Segment length
print("\n" + "=" * 60)
print("Segment Length")
print("=" * 60)

if 'length' in df.columns:
    df['length_m'] = pd.to_numeric(df['length'], errors='coerce')
else:
    df['length_m'] = df.geometry.length

total_km = df['length_m'].sum() / 1000
print(f"  Total network: {total_km:,.1f} km ({total_km*0.621371:,.1f} mi)")
print(f"  Mean segment:  {df['length_m'].mean():.1f} m")
print(f"  Median:        {df['length_m'].median():.1f} m")

print(f"\n  By highway type:")
len_by_hw = df.groupby('highway')['length_m'].agg(['sum','mean','count']).sort_values('sum', ascending=False)
for hw, row in len_by_hw.iterrows():
    note = " ★★" if hw in tier1 else " ★" if hw in tier2 else ""
    print(f"    {hw:<22} total={row['sum']/1000:>8,.1f}km  avg={row['mean']:>7.1f}m  n={int(row['count']):>7,}{note}")

short = df[df['length_m'] < 10]
print(f"\n  Very short (<10m): {len(short):,} ({len(short)/total*100:.1f}%) — OSM fragmentation")
if len(short) > 0:
    print(short.groupby('highway').size().sort_values(ascending=False).head(8).to_string())

# Missing speed
print("\n" + "=" * 60)
print("Missing Speed Investigation")
print("=" * 60)

none_segs = df[df['speed_source'] == 'none']
missing_speed = df[df['target_speed'].isna()]

print(f"  speed_source='none': {len(none_segs):,}")
print(f"  target_speed is NaN: {len(missing_speed):,}")

if len(none_segs) > 0:
    print(f"\n  By highway type:")
    print(none_segs['highway'].value_counts().to_string())
    print(f"\n  Sample:")
    print(none_segs[['name','highway','osm_maxspeed','FDOT_SPEED','COUNTY_SPEED','OWNER']].head(15).to_string(index=False))

# Study network scorecard
print("\n" + "=" * 60)
print("Study Network (excl residential/service)")
print("=" * 60)

study = df[~df['highway'].isin(['residential','service','living_street'])].copy()
total_study = len(study)
cs_s = study['confidence_score']

print(f"\n  Segments: {total_study:,} ({total_study/total*100:.1f}%)")
print(f"  Confidence — Mean: {cs_s.mean():.3f}  Median: {cs_s.median():.3f}  Std: {cs_s.std():.3f}")

print(f"\n  {'Tier':<22} {'Segs':>7}  {'Speed%':>7}  {'AvgConf':>8}  {'High>=0.6':>10}  {'FuncClass%':>11}")
print("  " + "-"*74)

tier_map = [
    ("Motorway", ['motorway']),
    ("Motorway links", ['motorway_link']),
    ("Trunk", ['trunk']),
    ("Trunk links", ['trunk_link']),
    ("Primary", ['primary']),
    ("Primary links", ['primary_link']),
    ("Secondary", ['secondary']),
    ("Secondary links", ['secondary_link']),
    ("Tertiary", ['tertiary','tertiary_link']),
    ("Unclassified", ['unclassified']),
]

all_study_types = tier1+tier2+tier3

for label, types in tier_map:
    subset = study[study['highway'].isin(types)]
    if len(subset) == 0: continue
    spd_pct = subset['target_speed'].notna().mean() * 100
    avg_c = subset['confidence_score'].mean()
    high_pct = (subset['confidence_score'] >= 0.6).mean() * 100
    fc_pct = (subset['speed_source'] == 'functional_class').mean() * 100
    print(f"  {label:<22} {len(subset):>7,}  {spd_pct:>6.1f}%  {avg_c:>8.3f}  {high_pct:>9.1f}%  {fc_pct:>10.1f}%")

print("  " + "-"*74)
sub_total = study[study['highway'].isin(all_study_types)]
spd_pct = sub_total['target_speed'].notna().mean() * 100
avg_c = sub_total['confidence_score'].mean()
high_pct = (sub_total['confidence_score'] >= 0.6).mean() * 100
fc_pct = (sub_total['speed_source'] == 'functional_class').mean() * 100
print(f"  {'TOTAL':<22} {len(sub_total):>7,}  {spd_pct:>6.1f}%  {avg_c:>8.3f}  {high_pct:>9.1f}%  {fc_pct:>10.1f}%")

print("\nDone")

Loading enriched network...
Total segments: 947,921

Overall Coverage
  Target speed:      947,530  (100.0%)
  FDOT match:         90,985  (9.6%)
  County match:      294,441  (31.1%)
  OSM maxspeed:       79,337  (8.4%)
  Owner tag:         294,437  (31.1%)
  Lane count:        131,646  (13.9%)
  AADT:               78,660  (8.3%)
  Functional class:   96,847  (10.2%)

Speed Source Breakdown
  functional_class           617,033  ( 65.1%)  ████████████████████████████████
  county_primary             120,947  ( 12.8%)  ██████
  mode                       112,627  ( 11.9%)  █████
  osm                         69,101  (  7.3%)  ███
  county_suspect              12,603  (  1.3%)  
  graph_lateral                4,630  (  0.5%)  
  fdot_fallback                4,427  (  0.5%)  
  graph_acceleration           2,716  (  0.3%)  
  graph_deceleration           2,454  (  0.3%)  
  none                           391  (  0.0%)  
  graph_exit_only                275  (  0.0%)  
  fdot_primary     