# 7f – Threshold / K‑nearest rule

Assign each spoke to its closest hub **only if that hub lies within `THRESH_KM` kilometres** (default 40 km, great-circle distance); otherwise leave the spoke unassigned (no hub name, infinite `dist_km`) to flag coverage gaps.

In [None]:
import math

import pandas as pd, math
from pathlib import Path

# Folder holding the input CSVs; '.' is resolved against the current working directory.
DATA_DIR = Path('.')
# Acute hospitals file (loaded further down as the hub table `acute`).
ACUTE_CSV = DATA_DIR / 'NHS_SW_Acute_Hospitals_enriched.csv'
# Community Diagnostic Centres and Community Hospitals files
# (concatenated further down into the `spokes` table).
CDC_CSV   = DATA_DIR / 'NHS_SW_Community_Diagnostic_Centres_enriched.csv'
CH_CSV    = DATA_DIR / 'NHS_SW_Community_Hospitals_enriched.csv'
R_EARTH = 6371  # mean Earth radius in kilometres; sets the unit of haversine()


def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points, in R_EARTH units (km).

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal degrees.

    Returns:
        The haversine distance as a float. NaN coordinates yield NaN.
    """
    phi1, lam1, phi2, lam2 = map(math.radians, (lat1, lon1, lat2, lon2))
    d_phi, d_lam = phi2 - phi1, lam2 - lam1
    a = (math.sin(d_phi / 2) ** 2
         + math.cos(phi1) * math.cos(phi2) * math.sin(d_lam / 2) ** 2)
    # Rounding can leave `a` a hair outside [0, 1] for (near-)antipodal points,
    # which would make sqrt(1 - a) raise ValueError. Clamp with comparisons
    # rather than min()/max() so that a NaN `a` stays NaN instead of becoming 0.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    return 2 * R_EARTH * math.atan2(math.sqrt(a), math.sqrt(1 - a))

# Largest hub distance (km) at which a spoke still counts as covered.
THRESH_KM = 40

# Hubs are the acute hospitals; spokes are the CDC rows followed by the
# community-hospital rows, renumbered with a fresh 0..n-1 index.
acute = pd.read_csv(ACUTE_CSV)
spoke_frames = [pd.read_csv(csv_path) for csv_path in (CDC_CSV, CH_CSV)]
spokes = pd.concat(spoke_frames, ignore_index=True)

def label_within_threshold(row):
    """Return the Name of the acute hub nearest to `row`, or None if it is too far.

    `row` must expose `latitude` and `longitude`. The distance to every row of
    `acute` is computed with `haversine`; the closest hub's Name is returned
    only when that distance does not exceed THRESH_KM.
    """
    def km_to(hub):
        return haversine(row.latitude, row.longitude,
                         hub.latitude, hub.longitude)

    hub_km = acute.apply(km_to, axis=1)
    # Written as "not (<=)" so a NaN minimum is treated as out of range.
    if not (hub_km.min() <= THRESH_KM):
        return None
    closest_idx = hub_km.idxmin()
    return acute.loc[closest_idx, 'Name']

# Label every spoke with its nearest in-range hub (missing when none qualifies).
spokes['nearest_acute'] = spokes.apply(label_within_threshold, axis=1)

# Index hub coordinates by name once instead of scanning `acute` twice per
# spoke. Only the first row per Name is kept, so duplicate hub names resolve
# to the first match.
hub_coords = (
    acute.drop_duplicates('Name')
    .set_index('Name')[['latitude', 'longitude']]
)


def _dist_to_assigned_hub(r):
    """Return the distance (km) from spoke `r` to its assigned hub, or inf if unassigned."""
    # pd.isna covers both None and NaN: pandas may store the missing label as
    # either, and an `is None` test would let NaN fall through to the lookup.
    if pd.isna(r.nearest_acute):
        return math.inf
    hub = hub_coords.loc[r.nearest_acute]
    return haversine(r.latitude, r.longitude, hub.latitude, hub.longitude)


spokes['dist_km'] = spokes.apply(_dist_to_assigned_hub, axis=1)

spokes.head()