In [1]:
import osmnx as ox, pandas as pd, geopandas as gpd, networkx as nx, numpy as np
from scipy.spatial import cKDTree
from tqdm import tqdm

In [33]:
# Load Mecklenburg boundary
meck_bo = gpd.read_file("../../../Data/Original_dataset/Archive/mecklenburgcounty_boundary/MecklenburgCounty_Boundary.shp").to_crs(epsg=4326)
ABT = gpd.read_file("../../../Data/Final_dataset/ABT/ABT.gpkg", layer="subdivisions").to_crs(epsg=4326)
transit_stations = gpd.read_file("../../../Data/Original_dataset/transit_stations_4326.shp").to_crs(epsg=4326)
groceries_dataset = gpd.read_file("../../../Data/Original_dataset/Archive/Groceries/Grocery_Stores/Grocery_Stores_(points).shp").to_crs(epsg=4326)
groceries_dataset = groceries_dataset[groceries_dataset.within(meck_bo.geometry.iloc[0])]

In [3]:
G_walk = ox.graph_from_polygon(meck_bo.geometry.iloc[0], network_type='walk') # Get the pedestrian network within the boundary

# Optional: simplify and project
G_walk = ox.project_graph(G_walk, to_crs='EPSG:2264')

In [35]:
# Add walking time (minutes) to each edge
WALK_SPEED = 1.4  # m/s (~5 km/h)
for u, v, k, data in G_walk.edges(keys=True, data=True):
    length_m = data.get("length", 0)
    data["time_min"] = length_m / (WALK_SPEED * 60)

In [36]:
# Compute centroids as trip origins
ABT["centroid"] = ABT.geometry.centroid
ABT["lon"] = ABT["centroid"].x;ABT["lat"] = ABT["centroid"].y


  ABT["centroid"] = ABT.geometry.centroid


In [37]:
Q1 = ABT["APT"].quantile(0.25)
Q3 = ABT["APT"].quantile(0.75)
IQR = Q3 - Q1
outliers = ABT[
    (ABT["APT"] < Q1 - 1.5 * IQR) | 
    (ABT["APT"] > Q3 + 1.5 * IQR)
]

In [38]:
# outliers = outliers.nlargest(5, "APT")

In [42]:
# Precompute graph nodes for origins/destinations
sub_pts = outliers.set_geometry("centroid").to_crs(G_walk.graph["crs"])
sta_pts = transit_stations.to_crs(G_walk.graph["crs"])
# groc_pts = groceries_dataset.to_crs(G_walk.graph["crs"])

#Build KDTree (for initial candidate filtering)
station_coords = np.array(list(zip(sta_pts.geometry.x, sta_pts.geometry.y)))
tree_stations = cKDTree(station_coords)
# groceries_coords = np.array(list(zip(groc_pts.geometry.x, groc_pts.geometry.y)))
# tree_groceries = cKDTree(groceries_coords)

# Map each feature to its nearest graph node
sub_pts["origin_node"] = ox.distance.nearest_nodes(
    G_walk, sub_pts.geometry.x, sub_pts.geometry.y
) #snapping each subs centroid to its nearest walkable network node.
# station_nodes = [ox.distance.nearest_nodes(G_walk, x, y) for x, y in zip(sta_pts.geometry.x, sta_pts.geometry.y)] #snapping each station to its nearest walkable network node.#Compute walking time to network-true nearest station

sub_pts["nearest_station_idx"] = np.nan
sub_pts["nearest_station_node"] = np.nan
sub_pts["walk_time_min_osm"] = np.nan

In [43]:
#Find true nearest station by network
def get_network_nearest_station(G, origin_node, station_coords, tree_stations, origin_xy, k=3):
    """
    Returns index, node, and walking time (min) of the network-based nearest station
    among the k closest by Euclidean distance.
    """
    # Step 1: top-k nearest by Euclidean
    dist, nearest_idxs = tree_stations.query(origin_xy, k=k)
    if np.isscalar(nearest_idxs):  # handle k=1
        nearest_idxs = [nearest_idxs]

    best_time = np.inf
    best_station_idx = None
    best_station_node = None

    # Step 2: check walking time for each of these candidates
    for idx in nearest_idxs:
        sx, sy = station_coords[idx]
        try:
            s_node = ox.distance.nearest_nodes(G, sx, sy)
            t_min = nx.shortest_path_length(G, origin_node, s_node, weight="time_min")
            if t_min < best_time:
                best_time = t_min
                best_station_idx = idx
                best_station_node = s_node
        except Exception as e:
            # Route not found or disconnected
            continue

    if best_time == np.inf:
        best_time = np.nan
    return best_station_idx, best_station_node, best_time

In [None]:
sub_pts = sub_pts.set_geometry("centroid")  # ensure geometry is centroid (Points)

print("Computing walking times to network-based nearest transit station...")

for idx, row in tqdm(sub_pts.iterrows(), total=len(sub_pts)):
    try:
        origin_node = row["origin_node"]
        origin_xy = (row.centroid.x, row.centroid.y)
        s_idx, s_node, t_min = get_network_nearest_station(
            G_walk, origin_node, station_coords, tree_stations, origin_xy, k=3
        )
        sub_pts.loc[idx, "nearest_station_idx"] = s_idx
        sub_pts.loc[idx, "nearest_station_node"] = s_node
        sub_pts.loc[idx, "walk_time_min_osm"] = t_min
    except Exception as e:
        print(f"[{idx}] Error: {e}")

Computing walking times to network-based nearest transit station...


  3%|â–Ž         | 17/636 [01:45<1:02:40,  6.07s/it]

In [30]:
sub_pts["StopID"] = sub_pts["nearest_station_idx"].apply(
    lambda i: transit_stations.iloc[int(i)]["StopID"] if pd.notna(i) else None
)

In [32]:
sub_pts[["subd_id", "StopID","walk_time_min_osm"]].to_csv("../../../Data/Final_dataset/ABT/accessibility_osm.csv", index=False)