# S06 — Graph Smoothing & (Optional) GNN

Сглаживаем риск по дорожной сети (**лапласиан**) и опционально обучаем **GraphSAGE**
для распространения на мало‑покрытые рёбра.

In [None]:
%run ./S00_setup.ipynb

In [None]:
# 1) Load inputs: edge features + EB/ML
def _read_parquet_if_present(path):
    """Return the parquet file at *path* as a DataFrame, or None if the file is absent."""
    if not path.exists():
        return None
    return pd.read_parquet(path)


edge_df = _read_parquet_if_present(EDGE_FEATURES_PARQUET)
events_edge = _read_parquet_if_present(EVENTS_EDGE_PARQUET)
ml_edge = _read_parquet_if_present(ML_EDGE_PRED_PARQUET)

# Quick sanity print: shape of every table that was found, None for missing ones.
print([None if frame is None else frame.shape for frame in (edge_df, events_edge, ml_edge)])

# Only the edge-feature and edge-event tables are mandatory here; ml_edge may stay None.
if edge_df is None or events_edge is None:
    raise SystemExit("Нужны EDGE_FEATURES и EVENTS_EDGE. Запустите S03 и S04.")

In [None]:
# 2) Rule-based base risk: r = sum_i w_i * (k_i / n_obs)
w = CONFIG["SRI_WEIGHTS"]
ev = events_edge.copy()

# Zero observation counts are turned into NaN so the per-reason rate becomes NaN
# instead of inf; those NaNs are replaced by 0 at the end of the cell.
obs_count = ev["n_obs"].replace(0, np.nan)

ev["base_risk"] = 0.0
for reason in REASONS:
    if reason not in ev.columns:
        continue
    # Reasons without a configured weight default to a weight of 1.0.
    ev["base_risk"] += w.get(reason, 1.0) * (ev[reason] / obs_count)

# Attach the risk (and the events table's n_obs) to every edge of edge_df.
# edge_df's own n_obs, if any, is dropped first so the merge yields a single n_obs column.
key_cols = ["u", "v", "key"]
edge_attrs = edge_df.drop(columns=["n_obs"], errors="ignore")
base = edge_attrs.merge(ev[key_cols + ["base_risk", "n_obs"]], on=key_cols, how="left")

if ml_edge is not None:
    # Blend rule-based risk with the ML probability, 70% / 30%.
    # NOTE(review): a missing ml_prob is treated as 0, so edges without an ML
    # prediction end up with 0.7 * base_risk — confirm this is intended.
    base = base.merge(ml_edge, on=key_cols, how="left")
    rule_part = base["base_risk"].fillna(0)
    ml_part = base["ml_prob"].fillna(0)
    base["base_risk"] = 0.7 * rule_part + 0.3 * ml_part

# Edges with no events row (or with n_obs == 0) get a base risk of 0.
base["base_risk"] = base["base_risk"].fillna(0)

In [None]:
# 3) Graph smoothing (no scipy: iterative approximation)
# Builds an edge-to-edge neighbourhood from the road graph, lays base_risk out as a
# vector in edge-id order, and passes both to iterative_neighbor_smoothing (defined
# outside this cell). Without a graph the unsmoothed risk is used as-is.
if ox is not None and GRAPH_PATH.exists():
    G = ox.load_graphml(GRAPH_PATH)

    # Give every (u, v, key) edge a dense integer id, in graph iteration order.
    edge_index = {edge: eid for eid, edge in enumerate(G.edges(keys=True))}
    neighbors = {eid: set() for eid in edge_index.values()}

    # Adjacency: two edges are neighbours when they touch the same node.
    # On a directed graph G.edges(node) lists only the edges leaving the node, so
    # incoming edges are added explicitly; otherwise consecutive segments
    # (a -> node, node -> b) would never be linked.
    directed = G.is_directed()
    for node in G.nodes():
        incident = {edge_index[e] for e in G.edges(node, keys=True) if e in edge_index}
        if directed:
            incident.update(
                edge_index[e] for e in G.in_edges(node, keys=True) if e in edge_index
            )
        for eid in incident:
            neighbors[eid] |= incident
            # An edge is never its own neighbour (this also covers self-loops).
            neighbors[eid].discard(eid)

    # Map each table row to its edge id; rows whose edge is not in the graph are dropped.
    # Column-wise zip avoids iterrows(), which is slow and upcasts each row to one dtype.
    edge_keys = zip(base["u"], base["v"], base["key"])
    base = base.assign(eid=[edge_index.get(edge, -1) for edge in edge_keys])
    # Explicit copy so the column assignment below does not write into a slice.
    base = base[base["eid"] >= 0].copy()

    # Risk vector in edge-id order; graph edges without a table row keep risk 0.
    # If several rows share an edge id, the last row wins.
    v = np.zeros(len(neighbors), dtype=float)
    for eid, risk in zip(base["eid"], base["base_risk"]):
        v[eid] = risk

    # Smoothing
    v_sm = iterative_neighbor_smoothing(v, neighbors, lam=CONFIG["GRAPH_LAMBDA"], n_iter=12)
    base["risk_smoothed"] = [v_sm[int(eid)] for eid in base["eid"]]
else:
    print("Нет графа OSM — используем несглаженный риск.")
    base["risk_smoothed"] = base["base_risk"]

In [None]:
# 4) Final SRI on a 0–100 scale: standardise the smoothed risk, then apply a logistic curve
risk = base["risk_smoothed"].fillna(0).values

# Centre on the median and scale by the standard deviation;
# the 1e-6 term keeps the division finite when all risks are equal.
center = np.nanmedian(risk)
spread = np.nanstd(risk) + 1e-6
z = (risk - center) / spread

# Logistic squashing: the median edge maps to 50.
base["SRI"] = 100.0 / (1.0 + np.exp(-z))

# Export
export_cols = ["u", "v", "key", "SRI", "risk_smoothed", "base_risk", "n_obs"]
base_out = base[export_cols].copy()
base_out.to_parquet(SRI_EDGE_PARQUET, index=False)
print("Saved:", SRI_EDGE_PARQUET)