In [3]:
# -*- coding: utf-8 -*-
from pathlib import Path
import pandas as pd
import geopandas as gpd
from libpysal.weights import Queen

# ---------- 路径 ----------
GEO_PATH_IN   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\cleaned geo data\NUTS2_2021.gpkg"
PANEL_PATH_IN = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\panel_long_merged.csv"

OUT_DIR        = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\filtered_by_islands")
OUT_DIR.mkdir(parents=True, exist_ok=True)
PANEL_PATH_OUT = OUT_DIR / "panel_long_no_islands.csv"
GEO_PATH_OUT   = OUT_DIR / "NUTS2_2021_no_islands.gpkg"
DROPPED_IDS_TXT= OUT_DIR / "islands_dropped.txt"

def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","geo","code","id"]:
        if c in gdf.columns: return c
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

# 读数据
gdf = gpd.read_file(GEO_PATH_IN)
gid = detect_geo_id(gdf)

# 统一 ID（注意 .str.upper()）
gdf["__id__"] = (
    gdf[gid]
    .astype("string")
    .str.strip()
    .str.upper()
)

pdf = pd.read_csv(PANEL_PATH_IN, encoding="utf-8-sig")
if "region" not in pdf.columns:
    raise ValueError("面板缺少 'region' 列")

pdf["region"] = (
    pdf["region"]
    .astype("string")
    .str.strip()
    .str.upper()
)

# Step 1: geo 先按 csv 过滤
keep_regions = set(pdf["region"].dropna().unique())
gdf_sub = gdf[gdf["__id__"].isin(keep_regions)].copy()
print(f"geo过滤后剩余区域数: {len(gdf_sub)}")

# Step 2: Queen 找孤岛
from libpysal.weights import Queen
import numpy as np

# 方式一：保留 ids
wq = Queen.from_dataframe(gdf_sub, ids=gdf_sub["__id__"].tolist())

first = next(iter(wq.islands), None)
if isinstance(first, (int, np.integer)):
    island_ids = gdf_sub["__id__"].iloc[list(wq.islands)].tolist()
else:
    island_ids = list(wq.islands)

print(f"识别出孤岛数量: {len(island_ids)}")
print("孤岛示例:", sorted(island_ids)[:12], "..." if len(island_ids)>12 else "")


with open(DROPPED_IDS_TXT, "w", encoding="utf-8") as f:
    f.write("Queen islands (degree=0) dropped:\n")
    for rid in sorted(island_ids):
        f.write(rid + "\n")

# Step 3: 从 CSV 与 GEO 都删除孤岛并另存
isles = set(island_ids)
pdf_no_islands = pdf[~pdf["region"].isin(isles)].copy()
pdf_no_islands.to_csv(PANEL_PATH_OUT, index=False, encoding="utf-8-sig")
print("✅ 已保存去孤岛后的面板:", PANEL_PATH_OUT, "| 行数:", len(pdf_no_islands))

gdf_no_islands = gdf_sub[~gdf_sub["__id__"].isin(isles)].copy()
cols = [c for c in gdf_no_islands.columns if c != "__id__"]
gdf_no_islands = gdf_no_islands[cols]

try:
    if gdf_no_islands.crs and gdf_no_islands.crs.to_epsg() == 4326:
        gdf_no_islands = gdf_no_islands.to_crs(3035)
except Exception:
    pass

gdf_no_islands.to_file(GEO_PATH_OUT, driver="GPKG")
print("✅ 已保存去孤岛后的 geo:", GEO_PATH_OUT, "| 区域数:", len(gdf_no_islands))


geo过滤后剩余区域数: 204
识别出孤岛数量: 9
孤岛示例: ['EL41', 'EL42', 'EL43', 'EL62', 'ES53', 'FI20', 'FRM0', 'ITG1', 'ITG2'] 
✅ 已保存去孤岛后的面板: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\filtered_by_islands\panel_long_no_islands.csv | 行数: 2145
✅ 已保存去孤岛后的 geo: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\filtered_by_islands\NUTS2_2021_no_islands.gpkg | 区域数: 195


 There are 14 disconnected components.
 There are 9 islands with ids: ES53, EL62, EL42, EL41, EL43, FI20, ITG2, FRM0, ITG1.
  W.__init__(self, neighbors, ids=ids, **kw)


In [2]:
# --- KNN 权重网络图 ---
from pathlib import Path
import numpy as np, pandas as pd, geopandas as gpd, matplotlib.pyplot as plt
from shapely.geometry import LineString
from libpysal.weights import KNN
import networkx as nx

PANEL = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\panel_long_no_islands.csv"
GEO   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT   = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\weights_knn")
OUT.mkdir(parents=True, exist_ok=True)

K = 6  # ← 想更密就用 10；更稀就 6

def gid(gdf):
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","code","id"]:
        if c in gdf.columns: return c
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

gdf = gpd.read_file(GEO)
key = gid(gdf)
gdf["region"] = gdf[key].astype("string").str.strip().str.upper()
pdf = pd.read_csv(PANEL, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()

g = gdf.merge(pdf[["region"]].drop_duplicates(), on="region", how="inner").drop_duplicates("region")
if (not g.crs) or g.crs.to_epsg()==4326:
    g = g.to_crs(3035)

W = KNN.from_dataframe(g, k=K, use_index=True); W.transform = "R"

# 连接性诊断
G = nx.Graph({i:{j:1 for j in W.neighbors[i]} for i in range(len(g))})
comps = list(nx.connected_components(G))
print(f"KNN(k={K}) regions={len(g)}, components={len(comps)}, "
      f"min_deg={min(len(W.neighbors[i]) for i in range(len(g)))}, "
      f"avg_deg={np.mean([len(W.neighbors[i]) for i in range(len(g))]):.2f}")

# 画边
cent = g.geometry.centroid
xy = np.column_stack([cent.x.to_numpy("float64"), cent.y.to_numpy("float64")])
lines=[]
for i, nbrs in W.neighbors.items():
    for j in nbrs:
        if j<i: continue
        lines.append(LineString([xy[i], xy[j]]))
edges = gpd.GeoDataFrame(geometry=lines, crs=g.crs)

# --- Full (底图+边+点)，无任何文字 ---
fig, ax = plt.subplots(figsize=(9.5, 8))
g.plot(ax=ax, color="#d9d9d9", edgecolor="white", linewidth=0.3)
edges.plot(ax=ax, color="#7f9cf5", linewidth=0.7, alpha=0.9)
ax.scatter(cent.x, cent.y, s=7, zorder=3, color="crimson")
ax.set_axis_off()  # 不要坐标轴与刻度
fig.savefig(OUT / "knn_full_no_text.png", dpi=220,
            bbox_inches="tight", pad_inches=0)
plt.close(fig)

# --- Edges only（仅画连线），无任何文字 ---
fig, ax = plt.subplots(figsize=(9.5, 8))
edges.plot(ax=ax, color="#7f9cf5", linewidth=0.8, alpha=0.95)
ax.set_axis_off()
fig.savefig(OUT / "knn_edges_only_no_text.png", dpi=220,
            bbox_inches="tight", pad_inches=0)
plt.close(fig)

print("✅ saved:", OUT/"knn_full.png")
print("✅ saved:", OUT/"knn_edges_only.png")


KNN(k=6) regions=195, components=1, min_deg=6, avg_deg=6.00
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\weights_knn\knn_full.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\weights_knn\knn_edges_only.png


In [9]:
# -*- coding: utf-8 -*-
"""
Global Moran's I with KNN (k=6), years 2014–2023, NUTS-2
- 输入：panel_long_no_islands.csv + NUTS2_2021_no_islands.gpkg
- 权重：KNN k=6（row-standardized）
- 输出：汇总CSV + Moran散点图(每年) + 时间序列图(每变量)
"""

from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN, lag_spatial
from esda.moran import Moran

# ======= 路径 =======
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\global_moran_knn6")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ======= 设置 =======
YEARS        = list(range(2014, 2024))
VARS         = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
PERMUTATIONS = 999
KNN_K        = 6
MAKE_PLOTS   = True

# ======= 小工具 =======
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","code","id"]:
        if c in gdf.columns: return c
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

def zscore(x: np.ndarray) -> np.ndarray:
    x = x.astype(float)
    return (x - x.mean()) / x.std(ddof=0)

# ======= 读数据 =======
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()
if (not gdf.crs) or gdf.crs.to_epsg() == 4326:
    gdf = gdf.to_crs(3035)

pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# ======= 主循环：Moran's I =======
rows = []
for var in VARS:
    (OUT_DIR / var).mkdir(parents=True, exist_ok=True)
    for yr in YEARS:
        d = pdf.loc[pdf["year"] == yr, ["region", var]].dropna()
        if d.empty: 
            continue
        gg = gdf[["region", "geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < 5:
            continue

        # 权重：KNN k=6（行标准化）
        W = KNN.from_dataframe(gg, k=KNN_K, use_index=True)
        W.transform = "R"

        z = zscore(gg[var].to_numpy())
        mi = Moran(z, W, permutations=PERMUTATIONS)

        rows.append({
            "variable": var, "year": yr, "N": int(len(gg)),
            "I": float(mi.I), "z_norm": float(mi.z_norm), "p_sim": float(mi.p_sim),
            "weight": f"KNN(k={KNN_K})", "permutations": PERMUTATIONS
        })

        # Moran 散点图
        if MAKE_PLOTS:
            wy = lag_spatial(W, z)
            fig, ax = plt.subplots(figsize=(5.2, 4.2))
            ax.scatter(z, wy, s=16, alpha=0.7)
            ax.axhline(0, color="k", lw=0.8, alpha=0.6)
            ax.axvline(0, color="k", lw=0.8, alpha=0.6)
            xs = np.linspace(z.min(), z.max(), 100)
            slope = np.cov(z, wy)[0,1] / np.var(z, ddof=0)  # 拟合斜率≈I（标准化下）
            ax.plot(xs, slope*xs, lw=1.2)
            ax.set_xlabel(f"Standardized {var}")
            ax.set_ylabel(f"Spatial lag of {var}")
            ax.set_title(f"{var} | {yr} | I={mi.I:.3f}, p={mi.p_sim:.3f}  (KNN k={KNN_K})")
            fig.tight_layout()
            fig.savefig(OUT_DIR / var / f"moran_scatter_{var}_{yr}.png", dpi=180)
            plt.close(fig)

# ======= 保存汇总 & 画时间序列 =======
res = pd.DataFrame(rows).sort_values(["variable","year"])
summary_csv = OUT_DIR / "global_moran_summary_knn6_2014_2023.csv"
res.to_csv(summary_csv, index=False, encoding="utf-8-sig")
print("✅ Moran's I summary saved:", summary_csv)

# 时间序列（每变量一张）
def star(p):
    return "***" if p < 0.001 else ("**" if p < 0.01 else ("*" if p < 0.05 else ""))

for var in VARS:
    sub = res[res["variable"] == var].sort_values("year")
    if sub.empty: 
        continue
    fig, ax = plt.subplots(figsize=(7.5, 3.8))
    ax.plot(sub["year"], sub["I"], marker="o", lw=1.6)
    for y, I, p in zip(sub["year"], sub["I"], sub["p_sim"]):
        s = star(p)
        if s:
            ax.text(y, I, s, ha="center", va="bottom", fontsize=10)
    ax.axhline(0, color="k", lw=0.8, alpha=0.6)
    ax.set_title(f"Global Moran's I over time (KNN k={KNN_K}): {var}")
    ax.set_xlabel("Year"); ax.set_ylabel("Moran's I")
    ax.set_xticks(sorted(sub["year"].unique()))
    ax.grid(alpha=0.15)
    fig.tight_layout()
    out_png = OUT_DIR / f"{var}_moranI_timeseries_knn{KNN_K}_2014_2023.png"
    fig.savefig(out_png, dpi=180)
    plt.close(fig)
    print("✅ Timeseries saved:", out_png)


✅ Moran's I summary saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\global_moran_knn6\global_moran_summary_knn6_2014_2023.csv
✅ Timeseries saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\global_moran_knn6\employment_rate_moranI_timeseries_knn6_2014_2023.png
✅ Timeseries saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\global_moran_knn6\unemployment_rate_moranI_timeseries_knn6_2014_2023.png
✅ Timeseries saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\global_moran_knn6\log_gdp_pc_moranI_timeseries_knn6_2014_2023.png
✅ Timeseries saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\global_moran_knn6\vet_per_million_moranI_timeseries_knn6_2014_2023.png


In [8]:
# -*- coding: utf-8 -*-
# Ten-year Moran scatterplot grid (2x5) with KNN(k=6), NUTS-2

from pathlib import Path
import math
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN, lag_spatial

# ========= 路径 =========
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\global_moran_knn6\ten_years")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========= 设置 =========
YEARS  = list(range(2014, 2024))
VARS   = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
KNN_K  = 6  # KNN 邻居数

# ========= 小工具 =========
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","code","id"]:
        if c in gdf.columns:
            return c
    # 兜底：取非 geometry 的第一列
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

def moran_grid_10y(panel_df, geo_df, var, years=range(2014, 2024),
                   k=6, out_dir=OUT_DIR, include_suptitle=False):
    """十年 Moran 散点 2×5 面板图（每子图有年份标题）"""
    cells, zmins, zmaxs = [], [], []
    for yr in years:
        d = panel_df.loc[panel_df["year"] == yr, ["region", var]].dropna()
        if d.empty:
            continue
        gg = geo_df[["region","geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < 5:
            continue

        # 权重：KNN(k) 行标准化
        W = KNN.from_dataframe(gg, k=k, use_index=True)
        W.transform = "R"

        # 标准化变量 z，并计算空间滞后 wy
        x = gg[var].to_numpy(float)
        z  = (x - x.mean()) / x.std(ddof=0)
        wy = lag_spatial(W, z)

        cells.append((yr, z, wy))
        zmins.append(z.min()); zmaxs.append(z.max())

    if not cells:
        return None

    # 统一坐标范围、布局
    zmin, zmax = min(zmins), max(zmaxs)
    n = len(cells); ncols = 5; nrows = math.ceil(n / ncols)

    fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2.6, nrows*2.4))
    axes = np.array(axes).reshape(nrows, ncols)

    for k_, (yr, z, wy) in enumerate(cells):
        r, c = divmod(k_, ncols)
        ax = axes[r, c]
        ax.scatter(z, wy, s=12, alpha=0.75)
        ax.axhline(0, lw=0.6, alpha=0.6); ax.axvline(0, lw=0.6, alpha=0.6)
        xs = np.linspace(zmin, zmax, 120)
        slope = np.cov(z, wy)[0,1] / np.var(z, ddof=0)  # 斜率≈I（标准化下）
        ax.plot(xs, slope*xs, lw=1.0)
        ax.set_xlim(zmin, zmax)
        ax.set_xticks([]); ax.set_yticks([])
        ax.set_title(f"{yr}", fontsize=9)

    # 关掉多余子图
    for k_ in range(n, nrows*ncols):
        r, c = divmod(k_, ncols)
        axes[r, c].axis("off")

    if include_suptitle:
        fig.suptitle(f"Moran scatterplots — {var} (KNN k={k}, 2014–2023)", y=0.98, fontsize=11)
        fig.tight_layout(rect=[0,0,1,0.95])
    else:
        fig.tight_layout()

    out_path = out_dir / f"{var}_moran_scatter_grid_k{k}_2014_2023.png"
    fig.savefig(out_path, dpi=200, bbox_inches="tight")
    plt.close(fig)
    return out_path

# ========= 读数据 =========
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()
# 投影：若是经纬度则转 LAEA Europe
try:
    if (not gdf.crs) or gdf.crs.to_epsg() == 4326:
        gdf = gdf.to_crs(3035)
except Exception:
    pass

pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# ========= 生成并保存 =========
for var in VARS:
    p = moran_grid_10y(pdf, gdf, var=var, years=YEARS, k=KNN_K,
                       out_dir=OUT_DIR, include_suptitle=False)  # 不要总标题
    if p:
        print("✅ saved:", p)
    else:
        print(f"⚠️ no figure for {var} (insufficient data).")




✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\global_moran_knn6\ten_years\employment_rate_moran_scatter_grid_k6_2014_2023.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\global_moran_knn6\ten_years\unemployment_rate_moran_scatter_grid_k6_2014_2023.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\global_moran_knn6\ten_years\log_gdp_pc_moran_scatter_grid_k6_2014_2023.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\global_moran_knn6\ten_years\vet_per_million_moran_scatter_grid_k6_2014_2023.png


In [11]:
# -*- coding: utf-8 -*-
"""
LISA (Local Moran's I) — KNN k=6, NUTS2, 2014–2023
输出：每年×每变量 CSV + 聚类地图；可选十年 2×5 面板图
"""

from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN
from esda.moran import Moran_Local

# ========== 路径 ==========
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\lisa_knn6")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========== 设置 ==========
YEARS        = list(range(2014, 2024))
VARS         = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
K            = 6
ALPHA        = 0.05
PERMUTATIONS = 999
MAKE_GRID10  = True   # 十年 2×5 面板图开关

# ========== 工具 ==========
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","code","id"]:
        if c in gdf.columns: return c
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

def zscore(x: np.ndarray) -> np.ndarray:
    x = x.astype(float)
    return (x - x.mean()) / x.std(ddof=0)

def lisa_cluster_labels(z, W, permutations=999, alpha=0.05):
    ml = Moran_Local(z, W, permutations=permutations)
    sig  = ml.p_sim < alpha
    quad = ml.q  # 1 HH, 2 LH, 3 LL, 4 HL
    labels = np.array(["Not significant"]*len(z), dtype=object)
    labels[sig & (quad==1)] = "High-High"
    labels[sig & (quad==2)] = "Low-High"
    labels[sig & (quad==3)] = "Low-Low"
    labels[sig & (quad==4)] = "High-Low"
    return ml, sig, quad, labels

# 颜色 & 顺序
CLRS  = {"High-High":"#d7191c","Low-Low":"#2c7bb6","High-Low":"#fdae61","Low-High":"#abd9e9","Not significant":"#e0e0e0"}
ORDER = ["High-High","Low-Low","High-Low","Low-High","Not significant"]

# ========== 读数据 ==========
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()
if (not gdf.crs) or gdf.crs.to_epsg()==4326:
    gdf = gdf.to_crs(3035)

pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# ========== 主循环 ==========
for var in VARS:
    vdir = OUT_DIR / var
    vdir.mkdir(parents=True, exist_ok=True)

    for yr in YEARS:
        d = pdf.loc[pdf["year"]==yr, ["region", var]].dropna()
        if d.empty: 
            continue

        gg = gdf[["region","geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < 5:
            continue

        # KNN k=6（行标准化）
        W = KNN.from_dataframe(gg, k=K, use_index=True); W.transform = "R"

        z = zscore(gg[var].to_numpy())
        ml, sig, quad, labels = lisa_cluster_labels(z, W, permutations=PERMUTATIONS, alpha=ALPHA)

        # ---- CSV ----
        out_tbl = gg[["region"]].copy()
        out_tbl["Ii"] = ml.Is
        out_tbl["p_value"] = ml.p_sim
        out_tbl["quadrant"] = quad
        out_tbl["significant"] = sig
        out_tbl["cluster"] = labels
        csv_path = vdir / f"lisa_knn6_{var}_{yr}.csv"
        out_tbl.to_csv(csv_path, index=False, encoding="utf-8-sig")

        # ---- 地图 ----
        gg_plot = gg.copy()
        gg_plot["cluster"] = pd.Categorical(labels, categories=ORDER)
        face = gg_plot["cluster"].map(CLRS).fillna("#e0e0e0")

        fig, ax = plt.subplots(figsize=(7.6, 6.0))
        gg_plot.plot(ax=ax, color=face, edgecolor="white", linewidth=0.2)
        ax.set_axis_off()
        ax.set_title(f"LISA — {var} ({yr})  α={ALPHA}, perms={PERMUTATIONS}\nKNN (k={K}, row-standardized)", fontsize=11)
        import matplotlib.patches as mpatches
        handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
        ax.legend(handles=handles, title="Cluster",
                  loc="center left", bbox_to_anchor=(1.02, 0.5),
                  frameon=True, fancybox=True, framealpha=0.98)
        fig.subplots_adjust(right=0.82)
        png_path = vdir / f"lisa_knn6_map_{var}_{yr}.png"
        fig.savefig(png_path, dpi=220, bbox_inches="tight")
        plt.close(fig)

        print(f"✅ {var} {yr}: CSV: {csv_path.name} | PNG: {png_path.name}")

# ========== 十年 2×5 面板（可选） ==========
if MAKE_GRID10:
    import math
    from libpysal.weights import lag_spatial

    def moran_grid_knn(panel_df, geo_df, var, years, k=K, out_dir=OUT_DIR):
        cells, zmins, zmaxs = [], [], []
        for yr in years:
            d = panel_df.loc[panel_df["year"]==yr, ["region",var]].dropna()
            if d.empty: continue
            gg = geo_df[["region","geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
            if len(gg)<5: continue
            W = KNN.from_dataframe(gg, k=k, use_index=True); W.transform="R"
            z  = zscore(gg[var].to_numpy()); wy = lag_spatial(W, z)
            cells.append((yr, z, wy)); zmins.append(z.min()); zmaxs.append(z.max())
        if not cells: return None

        zmin, zmax = min(zmins), max(zmaxs)
        n, ncols = len(cells), 5
        nrows = math.ceil(n/ncols)
        fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2.6, nrows*2.4))
        axes = np.array(axes).reshape(nrows, ncols)

        for i,(yr,z,wy) in enumerate(cells):
            r,c = divmod(i, ncols)
            ax = axes[r,c]
            ax.scatter(z, wy, s=12, alpha=0.7)
            ax.axhline(0, color="k", lw=0.6, alpha=0.6)
            ax.axvline(0, color="k", lw=0.6, alpha=0.6)
            xs = np.linspace(zmin, zmax, 100)
            slope = np.cov(z, wy)[0,1] / np.var(z, ddof=0)
            ax.plot(xs, slope*xs, lw=1.0)
            ax.set_xlim(zmin, zmax)
            ax.set_xticks([]); ax.set_yticks([])
            ax.set_title(f"{yr}", fontsize=9)

        for j in range(n, nrows*ncols):
            r,c = divmod(j, ncols); axes[r,c].axis("off")

        fig.suptitle(f"Moran scatterplots — {var} (KNN k={k}, 2014–2023)", y=0.98, fontsize=11)
        fig.tight_layout(rect=[0,0,1,0.95])
        out_path = out_dir / f"{var}_moran_scatter_grid_knn{k}_2014_2023.png"
        fig.savefig(out_path, dpi=200); plt.close(fig)
        return out_path

    for var in VARS:
        pth = moran_grid_knn(pdf, gdf, var, YEARS, k=K, out_dir=OUT_DIR)
        if pth: print("✅ Saved 10-year grid:", pth)

print("\n🎯 All outputs →", OUT_DIR)


✅ employment_rate 2014: CSV: lisa_knn6_employment_rate_2014.csv | PNG: lisa_knn6_map_employment_rate_2014.png
✅ employment_rate 2015: CSV: lisa_knn6_employment_rate_2015.csv | PNG: lisa_knn6_map_employment_rate_2015.png
✅ employment_rate 2016: CSV: lisa_knn6_employment_rate_2016.csv | PNG: lisa_knn6_map_employment_rate_2016.png
✅ employment_rate 2017: CSV: lisa_knn6_employment_rate_2017.csv | PNG: lisa_knn6_map_employment_rate_2017.png
✅ employment_rate 2018: CSV: lisa_knn6_employment_rate_2018.csv | PNG: lisa_knn6_map_employment_rate_2018.png
✅ employment_rate 2019: CSV: lisa_knn6_employment_rate_2019.csv | PNG: lisa_knn6_map_employment_rate_2019.png
✅ employment_rate 2020: CSV: lisa_knn6_employment_rate_2020.csv | PNG: lisa_knn6_map_employment_rate_2020.png
✅ employment_rate 2021: CSV: lisa_knn6_employment_rate_2021.csv | PNG: lisa_knn6_map_employment_rate_2021.png
✅ employment_rate 2022: CSV: lisa_knn6_employment_rate_2022.csv | PNG: lisa_knn6_map_employment_rate_2022.png
✅ employme

In [2]:
# -*- coding: utf-8 -*-
"""
LISA (Local Moran's I) — KNN k=6, NUTS2, 2014–2023
输出：每年×每变量 CSV + 聚类地图；并把每变量 10 年图整合成 1 张 2×5 面板图
"""

from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN
from esda.moran import Moran_Local

# ========== 路径 ==========
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_10")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========== 设置 ==========
YEARS        = list(range(2014, 2024))
VARS         = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
K            = 6
ALPHA        = 0.05
PERMUTATIONS = 999

# ========== 工具 ==========
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","code","id"]:
        if c in gdf.columns: return c
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

def zscore(x: np.ndarray) -> np.ndarray:
    x = x.astype(float)
    return (x - x.mean()) / x.std(ddof=0)

def lisa_cluster_labels(z, W, permutations=999, alpha=0.05):
    ml = Moran_Local(z, W, permutations=permutations)
    sig  = ml.p_sim < alpha
    quad = ml.q  # 1 HH, 2 LH, 3 LL, 4 HL
    labels = np.array(["Not significant"]*len(z), dtype=object)
    labels[sig & (quad==1)] = "High-High"
    labels[sig & (quad==2)] = "Low-High"
    labels[sig & (quad==3)] = "Low-Low"
    labels[sig & (quad==4)] = "High-Low"
    return ml, sig, quad, labels

# 颜色 & 顺序
CLRS  = {"High-High":"#d7191c","Low-Low":"#2c7bb6","High-Low":"#fdae61","Low-High":"#abd9e9","Not significant":"#e0e0e0"}
ORDER = ["High-High","Low-Low","High-Low","Low-High","Not significant"]

# ========== 读数据 ==========
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()
if (not gdf.crs) or gdf.crs.to_epsg()==4326:
    gdf = gdf.to_crs(3035)

pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# 预先记录整幅地图范围，保证面板子图范围一致
xmin, ymin, xmax, ymax = gdf.total_bounds

# ========== 主循环 ==========
for var in VARS:
    vdir = OUT_DIR / var
    vdir.mkdir(parents=True, exist_ok=True)

    # 为面板图缓存每年的“涂色后”GeoDataFrame（只存 cluster 列即可）
    yearly_maps = []   # list of (year, gg_plot_with_cluster)

    for yr in YEARS:
        d = pdf.loc[pdf["year"]==yr, ["region", var]].dropna()
        if d.empty:
            continue

        gg = gdf[["region","geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < 5:
            continue

        # KNN k=6（行标准化）
        W = KNN.from_dataframe(gg, k=K, use_index=True); W.transform = "R"

        z = zscore(gg[var].to_numpy())
        ml, sig, quad, labels = lisa_cluster_labels(z, W, permutations=PERMUTATIONS, alpha=ALPHA)

        # ---- CSV ----
        out_tbl = gg[["region"]].copy()
        out_tbl["Ii"] = ml.Is
        out_tbl["p_value"] = ml.p_sim
        out_tbl["quadrant"] = quad
        out_tbl["significant"] = sig
        out_tbl["cluster"] = labels
        csv_path = vdir / f"lisa_knn6_{var}_{yr}.csv"
        out_tbl.to_csv(csv_path, index=False, encoding="utf-8-sig")

        # ---- 单年地图 ----
        gg_plot = gg.copy()
        gg_plot["cluster"] = pd.Categorical(labels, categories=ORDER)
        face = gg_plot["cluster"].map(CLRS).fillna("#e0e0e0")

        fig, ax = plt.subplots(figsize=(7.6, 6.0))
        gg_plot.plot(ax=ax, color=face, edgecolor="white", linewidth=0.2)
        ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
        ax.set_axis_off()
        ax.set_title(f"LISA — {var} ({yr})  α={ALPHA}, perms={PERMUTATIONS}\nKNN (k={K}, row-standardized)", fontsize=11)
        import matplotlib.patches as mpatches
        handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
        ax.legend(handles=handles, title="Cluster",
                  loc="center left", bbox_to_anchor=(1.02, 0.5),
                  frameon=True, fancybox=True, framealpha=0.98)
        fig.subplots_adjust(right=0.82)
        png_path = vdir / f"lisa_knn6_map_{var}_{yr}.png"
        fig.savefig(png_path, dpi=220, bbox_inches="tight")
        plt.close(fig)

        # —— 为面板图缓存（只存 cluster 分类即可）——
        cache = gg[["region","geometry"]].copy()
        cache["cluster"] = gg_plot["cluster"].astype(str).values
        yearly_maps.append((yr, cache))

        print(f"✅ {var} {yr}: CSV: {csv_path.name} | PNG: {png_path.name}")

    # ========== 每变量十年合一（2×5）面板图 ==========
    if len(yearly_maps) > 0:
        # 按年份排序
        yearly_maps = sorted(yearly_maps, key=lambda x: x[0])

        n = len(yearly_maps)
        ncols = 5
        nrows = int(np.ceil(n / ncols))

        fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*3.2, nrows*3.0))
        axes = np.array(axes).reshape(nrows, ncols)

        for i, (yr, cache) in enumerate(yearly_maps):
            r, c = divmod(i, ncols)
            ax = axes[r, c]
            # 颜色映射
            face = cache["cluster"].map(CLRS).fillna("#e0e0e0")
            cache.plot(ax=ax, color=face, edgecolor="white", linewidth=0.18)
            ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
            ax.set_title(str(yr), fontsize=10)
            ax.set_axis_off()

        # 多余子图关掉
        for k in range(n, nrows*ncols):
            r, c = divmod(k, ncols)
            axes[r, c].axis("off")

        # 总标题 & 图例（整张图只放一个图例）
        import matplotlib.patches as mpatches
        handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
        fig.suptitle(f"LISA clusters — {var} (KNN k={K}, α={ALPHA}, perms={PERMUTATIONS})  |  2014–2023",
                     fontsize=12, y=0.98)
        fig.legend(handles=handles, title="Cluster",
                   loc="center left", bbox_to_anchor=(1.02, 0.5),
                   frameon=True, fancybox=True, framealpha=0.98)

        fig.tight_layout(rect=[0,0,0.98,0.96])
        grid_png = OUT_DIR / f"{var}_LISA_grid_2014_2023_knn{K}.png"
        fig.savefig(grid_png, dpi=220, bbox_inches="tight")
        plt.close(fig)
        print(f"🧩 Saved 10-year LISA grid for {var}: {grid_png}")


✅ employment_rate 2014: CSV: lisa_knn6_employment_rate_2014.csv | PNG: lisa_knn6_map_employment_rate_2014.png
✅ employment_rate 2015: CSV: lisa_knn6_employment_rate_2015.csv | PNG: lisa_knn6_map_employment_rate_2015.png
✅ employment_rate 2016: CSV: lisa_knn6_employment_rate_2016.csv | PNG: lisa_knn6_map_employment_rate_2016.png
✅ employment_rate 2017: CSV: lisa_knn6_employment_rate_2017.csv | PNG: lisa_knn6_map_employment_rate_2017.png
✅ employment_rate 2018: CSV: lisa_knn6_employment_rate_2018.csv | PNG: lisa_knn6_map_employment_rate_2018.png
✅ employment_rate 2019: CSV: lisa_knn6_employment_rate_2019.csv | PNG: lisa_knn6_map_employment_rate_2019.png
✅ employment_rate 2020: CSV: lisa_knn6_employment_rate_2020.csv | PNG: lisa_knn6_map_employment_rate_2020.png
✅ employment_rate 2021: CSV: lisa_knn6_employment_rate_2021.csv | PNG: lisa_knn6_map_employment_rate_2021.png
✅ employment_rate 2022: CSV: lisa_knn6_employment_rate_2022.csv | PNG: lisa_knn6_map_employment_rate_2022.png
✅ employme

In [9]:
# -*- coding: utf-8 -*-
"""
LISA (Local Moran's I) — KNN k=6, NUTS2, 2014–2023
方案A：
- 正文：每变量 3 个代表年份（2014, 2019, 2023） → 1×3 面板图（大、清晰、共用图例、无总标题）
- 附录（可选）：每变量 10 年 → 2×5 面板图
- 逐年：CSV + 单年地图（保留）
"""

from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN
from esda.moran import Moran_Local

# ========== 路径 ==========
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_A")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========== 设置 ==========
YEARS          = list(range(2014, 2024))
REP_YEARS      = [2014, 2019, 2023]     # 正文代表年份（可改）
VARS           = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
K              = 6
ALPHA          = 0.05
PERMUTATIONS   = 999
MAKE_APPENDIX  = True                   # 是否额外输出 2×5 全时期面板

# ========== 工具 ==========
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","code","id"]:
        if c in gdf.columns: return c
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

def zscore(x: np.ndarray) -> np.ndarray:
    x = x.astype(float)
    return (x - x.mean()) / x.std(ddof=0)

def lisa_cluster_labels(z, W, permutations=999, alpha=0.05):
    ml = Moran_Local(z, W, permutations=permutations)
    sig  = ml.p_sim < alpha
    quad = ml.q  # 1 HH, 2 LH, 3 LL, 4 HL
    labels = np.array(["Not significant"]*len(z), dtype=object)
    labels[sig & (quad==1)] = "High-High"
    labels[sig & (quad==2)] = "Low-High"
    labels[sig & (quad==3)] = "Low-Low"
    labels[sig & (quad==4)] = "High-Low"
    return ml, sig, quad, labels

# 颜色 & 顺序（四图统一）
CLRS  = {"High-High":"#d7191c","Low-Low":"#2c7bb6","High-Low":"#fdae61","Low-High":"#abd9e9","Not significant":"#e0e0e0"}
ORDER = ["High-High","Low-Low","High-Low","Low-High","Not significant"]

# ========== 读数据 ==========
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()
if (not gdf.crs) or gdf.crs.to_epsg()==4326:
    gdf = gdf.to_crs(3035)

pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# 地图范围一致
xmin, ymin, xmax, ymax = gdf.total_bounds

# ========== 主循环 ==========
for var in VARS:
    vdir = OUT_DIR / var
    (vdir / "single_year").mkdir(parents=True, exist_ok=True)

    yearly_maps_all  = []   # 全年：[(year, cache_gdf)]
    yearly_maps_rep  = []   # 代表年：[(year, cache_gdf)]

    for yr in YEARS:
        d = pdf.loc[pdf["year"]==yr, ["region", var]].dropna()
        if d.empty:
            continue

        gg = gdf[["region","geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < 5:
            continue

        # KNN k=6（行标准化）
        W = KNN.from_dataframe(gg, k=K, use_index=True); W.transform = "R"

        z = zscore(gg[var].to_numpy())
        ml, sig, quad, labels = lisa_cluster_labels(z, W, permutations=PERMUTATIONS, alpha=ALPHA)

        # ---- CSV ----
        out_tbl = gg[["region"]].copy()
        out_tbl["Ii"] = ml.Is
        out_tbl["p_value"] = ml.p_sim
        out_tbl["quadrant"] = quad
        out_tbl["significant"] = sig
        out_tbl["cluster"] = labels
        csv_path = vdir / f"lisa_knn6_{var}_{yr}.csv"
        out_tbl.to_csv(csv_path, index=False, encoding="utf-8-sig")

        # ---- 单年地图（保留；可用于补充或检查）----
        gg_plot = gg.copy()
        gg_plot["cluster"] = pd.Categorical(labels, categories=ORDER)
        face = gg_plot["cluster"].map(CLRS).fillna("#e0e0e0")
        fig, ax = plt.subplots(figsize=(7.2, 6.0))
        gg_plot.plot(ax=ax, color=face, edgecolor="white", linewidth=0.2)
        ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
        ax.set_axis_off()
        ax.set_title(f"LISA — {var} ({yr})  α={ALPHA}, perms={PERMUTATIONS}\nKNN (k={K}, row-standardized)", fontsize=11)
        import matplotlib.patches as mpatches
        handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
        ax.legend(handles=handles, title="Cluster",
                  loc="center left", bbox_to_anchor=(1.02, 0.5),
                  frameon=True, fancybox=True, framealpha=0.98)
        fig.subplots_adjust(right=0.82)
        fig.savefig(vdir / "single_year" / f"lisa_knn6_map_{var}_{yr}.png", dpi=220, bbox_inches="tight")
        plt.close(fig)

        # —— 缓存（全年 & 代表年）——
        cache = gg[["region","geometry"]].copy()
        cache["cluster"] = pd.Categorical(labels, categories=ORDER).astype(str)
        yearly_maps_all.append((yr, cache))
        if yr in REP_YEARS:
            yearly_maps_rep.append((yr, cache))

        print(f"✅ {var} {yr}: CSV + PNG done.")

    # ========== 正文：代表年 1×3 面板 ==========
    if len(yearly_maps_rep) > 0:
        yearly_maps_rep = sorted(yearly_maps_rep, key=lambda x: x[0])
        n = len(yearly_maps_rep)
        ncols, nrows = n, 1  # 1×3

        # 每个面板宽度略大一些，便于正文放大
        fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*3.8, nrows*3.6))
        if n == 1:
            axes = np.array([axes])
        axes = np.array(axes).reshape(nrows, ncols)

        for i, (yr, cache) in enumerate(yearly_maps_rep):
            ax = axes[0, i]
            face = cache["cluster"].map(CLRS).fillna("#e0e0e0")
            cache.plot(ax=ax, color=face, edgecolor="white", linewidth=0.22)
            ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
            ax.set_title(str(yr), fontsize=12)
            ax.set_axis_off()

        # 共用图例（整张图一个）
        import matplotlib.patches as mpatches
        handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
        fig.legend(handles=handles, title="Cluster",
                   loc="center right", bbox_to_anchor=(1.02, 0.5),
                   frameon=True, fancybox=True, framealpha=0.98)

        fig.tight_layout(rect=[0, 0, 0.98, 1])
        main_png = OUT_DIR / f"{var}_LISA_rep_years_{'-'.join(map(str, REP_YEARS))}_knn{K}.png"
        fig.savefig(main_png, dpi=240, bbox_inches="tight")
        plt.close(fig)
        print(f"📌 Saved MAIN (1x{n}) LISA figure for {var}: {main_png}")

    # ========== 附录：10 年 2×5 面板（可选） ==========
    if MAKE_APPENDIX and len(yearly_maps_all) > 0:
        yearly_maps_all = sorted(yearly_maps_all, key=lambda x: x[0])
        n = len(yearly_maps_all); ncols = 5; nrows = int(np.ceil(n / ncols))
        fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*3.0, nrows*2.8))
        axes = np.array(axes).reshape(nrows, ncols)

        for i, (yr, cache) in enumerate(yearly_maps_all):
            r, c = divmod(i, ncols); ax = axes[r, c]
            face = cache["cluster"].map(CLRS).fillna("#e0e0e0")
            cache.plot(ax=ax, color=face, edgecolor="white", linewidth=0.18)
            ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
            ax.set_title(str(yr), fontsize=10); ax.set_axis_off()

        # 多余子图关掉
        for k in range(n, nrows*ncols):
            r, c = divmod(k, ncols); axes[r, c].axis("off")

        import matplotlib.patches as mpatches
        handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
        fig.legend(handles=handles, title="Cluster",
                   loc="center right", bbox_to_anchor=(1.02, 0.5),
                   frameon=True, fancybox=True, framealpha=0.98)

        fig.tight_layout(rect=[0,0,0.98,1])
        grid_png = OUT_DIR / f"{var}_LISA_grid_2014_2023_knn{K}.png"
        fig.savefig(grid_png, dpi=220, bbox_inches="tight")
        plt.close(fig)
        print(f"🧩 Saved APPENDIX (2x5) LISA grid for {var}: {grid_png}")


✅ employment_rate 2014: CSV + PNG done.
✅ employment_rate 2015: CSV + PNG done.
✅ employment_rate 2016: CSV + PNG done.
✅ employment_rate 2017: CSV + PNG done.
✅ employment_rate 2018: CSV + PNG done.
✅ employment_rate 2019: CSV + PNG done.
✅ employment_rate 2020: CSV + PNG done.
✅ employment_rate 2021: CSV + PNG done.
✅ employment_rate 2022: CSV + PNG done.
✅ employment_rate 2023: CSV + PNG done.
📌 Saved MAIN (1x3) LISA figure for employment_rate: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_A\employment_rate_LISA_rep_years_2014-2019-2023_knn6.png
🧩 Saved APPENDIX (2x5) LISA grid for employment_rate: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_A\employment_rate_LISA_grid_2014_2023_knn6.png
✅ unemployment_rate 2014: CSV + PNG done.
✅ unemployment_rate 2015: CSV + PNG done.
✅ unemployment_rate 2016: CSV + PNG done.
✅ unemployment_rate 2017: CSV + PNG done.
✅ unemployment_rate 2018: CSV + PNG done.
✅ unemployment_rat

In [10]:
# -*- coding: utf-8 -*-
"""
LISA (Local Moran's I) — KNN k=6, NUTS2
只生成：每变量 2014/2018/2020/2023 的 2×2 面板图（共用图例）
"""

from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN
from esda.moran import Moran_Local

# ========== 路径 ==========
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========== 设置 ==========
YEARS_2x2    = [2014, 2018, 2020, 2023]     # 只做这四年
VARS         = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
K            = 6
ALPHA        = 0.05
PERMUTATIONS = 999

# 颜色 & 顺序
CLRS  = {"High-High":"#d7191c","Low-Low":"#2c7bb6","High-Low":"#fdae61",
         "Low-High":"#abd9e9","Not significant":"#e0e0e0"}
ORDER = ["High-High","Low-Low","High-Low","Low-High","Not significant"]

# ========== 工具 ==========
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID","nuts_id","NUTS_ID_2021","region","code","id"]:
        if c in gdf.columns: return c
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

def zscore(x: np.ndarray) -> np.ndarray:
    x = x.astype(float)
    return (x - x.mean()) / x.std(ddof=0)

def lisa_labels(z, W, perms=999, alpha=0.05):
    ml = Moran_Local(z, W, permutations=perms)
    sig  = ml.p_sim < alpha
    quad = ml.q  # 1 HH, 2 LH, 3 LL, 4 HL
    lab = np.array(["Not significant"]*len(z), dtype=object)
    lab[sig & (quad==1)] = "High-High"
    lab[sig & (quad==2)] = "Low-High"
    lab[sig & (quad==3)] = "Low-Low"
    lab[sig & (quad==4)] = "High-Low"
    return lab

# ========== 读数据 ==========
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()
if (not gdf.crs) or gdf.crs.to_epsg()==4326:
    gdf = gdf.to_crs(3035)
xmin, ymin, xmax, ymax = gdf.total_bounds

pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# ========== 生成 2×2 面板 ==========
for var in VARS:
    caches = []
    for yr in YEARS_2x2:
        d = pdf.loc[pdf["year"]==yr, ["region", var]].dropna()
        gg = gdf[["region","geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < 5:
            raise ValueError(f"{var} {yr}: 有效地区过少。")
        W = KNN.from_dataframe(gg, k=K, use_index=True); W.transform = "R"
        z = zscore(gg[var].to_numpy())
        labels = lisa_labels(z, W, perms=PERMUTATIONS, alpha=ALPHA)
        cache = gg[["region","geometry"]].copy()
        cache["cluster"] = pd.Categorical(labels, categories=ORDER).astype(str)
        caches.append((yr, cache))

    # 画 2×2（顺序按 YEARS_2x2）
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.reshape(2, 2)
    for ax, (yr, cache) in zip(axes.ravel(), caches):
        face = cache["cluster"].map(CLRS).fillna("#e0e0e0")
        cache.plot(ax=ax, color=face, edgecolor="white", linewidth=0.22)
        ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
        ax.set_title(str(yr), fontsize=14, pad=6)
        ax.set_axis_off()

    # 共用图例（右侧）
    import matplotlib.patches as mpatches
    handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
    fig.legend(handles=handles, title="Cluster",
               loc="center right", bbox_to_anchor=(1.02, 0.5),
               frameon=True, fancybox=True, framealpha=0.98)

    fig.tight_layout(rect=[0,0,0.98,1])
    out_png = OUT_DIR / f"{var}_LISA_2x2_{YEARS_2x2[0]}_{YEARS_2x2[1]}_{YEARS_2x2[2]}_{YEARS_2x2[3]}_knn{K}.png"
    fig.savefig(out_png, dpi=240, bbox_inches="tight")
    plt.close(fig)
    print("✅ saved:", out_png)


✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\employment_rate_LISA_2x2_2014_2018_2020_2023_knn6.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\unemployment_rate_LISA_2x2_2014_2018_2020_2023_knn6.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\log_gdp_pc_LISA_2x2_2014_2018_2020_2023_knn6.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\vet_per_million_LISA_2x2_2014_2018_2020_2023_knn6.png


In [None]:
# -*- coding: utf-8 -*-
"""
LISA (Local Moran's I) — KNN k=6, NUTS2
只生成：每变量 2014/2018/2020/2023 的 2×2 面板图（共用图例、年份居中）
样式与示例图一致。
"""

from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN
from esda.moran import Moran_Local
import matplotlib.patches as mpatches

# ========== 路径（按你的工程修改） ==========
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========== 设置 ==========
YEARS_2x2    = [2014, 2018, 2020, 2023]               # 只做这四年
VARS         = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
K            = 6
ALPHA        = 0.05
PERMUTATIONS = 999

# 颜色 & 顺序
CLRS  = {
    "High-High":      "#d7191c",
    "Low-Low":        "#2c7bb6",
    "High-Low":       "#fdae61",
    "Low-High":       "#abd9e9",
    "Not significant": "#e0e0e0",
}
ORDER = ["High-High", "Low-Low", "High-Low", "Low-High", "Not significant"]

# ========== 工具 ==========
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["region", "NUTS_ID", "nuts_id", "NUTS_ID_2021", "NUTS2_ID", "NUTS_CODE", "code", "id"]:
        if c in gdf.columns:
            return c
    # 兜底：取第一个非 geometry 列
    return [c for c in gdf.columns if c != gdf.geometry.name][0]


def zscore(x: np.ndarray) -> np.ndarray:
    x = x.astype(float)
    return (x - x.mean()) / x.std(ddof=0)


def lisa_labels(z, W, perms=999, alpha=0.05):
    ml = Moran_Local(z, W, permutations=perms)
    sig  = ml.p_sim < alpha
    quad = ml.q  # 1 HH, 2 LH, 3 LL, 4 HL
    lab = np.array(["Not significant"] * len(z), dtype=object)
    lab[sig & (quad == 1)] = "High-High"
    lab[sig & (quad == 2)] = "Low-High"
    lab[sig & (quad == 3)] = "Low-Low"
    lab[sig & (quad == 4)] = "High-Low"
    return lab

# ========== 读数据 ==========
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
# 统一 ID 列名为 region（大写去空格）
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()
# 投影：若无 CRS 或是 WGS84，则转到 3035，便于欧盟地图显示
if (not gdf.crs) or gdf.crs.to_epsg() == 4326:
    gdf = gdf.to_crs(3035)
# 固定统一视窗，保证 2×2 四幅图对齐
xmin, ymin, xmax, ymax = gdf.total_bounds

# 面板数据
pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# ========== 生成 2×2 面板 ==========
for var in VARS:
    caches = []
    for yr in YEARS_2x2:
        d = pdf.loc[pdf["year"] == yr, ["region", var]].dropna()
        gg = gdf[["region", "geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < max(5, K + 1):
            raise ValueError(f"{var} {yr}: 有效地区过少（{len(gg)} < {max(5, K+1)}）。")
        # 基于 dataframe 构建 KNN（使用索引作为 ID）
        W = KNN.from_dataframe(gg, k=K, use_index=True)
        W.transform = "R"
        z = zscore(gg[var].to_numpy())
        labels = lisa_labels(z, W, perms=PERMUTATIONS, alpha=ALPHA)
        cache = gg[["region", "geometry"]].copy()
        cache["cluster"] = pd.Categorical(labels, categories=ORDER).astype(str)
        caches.append((yr, cache))

    # 画 2×2（顺序按 YEARS_2x2）
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    for ax, (yr, cache) in zip(axes.ravel(), caches):
        face = cache["cluster"].map(CLRS).fillna("#e0e0e0")
        cache.plot(ax=ax, color=face, edgecolor="white", linewidth=0.22)
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)
        ax.set_title(str(yr), fontsize=14, pad=6)
        ax.set_axis_off()

    # 共用图例（右侧）
    handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
    fig.legend(
        handles=handles,
        title="Cluster",
        loc="center right",
        bbox_to_anchor=(1.02, 0.5),
        frameon=True,
        fancybox=True,
        framealpha=0.98,
    )

    fig.tight_layout(rect=[0, 0, 0.98, 1])
    out_png = OUT_DIR / f"{var}_LISA_2x2_{YEARS_2x2[0]}_{YEARS_2x2[1]}_{YEARS_2x2[2]}_{YEARS_2x2[3]}_knn{K}.png"
    fig.savefig(out_png, dpi=240, bbox_inches="tight")
    plt.close(fig)
    print("✅ saved:", out_png)


In [3]:
# -*- coding: utf-8 -*-
"""
LISA (Local Moran's I) — KNN k=6, NUTS2
只生成：每变量 2014/2018/2020/2023 的 2×2 面板图（共用图例，年份居中）
"""

from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal.weights import KNN
from esda.moran import Moran_Local
import matplotlib.patches as mpatches

# ========= 路径 =========
PANEL_PATH = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\panel_long_no_islands.csv"
GEO_PATH   = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\filtered_by_islands\NUTS2_2021_no_islands.gpkg"
OUT_DIR    = Path(r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2")

OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========= 设置 =========
YEARS_2x2    = [2014, 2018, 2020, 2023]   # 只做这四年
VARS         = ["employment_rate", "unemployment_rate", "log_gdp_pc", "vet_per_million"]
K            = 6
ALPHA        = 0.05
PERMUTATIONS = 999

# 颜色 & 顺序（与示例一致）
CLRS  = {
    "High-High":       "#d7191c",
    "Low-Low":         "#2c7bb6",
    "High-Low":        "#fdae61",
    "Low-High":        "#abd9e9",
    "Not significant": "#e0e0e0",
}
ORDER = ["High-High", "Low-Low", "High-Low", "Low-High", "Not significant"]

plt.rcParams.update({"figure.dpi": 180, "savefig.dpi": 240})

# ========= 工具 =========
def detect_geo_id(gdf: gpd.GeoDataFrame) -> str:
    for c in ["NUTS_ID", "nuts_id", "NUTS_ID_2021", "region", "code", "id"]:
        if c in gdf.columns:
            return c
    # 兜底：取第一个非 geometry 列
    return [c for c in gdf.columns if c != gdf.geometry.name][0]

def zscore(x: np.ndarray) -> np.ndarray:
    x = x.astype(float)
    return (x - x.mean()) / x.std(ddof=0)

def lisa_labels(z, W, perms=999, alpha=0.05):
    ml = Moran_Local(z, W, permutations=perms)
    sig  = ml.p_sim < alpha
    quad = ml.q  # 1 HH, 2 LH, 3 LL, 4 HL
    lab = np.array(["Not significant"] * len(z), dtype=object)
    lab[sig & (quad == 1)] = "High-High"
    lab[sig & (quad == 2)] = "Low-High"
    lab[sig & (quad == 3)] = "Low-Low"
    lab[sig & (quad == 4)] = "High-Low"
    return lab

# ========= 读数据 =========
gdf = gpd.read_file(GEO_PATH)
gid = detect_geo_id(gdf)
gdf["region"] = gdf[gid].astype("string").str.strip().str.upper()

# 投影一致（若是WGS84或无CRS则转 3035）
if (not gdf.crs) or gdf.crs.to_epsg() == 4326:
    gdf = gdf.to_crs(3035)

# 统一可视范围，保证四图对齐
xmin, ymin, xmax, ymax = gdf.total_bounds

pdf = pd.read_csv(PANEL_PATH, encoding="utf-8-sig")
pdf["region"] = pdf["region"].astype("string").str.strip().str.upper()
pdf["year"]   = pd.to_numeric(pdf["year"], errors="coerce")
for v in VARS:
    if v in pdf.columns:
        pdf[v] = pd.to_numeric(pdf[v], errors="coerce")

# ========= 生成 2×2 面板 =========
for var in VARS:
    caches = []
    for yr in YEARS_2x2:
        d = pdf.loc[pdf["year"] == yr, ["region", var]].dropna()
        gg = gdf[["region", "geometry"]].merge(d, on="region", how="inner").dropna(subset=[var])
        if len(gg) < max(5, K + 1):
            raise ValueError(f"{var} {yr}: 有效地区过少（{len(gg)} < {max(5, K+1)}）。")

        # 用几何直接构建 KNN 权重；行标准化
        W = KNN.from_dataframe(gg, k=K, use_index=True)
        W.transform = "R"

        z = zscore(gg[var].to_numpy())
        labels = lisa_labels(z, W, perms=PERMUTATIONS, alpha=ALPHA)

        cache = gg[["region", "geometry"]].copy()
        cache["cluster"] = pd.Categorical(labels, categories=ORDER).astype(str)
        caches.append((yr, cache))

    # 画 2×2（顺序按 YEARS_2x2）
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    for ax, (yr, cache) in zip(axes.ravel(), caches):
        face = cache["cluster"].map(CLRS).fillna("#e0e0e0")
        cache.plot(ax=ax, color=face, edgecolor="white", linewidth=0.22)
        ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
        ax.set_title(str(yr), fontsize=14, pad=6)
        ax.set_axis_off()

    # 共用图例（右侧，卡片样式）
    handles = [mpatches.Patch(color=CLRS[k], label=k) for k in ORDER]
    fig.legend(handles=handles, title="Cluster",
               loc="center right", bbox_to_anchor=(1.02, 0.5),
               frameon=True, fancybox=True, framealpha=0.98)

    fig.tight_layout(rect=[0, 0, 0.98, 1])
    out_png = OUT_DIR / f"{var}_LISA_2x2_{YEARS_2x2[0]}_{YEARS_2x2[1]}_{YEARS_2x2[2]}_{YEARS_2x2[3]}_knn{K}.png"
    fig.savefig(out_png, dpi=240, bbox_inches="tight")
    plt.close(fig)
    print("✅ saved:", out_png)


✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\employment_rate_LISA_2x2_2014_2018_2020_2023_knn6.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\unemployment_rate_LISA_2x2_2014_2018_2020_2023_knn6.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\log_gdp_pc_LISA_2x2_2014_2018_2020_2023_knn6.png
✅ saved: D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\moran\lisa_2x2\vet_per_million_LISA_2x2_2014_2018_2020_2023_knn6.png
