In [3]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import norm

# ========= 参数 =========
CSV = r"D:\Dissertation\dissertation\data 2\data\Without UK and Germany\Final\panel_long_merged.csv"
ID_COL = "region"     # 地区列名（若不同请改）
TIME_COL = "year"     # 年份列名（若不同请改）
LAGS = 2              # Granger 阶数（你也可以改成 1 或 3 做稳健性）
YEAR_MIN, YEAR_MAX = 2013, 2023

# ========= 读 & 变量处理 =========
df = pd.read_csv(CSV)

# 年份筛选
df = df[(df[TIME_COL] >= YEAR_MIN) & (df[TIME_COL] <= YEAR_MAX)].copy()

# 构造对数变量（安全处理）
eps = 1e-8
df["ln_gdp"] = df["log_gdp_pc"]  # 已经是 log

# vet_per_million 取 log
df["ln_vet"] = np.log(np.maximum(df["vet_per_million"].astype(float), eps))

# employment_rate：如果是 0-100，先除以100；然后 log
emp = df["employment_rate"].astype(float)
if emp.max(skipna=True) > 1.01:  # 看起来是百分比
    emp = emp / 100.0
df["ln_emp"] = np.log(np.maximum(emp, eps))

# ========= 辅助函数：构造按实体的滞后数据 =========
def wald_chi2_for_entity(df_e, y, x, p):
    """
    对单个实体做回归： y_t ~ const + y_{t-1..p} + x_{t-1..p}
    并对 x_{t-1..p} 的联合零假设 R*beta=0 做 Wald(χ²_p) 检验。
    返回 (W, model)。若样本奇异/失败，返回 (None, None)。
    """
    try:
        # 1) 设计矩阵
        y_col = y
        Xcols = [f"{y}_L{k}" for k in range(1, p+1)] + [f"{x}_L{k}" for k in range(1, p+1)]
        X = sm.add_constant(df_e[Xcols].to_numpy(), has_constant="add")
        yv = df_e[y_col].to_numpy()

        # 简单的秩检查：如果秩 < 参数个数，回归不可识别，跳过
        if np.linalg.matrix_rank(X) < X.shape[1]:
            return None, None

        # 2) OLS
        mdl = sm.OLS(yv, X).fit()

        # 3) 只约束 x 的 p 个滞后系数：R*beta = 0
        k_const = 1
        k_y = p
        k_total = X.shape[1]
        R = np.zeros((p, k_total), dtype=float)
        for i in range(p):
            R[i, k_const + k_y + i] = 1.0

        # 4) Wald 检验 —— 只传 R，不传 r（默认为 0 向量）
        wt = mdl.wald_test(R)
        W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
        return W, mdl

    except Exception:
        # 该实体发生数值问题，忽略
        return None, None

# ========= Dumitrescu–Hurlin 面板因果检验 =========
from scipy.stats import norm, chi2

def dumitrescu_hurlin(df, y, x, id_col, t_col, p):
    """
    Dumitrescu–Hurlin(2012) 面板因果检验。
    对每个实体取 Wald χ²_p 统计量，按 DH 的小样本校正标准化得到 Zbar。
    自动跳过奇异/不可识别的实体。
    """
    # 构造滞后
    panel = build_entity_lagged(df, y, x, id_col, t_col, p)
    if len(panel) == 0:
        raise ValueError("没有任何实体满足所选滞后阶。")

    W_list, T_list = [], []
    for _, g in panel.items():
        W, _ = wald_chi2_for_entity(g, y, x, p)
        if W is not None and np.isfinite(W):
            W_list.append(W)
            T_list.append(len(g))  # 用于最小 T 校正

    if len(W_list) == 0:
        raise ValueError("所有实体均不可识别/数值失败，无法进行面板检验。")

    W_arr = np.array(W_list, dtype=float)
    N = len(W_arr)
    T_eff = int(np.min(T_list))

    # DH 标准化（小样本校正；若分母异常则退化到大样本）
    EW = p
    VarW = 2 * p * (2*T_eff - 3*p - 1) / max(T_eff - p - 1, 1)
    if not np.isfinite(VarW) or VarW <= 0:
        VarW = 2.0 * p  # 大样本近似

    Zbar = np.sqrt(N) * ((W_arr.mean() - EW) / np.sqrt(VarW))
    p_val = 2 * (1 - norm.cdf(abs(Zbar)))

    # 单实体显著占比
    sig10 = chi2.ppf(0.90, df=p)
    sig05 = chi2.ppf(0.95, df=p)
    sig01 = chi2.ppf(0.99, df=p)
    share_10 = (W_arr >= sig10).mean()
    share_05 = (W_arr >= sig05).mean()
    share_01 = (W_arr >= sig01).mean()

    return {
        "Zbar": Zbar, "p_value": p_val,
        "N_used": N, "T_min": T_eff, "lags": p,
        "share_sig_10": share_10, "share_sig_05": share_05, "share_sig_01": share_01,
        "W_i": W_arr
    }

# ========= 四个方向：跑起来 =========
def run_four(df, p=2):
    res = {}
    res["VET -> GDP"] = dumitrescu_hurlin(df, "ln_gdp", "ln_vet", ID_COL, TIME_COL, p)
    res["GDP -> VET"] = dumitrescu_hurlin(df, "ln_vet", "ln_gdp", ID_COL, TIME_COL, p)
    res["VET -> EMP"] = dumitrescu_hurlin(df, "ln_emp", "ln_vet", ID_COL, TIME_COL, p)
    res["EMP -> VET"] = dumitrescu_hurlin(df, "ln_vet", "ln_emp", ID_COL, TIME_COL, p)
    return res

results = run_four(df, p=LAGS)
print(pd.DataFrame([{
    "direction": k, "lags": v["lags"],
    "Zbar": round(v["Zbar"], 3), "p_value": round(v["p_value"], 4),
    "N_used": v["N_used"], "T_min": v["T_min"],
    "share_sig_10": round(v["share_sig_10"], 3),
    "share_sig_05": round(v["share_sig_05"], 3),
    "share_sig_01": round(v["share_sig_01"], 3),
} for k, v in results.items()]))


# 汇总成表
summary_rows = []
for name, r in results.items():
    summary_rows.append({
        "direction": name,
        "lags": r["lags"],
        "Zbar": np.round(r["Zbar"], 3),
        "p_value": np.round(r["p_value"], 4),
        "N_used": r["N_used"],
        "T_min": r["T_min"],
        "share_sig_10": np.round(r["share_sig_10"], 3),
        "share_sig_05": np.round(r["share_sig_05"], 3),
        "share_sig_01": np.round(r["share_sig_01"], 3),
    })
panel_gc_table = pd.DataFrame(summary_rows)
print(panel_gc_table)

# 如需导出
panel_gc_table.to_csv(r"D:\Dissertation\dissertation\outputs\panel_granger_DH_lag2.csv", index=False, encoding="utf-8-sig")


  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ

    direction  lags    Zbar  p_value  N_used  T_min  share_sig_10  \
0  VET -> GDP     2   1.002   0.3162     181      7         0.099   
1  GDP -> VET     2  11.773   0.0000     181      7         0.182   
2  VET -> EMP     2   2.227   0.0260     193      8         0.140   
3  EMP -> VET     2   6.829   0.0000     193      8         0.218   

   share_sig_05  share_sig_01  
0         0.050         0.039  
1         0.138         0.072  
2         0.083         0.041  
3         0.166         0.088  
    direction  lags    Zbar  p_value  N_used  T_min  share_sig_10  \
0  VET -> GDP     2   1.002   0.3162     181      7         0.099   
1  GDP -> VET     2  11.773   0.0000     181      7         0.182   
2  VET -> EMP     2   2.227   0.0260     193      8         0.140   
3  EMP -> VET     2   6.829   0.0000     193      8         0.218   

   share_sig_05  share_sig_01  
0         0.050         0.039  
1         0.138         0.072  
2         0.083         0.041  
3         0.166     

  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ²_p 统计量
  W = float(np.atleast_1d(wt.statistic)[0])  # χ