In [3]:
import pandas as pd
import json
from pathlib import Path
from typing import Dict

def load_standard_mapping(config_path: Path) -> Dict[str, str]:
    """Build the L3-code -> L2-standard-name lookup from the JSON config file.

    The config's ``"level_mapping"`` object is read (an absent key is treated
    as empty). Every entry whose ``"L2"`` value is present and truthy
    contributes one ``{l3_code: l2_name}`` pair; all other entries are skipped.

    Args:
        config_path (Path): Location of the UTF-8 encoded JSON config.

    Returns:
        Dict[str, str]: e.g.
        ``{"LEV3_FIT": "Electricity – market-based instruments", ...}``.
    """
    with open(config_path, 'r', encoding='utf-8') as handle:
        settings = json.load(handle)

    lookup: Dict[str, str] = {}
    for code, entry in settings.get("level_mapping", {}).items():
        l2_name = entry.get("L2")
        # Entries with a missing or empty L2 name carry no usable mapping.
        if l2_name:
            lookup[code] = l2_name
    return lookup

def calculate_policy_metrics(input_path: Path, config_path: Path, output_dir: Path) -> None:
    """
    Compute policy breadth and intensity and save one wide table per metric.

    L2 names are derived from each row's L3 code through the config mapping,
    so the output column names are exactly the config's standard names. Rows
    whose L3 code has no mapping are dropped. For every
    (REF_AREA, TIME_PERIOD, L2) group:

    * Breadth   = rows with OBS_VALUE > 0 / number of distinct L3 codes
    * Intensity = sum of OBS_VALUE       / number of distinct L3 codes

    Each metric is pivoted to one row per (REF_AREA, TIME_PERIOD) with one
    column per L2 name (missing combinations filled with 0) and written to
    ``2-1-country_<metric>.parquet`` inside ``output_dir``.

    Args:
        input_path (Path): Input CSV path. Must contain the columns REF_AREA,
            TIME_PERIOD, CLIM_ACT_POL_L3, OBS_STATUS and OBS_VALUE.
        config_path (Path): Config file path, passed to load_standard_mapping.
        output_dir (Path): Output directory; created if it does not exist.
    """
    df = pd.read_csv(input_path)
    l3_to_l2_std = load_standard_mapping(config_path)

    # 1. Standardise L2 names so they match the JSON config exactly.
    df['CLIM_ACT_POL_L2'] = df['CLIM_ACT_POL_L3'].map(l3_to_l2_std)
    # Drop rows without a defined mapping. The explicit copy makes the
    # in-place assignment below operate on an independent frame instead of
    # a filtered view (avoids chained-assignment warnings).
    df = df.dropna(subset=['CLIM_ACT_POL_L2']).copy()

    # 2. Cleaning: observations flagged 'N' (noted as "not significant" by
    #    the original author) count as zero.
    df.loc[df['OBS_STATUS'] == 'N', 'OBS_VALUE'] = 0

    # 3. Aggregate per country / year / L2 category.
    grouped = df.groupby(['REF_AREA', 'TIME_PERIOD', 'CLIM_ACT_POL_L2'])
    stats = grouped.agg(
        Implemented_Count=('OBS_VALUE', lambda x: (x > 0).sum()),
        Sum_Stringency=('OBS_VALUE', 'sum'),
        Valid_L3_Count=('CLIM_ACT_POL_L3', 'nunique')
    ).reset_index()

    # 4. Derive the metrics; both are normalised by the number of distinct
    #    L3 codes observed in the group.
    stats['Breadth'] = stats['Implemented_Count'] / stats['Valid_L3_Count']
    stats['Intensity'] = stats['Sum_Stringency'] / stats['Valid_L3_Count']

    # Make sure the destination exists before the first write.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # 5. Format output (wide table + cleaned year column).
    for metric in ['Breadth', 'Intensity']:
        pivot_df = stats.pivot(
            index=['REF_AREA', 'TIME_PERIOD'],
            columns='CLIM_ACT_POL_L2',
            values=metric
        ).fillna(0).reset_index()

        # Keep only rows whose year parses as a number and store it as int.
        # The cast is applied to the parsed values (not the raw column) so a
        # numeric string such as "2020.0" that passes the filter also casts
        # cleanly; .copy() keeps the assignment off a filtered slice.
        years = pd.to_numeric(pivot_df['TIME_PERIOD'], errors='coerce')
        valid_year = years.notna()
        pivot_df = pivot_df.loc[valid_year].copy()
        pivot_df['TIME_PERIOD'] = years[valid_year].astype(int)

        out_path = output_dir / f"2-1-country_{metric.lower()}.parquet"
        pivot_df.to_parquet(out_path, index=False)
        print(f"[{metric}] Saved to: {out_path.name} (Shape: {pivot_df.shape})")

if __name__ == "__main__":
    # Inputs and outputs share the "data" folder that sits next to the
    # current working directory (i.e. <cwd>/../data).
    base_dir = Path.cwd().parent
    data_dir = base_dir.joinpath("data")

    calculate_policy_metrics(
        input_path=data_dir.joinpath("1-2-L2_L3_pairs_selected.csv"),
        config_path=data_dir.joinpath("config_mappings.json"),
        output_dir=data_dir,
    )

[Breadth] Saved to: 2-1-country_breadth.parquet (Shape: (1666, 17))
[Intensity] Saved to: 2-1-country_intensity.parquet (Shape: (1666, 17))
