In [6]:
import requests
import zipfile
import io
import openpyxl
import pandas as pd

# 1. Download the WGI Excel archive.
ZIP_URL = "https://databank.worldbank.org/data/download/WGI_EXCEL.zip"
response = requests.get(ZIP_URL, timeout=120)
# Fail loudly on an HTTP error instead of passing an error page to zipfile,
# which would otherwise surface as a confusing BadZipFile.
response.raise_for_status()
zip_bytes = response.content

# 2. Codes of the six indicators to extract.
vars6 = ['GE.EST', 'CC.EST', 'RL.EST', 'VA.EST', 'PV.EST', 'RQ.EST']
wanted_codes = frozenset(vars6)  # O(1) membership test inside the row loop

# Long-format rows: (iso3, year, indicator code, value).
records = []
with zipfile.ZipFile(io.BytesIO(zip_bytes)) as z:
    # Walk every workbook in the archive.
    for fname in z.namelist():
        # openpyxl only reads the OOXML formats; a legacy .xls file would
        # make load_workbook raise, so such files are skipped.
        if not fname.lower().endswith(('.xlsx', '.xlsm')):
            continue
        wb = openpyxl.load_workbook(io.BytesIO(z.read(fname)), data_only=True)
        try:
            if 'Data' not in wb.sheetnames:
                continue
            ws = wb['Data']
            # Actual data starts at worksheet row 15; min_row avoids
            # materializing the whole sheet just to slice off the header.
            for r in ws.iter_rows(min_row=15, values_only=True):
                if len(r) < 4:
                    # Sheet too narrow to hold the code columns.
                    continue
                iso = r[1]         # column B: ISO3
                ind = r[3]         # column D: Indicator Code
                if ind not in wanted_codes:
                    continue
                # NOTE(review): years are mapped by position, which assumes
                # column E is 1996 and every later year has its own column
                # with no gaps -- confirm against the sheet's header row.
                for yr in range(1996, 2017):
                    idx = 4 + (yr - 1996)
                    val = r[idx] if idx < len(r) else None
                    records.append((iso, yr, ind, val))
        finally:
            wb.close()

# 3. Build the long-format DataFrame and pivot it to one column per indicator.
wgi_panel = (
    pd.DataFrame(records, columns=['iso3', 'year', 'indicator', 'value'])
      # Spreadsheet cells may carry textual placeholders for missing data;
      # coerce them to NaN so the column is numeric before pivot_table
      # applies its default mean aggregation.
      .assign(value=lambda df: pd.to_numeric(df['value'], errors='coerce'))
      .pivot_table(index=['iso3', 'year'], columns='indicator', values='value')
      .reset_index()
)

# 4. Rename the indicator-code columns to readable names.
wgi_panel = wgi_panel.rename(columns={
    'GE.EST': 'gov_effect',
    'CC.EST': 'corruption',
    'RL.EST': 'rule_of_law',
    'VA.EST': 'voice',
    'PV.EST': 'stability',
    'RQ.EST': 'reg_quality'
})
# Drop the leftover "indicator" label that the pivot leaves on the column axis.
wgi_panel.columns.name = None

# 5. Save the full 1996-2016 WGI panel as CSV (without the index column).
output_path = "WGI_1996_2016.csv"
wgi_panel.to_csv(output_path, index=False)
# Report the path that was actually written so the message cannot drift from
# output_path if the filename is changed later.
print(f"{output_path} saved — rows: {len(wgi_panel)}")


WGI_1996_2016.csv saved — rows: 4394
