In [1]:
# Notebook: 02_wdi_data_preparation.ipynb
# Purpose: World Bank WDI から必要な指標を南アジア 6 ヵ国、2000-2023 年について取得し、整形して保存する

# 1. ライブラリ読み込み
import pandas as pd
import pandas_datareader.wb as wb
import os

In [4]:
# 2. Configuration: target countries, year span, and indicator catalogue
# ISO2 codes of the six South Asian countries covered by this notebook
countries = [
    "IN",
    "BD",
    "PK",
    "NP",
    "LK",
    "BT",
]
# Every year from 2000 through 2023 inclusive
years = [*range(2000, 2023 + 1)]

# Each catalogue maps a WDI indicator code to the column name used downstream.
# Energy structure
_energy_indicators = {
    "EG.FEC.RNEW.ZS": "renewable_energy_pct",
    "EG.USE.COMM.FO.ZS": "fossil_fuel_pct",
    "EG.USE.ELEC.KH.PC": "electricity_per_capita",
    "EN.GHG.CO2.PC.CE.AR5": "co2_per_capita",
}
# Socio-economic
_socioeconomic_indicators = {
    "NY.GDP.PCAP.CD": "gdp_per_capita",
    "SL.UEM.TOTL.ZS": "unemployment_rate",
    "SH.XPD.CHEX.GD.ZS": "health_expenditure_pct",
    "NV.AGR.TOTL.ZS": "agri_valueadded_pct",
    "SP.URB.TOTL.IN.ZS": "urbanization_pct",
}

# Merged catalogue; insertion order is energy first, then socio-economic
indicators = {**_energy_indicators, **_socioeconomic_indicators}

In [5]:
# 3. Data download
# Each indicator is fetched with its own wb.download() call so that its value
# column can be renamed from the raw WDI code to a readable name right away.
def _fetch_indicator(wdi_code, column_name):
    """Download one WDI series for the configured countries and year span.

    Prints a progress line, then returns the downloaded table with its index
    levels moved into ordinary columns and the value column renamed from
    ``wdi_code`` to ``column_name``.
    """
    print(f"Downloading {wdi_code} as '{column_name}' ...")
    downloaded = wb.download(
        indicator=wdi_code,
        country=countries,
        start=years[0],
        end=years[-1],
    )
    # After reset_index() the columns are ['country', 'year', wdi_code]
    flattened = downloaded.reset_index()
    return flattened.rename(columns={wdi_code: column_name})


# One frame per indicator, in the order the indicators are declared
df_list = [
    _fetch_indicator(wdi_code, column_name)
    for wdi_code, column_name in indicators.items()
]

Downloading EG.FEC.RNEW.ZS as 'renewable_energy_pct' ...


  df_tmp = wb.download(


Downloading EG.USE.COMM.FO.ZS as 'fossil_fuel_pct' ...


  df_tmp = wb.download(


Downloading EG.USE.ELEC.KH.PC as 'electricity_per_capita' ...


  df_tmp = wb.download(


Downloading EN.GHG.CO2.PC.CE.AR5 as 'co2_per_capita' ...


  df_tmp = wb.download(


Downloading NY.GDP.PCAP.CD as 'gdp_per_capita' ...


  df_tmp = wb.download(


Downloading SL.UEM.TOTL.ZS as 'unemployment_rate' ...


  df_tmp = wb.download(


Downloading SH.XPD.CHEX.GD.ZS as 'health_expenditure_pct' ...


  df_tmp = wb.download(


Downloading NV.AGR.TOTL.ZS as 'agri_valueadded_pct' ...


  df_tmp = wb.download(


Downloading SP.URB.TOTL.IN.ZS as 'urbanization_pct' ...


  df_tmp = wb.download(


In [31]:
# 4. Merge: combine every indicator table on country + year
from functools import reduce


def _outer_join_on_keys(accumulated, incoming):
    """Outer-join two indicator frames on the shared country/year keys."""
    return pd.merge(accumulated, incoming, on=["country", "year"], how="outer")


# Fold the per-indicator frames, left to right, into one wide table
df_wdi = reduce(_outer_join_on_keys, df_list)

In [32]:
# 5. Add ISO country codes
#    The "country" column is looked up by full country name (the keys of
#    name_to_iso3 below), so the ISO codes are derived from those names.
name_to_iso3 = {
    "India": "IND",
    "Bangladesh": "BGD",
    "Pakistan": "PAK",
    "Nepal": "NPL",
    "Sri Lanka": "LKA",
    "Bhutan": "BTN"
}
df_wdi["iso3"] = df_wdi["country"].map(name_to_iso3)

# Fail loudly when a country name is missing from the table: Series.map()
# would otherwise leave NaN in "iso3" for those rows without any error.
unmapped_names = df_wdi.loc[df_wdi["iso3"].isna(), "country"].unique().tolist()
if unmapped_names:
    raise ValueError(f"No ISO3 code defined for country name(s): {unmapped_names}")

# Explicit ISO3 -> ISO2 table. Inverting name_to_iso3 would give
# ISO3 -> country name, so the "iso2" column would hold names, not codes.
iso3_to_iso2 = {
    "IND": "IN",
    "BGD": "BD",
    "PAK": "PK",
    "NPL": "NP",
    "LKA": "LK",
    "BTN": "BT"
}
df_wdi["iso2"] = df_wdi["iso3"].map(iso3_to_iso2)

In [33]:
# 6. Select and order the columns to be saved
#    Any column not listed here is dropped from df_wdi.
_key_columns = ["iso3", "year"]
_energy_columns = [
    "renewable_energy_pct",
    "fossil_fuel_pct",
    "electricity_per_capita",
    "co2_per_capita",
]
_socioeconomic_columns = [
    "gdp_per_capita",
    "unemployment_rate",
    "health_expenditure_pct",
    "agri_valueadded_pct",
    "urbanization_pct",
]
df_wdi = df_wdi[_key_columns + _energy_columns + _socioeconomic_columns]

In [34]:
# 7. Save as CSV
output_dir = "../data/processed/wdi"
output_csv = f"{output_dir}/wdi_southasia_2000_2023.csv"
# Create the target directory if needed; an existing directory is not an error
os.makedirs(output_dir, exist_ok=True)
# index=False keeps the DataFrame index out of the file
df_wdi.to_csv(output_csv, index=False)
print(f"Saved: {output_csv}")

# 8. Show a sample of the data (first 10 rows)
df_wdi.head(10)

Saved: ../data/processed/wdi/wdi_southasia_2000_2023.csv


Unnamed: 0,iso3,year,renewable_energy_pct,fossil_fuel_pct,electricity_per_capita,co2_per_capita,gdp_per_capita,unemployment_rate,health_expenditure_pct,agri_valueadded_pct,urbanization_pct
0,BGD,2000,60.2,57.93,99.253551,0.200483,396.67073,3.27,1.805045,22.718148,23.59
1,BGD,2001,55.9,61.28,109.490964,0.238508,394.65641,3.61,1.772019,21.848341,24.096
2,BGD,2002,54.4,61.25,117.271799,0.24717,393.886422,3.966,1.934171,20.584134,24.756
3,BGD,2003,52.6,62.48,123.721051,0.255251,426.748808,4.32,1.961522,19.812718,25.429
4,BGD,2004,52.0,62.76,158.478288,0.265234,455.614017,4.314,1.998543,19.26696,26.114
5,BGD,2005,50.6,64.04,169.186795,0.278993,480.085851,4.25,2.058739,18.57104,26.809
6,BGD,2006,48.6,65.94,189.59609,0.296997,490.388027,3.591,2.106466,18.034017,27.517
7,BGD,2007,47.2,67.11,198.744128,0.312232,537.955493,4.017,2.172685,17.806537,28.237
8,BGD,2008,45.2,68.29,200.690067,0.343316,613.062041,4.492,2.118788,17.595696,28.968
9,BGD,2009,43.1,69.88,218.832023,0.365306,679.211477,5.0,2.183217,17.104629,29.709
