In [53]:
!pip install wbgapi


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [54]:
import io
from pathlib import Path

import pandas as pd
import pycountry
import requests
import wbgapi as wb


# Загрузка данных World Bank

In [59]:
import wbgapi as wb
import pandas as pd

# World Bank indicator code -> column name used in the merged frame.
wb_indicators = {
    'NY.GDP.MKTP.CD': 'gdp_current_usd',
    'NY.GDP.PCAP.CD': 'gdp_per_capita',
    'FP.CPI.TOTL.ZG': 'inflation_rate',
    'SL.UEM.TOTL.ZS': 'unemployment_rate',
    'NE.TRD.GNFS.ZS': 'trade_percent_gdp',
    'NE.EXP.GNFS.ZS': 'exports_percent_gdp',
    'NE.IMP.GNFS.ZS': 'imports_percent_gdp',
    'NY.GNS.ICTR.ZS': 'gross_savings_percent_gdp',
    'SE.XPD.TOTL.GD.ZS': 'education_expenditure_percent_gdp',
    'SH.XPD.CHEX.GD.ZS': 'health_expenditure_percent_gdp',
    'SP.POP.TOTL': 'population',
    'SP.URB.TOTL.IN.ZS': 'urban_population_percent',
    'EN.ATM.CO2E.PC': 'co2_emissions_per_capita'
}

wb_data_list = []

for indicator_code, indicator_name in wb_indicators.items():
    try:
        # One wide frame per indicator: a row per economy and one 'YRxxxx'
        # column per year (mrv=5 presumably limits this to the five most
        # recent values - confirm against the wbgapi docs).
        df = wb.data.DataFrame(indicator_code, mrv=5)
        df = df.reset_index()
        # Reshape to long format: one row per (economy, year).
        df = df.melt(id_vars=['economy'], var_name='year', value_name=indicator_name)
        # Year labels look like 'YR2020'; strip the prefix and store as int.
        df['year'] = df['year'].str.replace('YR', '').astype(int)
        wb_data_list.append(df)
    except Exception as e:
        # Best effort: keep loading the remaining indicators, but report the
        # failure. A silently skipped indicator would otherwise only show up
        # later as a missing column.
        print(f"Ошибка при загрузке {indicator_name} ({indicator_code}): {e}")

# Without at least one frame there is nothing to merge; fail with a clear
# message instead of an IndexError on wb_data_list[0].
if not wb_data_list:
    raise RuntimeError("No World Bank indicators could be loaded")

wb_df = wb_data_list[0]

# Outer merge keeps every (economy, year) pair seen in any indicator.
for df in wb_data_list[1:]:
    wb_df = wb_df.merge(df, on=['economy', 'year'], how='outer')


In [60]:
# Display the merged World Bank frame (one row per economy and year).
wb_df

Unnamed: 0,economy,year,gdp_current_usd,gdp_per_capita,inflation_rate,unemployment_rate,trade_percent_gdp,exports_percent_gdp,imports_percent_gdp,gross_savings_percent_gdp,education_expenditure_percent_gdp,health_expenditure_percent_gdp,population,urban_population_percent
0,ABW,2019,,,,,,,,,,,,
1,ABW,2020,2.481857e+09,22855.932320,,,124.137267,52.992604,71.144663,1.355999,,,108587.0,43.697
2,ABW,2021,2.929447e+09,27200.061079,,,147.015577,70.740044,76.275533,9.935382,3.618558,,107700.0,43.866
3,ABW,2022,3.279344e+09,30559.533535,,,172.884172,87.570280,85.313891,17.205179,,,107310.0,44.052
4,ABW,2023,3.648573e+09,33984.790620,,,169.798283,88.343786,81.454496,17.566601,,,107359.0,44.254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1591,ZWE,2020,2.686856e+10,1730.453910,557.201817,8.621,47.313382,22.293071,25.020311,15.097106,,2.954401,15526888.0,32.242
1592,ZWE,2021,2.724051e+10,1724.387271,98.546105,9.540,50.847123,22.775238,28.071885,17.237568,,2.785717,15797210.0,32.303
1593,ZWE,2022,3.278966e+10,2040.546587,104.705171,10.087,64.763607,27.872170,36.891437,12.547904,,3.628807,16069056.0,32.395
1594,ZWE,2023,3.523137e+10,2156.034093,,8.759,50.794964,21.579400,29.215564,16.654035,0.384771,,16340822.0,32.517


# Загрузка данных World Health Organization (WHO)

In [None]:
import requests
import pandas as pd


def get_who_indicator(indicator_code, timeout=30):
    """Download one WHO GHO indicator and keep the latest value per country code.

    Args:
        indicator_code: GHO indicator code, inserted into the API URL as-is.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame indexed by ``ISO3`` with a single column
        ``WHO_<indicator_code>`` holding, for each ``SpatialDim`` value, the
        ``NumericValue`` of its most recent year that has a value. The frame
        is empty when the API returns no records.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if the payload lacks ``value`` or the expected fields.
    """
    url = f"https://ghoapi.azureedge.net/api/{indicator_code}"
    value_column = f'WHO_{indicator_code}'

    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    records = data['value']

    if not records:
        # Nothing to select from; return an empty frame of the usual shape so
        # callers can still concatenate it with the other indicators.
        return pd.DataFrame(
            columns=[value_column],
            index=pd.Index([], name='ISO3'),
            dtype='float64',
        )

    df = pd.DataFrame(records)[['SpatialDim', 'TimeDim', 'NumericValue']]
    df = df.rename(columns={
        'SpatialDim': 'ISO3',
        'TimeDim': 'Year',
        'NumericValue': value_column,
    })
    df = df.assign(Year=pd.to_numeric(df['Year'], errors='coerce'))
    # Drop rows without a usable year or value, so that "latest" means the
    # latest year that actually carries a number.
    df = df.dropna(subset=['Year', value_column])
    df = df.astype({'Year': int})

    # NOTE(review): an indicator may return several rows for the same country
    # and year (presumably one per breakdown such as sex - confirm); only one
    # of them survives tail(1). The stable sort at least makes that choice
    # deterministic (the last such row in API order).
    latest = df.sort_values('Year', kind='stable').groupby('ISO3').tail(1)

    return latest.set_index('ISO3')[[value_column]]

# WHO GHO indicator code -> column name in the combined WHO frame.
who_indicators = {
    "HWF_0001": "medical_doctors",              # Medical doctors (per 10 000 population)
    "WHOSIS_000001": "life_expectancy",         # Life expectancy at birth
    "MDG_0000000001": "infant_mortality_rate",  # Infant mortality rate
    "NCDMORT3070": "probability_of_dying",      # Probability of dying from NCDs between ages 30-70
    "AIR_41": "air_pollution",                  # Ambient air pollution
}

# Download each indicator (in the order listed above) and place the
# single-column frames side by side, aligned on the ISO3 index.
who_frames = [get_who_indicator(code) for code in who_indicators]
df_who = pd.concat(who_frames, axis=1)
df_who.columns = list(who_indicators.values())

# Expose the index as an 'economy' column, the key name used by the World Bank frames.
df_who["economy"] = df_who.index
df_who.head()

Unnamed: 0_level_0,medical_doctors,life_expectancy,infant_mortality_rate,probability_of_dying,air_pollution
ISO3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NIU,16.67,,19.997473,,
VCT,9.38,72.557094,10.69685,23.6,0.175
MHL,4.67,,26.323793,,
KIR,1.93,60.873456,43.657171,44.1,5.34
SOM,0.48,51.749825,73.148684,27.6,172.992


# Загрузка данных UNIDO (индикаторы промышленности, загружаются через World Bank API)

In [33]:
# NOTE(review): this URL is assigned but not used in the cells shown here; the
# indicators below are downloaded through wbgapi (World Bank API) instead.
unido_url = "https://stat.unido.org/content/api/download/competitive-industrial-performance-index-latest"

# Column name -> World Bank indicator code.
# NOTE(review): NV.IND.MANF.CD looks like total manufacturing value added in
# current US$ rather than a per-capita figure - confirm the column name.
unido_indicators = dict(
    manufacturing_value_added_per_capita='NV.IND.MANF.CD',
    manufacturing_percent_gdp='NV.IND.MANF.ZS',
    industry_value_added_percent_gdp='NV.IND.TOTL.ZS',
)

unido_data_list = []

for indicator_name, indicator_code in unido_indicators.items():
    try:
        # Wide frame: one row per economy, one 'YRxxxx' column per year.
        wide = wb.data.DataFrame(indicator_code, mrv=5).reset_index()
        # Long frame: one row per (economy, year), with the year stored as int.
        df = wide.melt(
            id_vars=['economy'],
            var_name='year',
            value_name=indicator_name,
        ).assign(year=lambda long: long['year'].str.replace('YR', '').astype(int))
    except Exception as e:
        print(f"Ошибка при загрузке {indicator_name}: {e}")
    else:
        unido_data_list.append(df)


In [34]:
# Combine the per-indicator frames into one frame keyed by (economy, year).
if unido_data_list:
    unido_df = unido_data_list[0]

    for df in unido_data_list[1:]:
        unido_df = unido_df.merge(df, on=['economy', 'year'], how='outer')
else:
    print("Не удалось загрузить данные UNIDO")
    # Keep the join keys (typed like the loaded frames) so a later merge on
    # ['economy', 'year'] still works instead of raising KeyError.
    unido_df = pd.DataFrame({
        'economy': pd.Series(dtype='object'),
        'year': pd.Series(dtype='int64'),
    })

# Only the last top-level expression of a cell is rendered, so the preview
# has to sit here rather than inside the if-branch.
unido_df.head()


In [35]:
# Outer-join the World Bank frame with the industry indicators on
# (economy, year), keeping rows that appear in either frame.
wb_full = wb_df.merge(unido_df, on=['economy', 'year'], how='outer')

In [92]:
# Display the combined frame.
wb_full

Unnamed: 0,economy,year,gdp_current_usd,gdp_per_capita,inflation_rate,unemployment_rate,trade_percent_gdp,exports_percent_gdp,imports_percent_gdp,gross_savings_percent_gdp,education_expenditure_percent_gdp,health_expenditure_percent_gdp,population,urban_population_percent,manufacturing_value_added_per_capita,manufacturing_percent_gdp,industry_value_added_percent_gdp
0,ABW,2019,,,,,,,,,,,,,,,
1,ABW,2020,2.481857e+09,22855.932320,,,124.137267,52.992604,71.144663,1.355999,,,108587.0,43.697,,,
2,ABW,2021,2.929447e+09,27200.061079,,,147.015577,70.740044,76.275533,9.935382,3.618558,,107700.0,43.866,,,
3,ABW,2022,3.279344e+09,30559.533535,,,172.884172,87.570280,85.313891,17.205179,,,107310.0,44.052,,,
4,ABW,2023,3.648573e+09,33984.790620,,,169.798283,88.343786,81.454496,17.566601,,,107359.0,44.254,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1591,ZWE,2020,2.686856e+10,1730.453910,557.201817,8.621,47.313382,22.293071,25.020311,15.097106,,2.954401,15526888.0,32.242,4.217442e+09,15.696567,32.767518
1592,ZWE,2021,2.724051e+10,1724.387271,98.546105,9.540,50.847123,22.775238,28.071885,17.237568,,2.785717,15797210.0,32.303,3.389518e+09,12.442934,28.805586
1593,ZWE,2022,3.278966e+10,2040.546587,104.705171,10.087,64.763607,27.872170,36.891437,12.547904,,3.628807,16069056.0,32.395,6.732853e+09,20.533465,39.691641
1594,ZWE,2023,3.523137e+10,2156.034093,,8.759,50.794964,21.579400,29.215564,16.654035,0.384771,,16340822.0,32.517,5.788329e+09,16.429474,26.301060


In [94]:
# Summarise column dtypes and non-null counts of the combined frame.
wb_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1596 entries, 0 to 1595
Data columns (total 17 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   economy                               1596 non-null   object 
 1   year                                  1596 non-null   int64  
 2   gdp_current_usd                       1249 non-null   float64
 3   gdp_per_capita                        1249 non-null   float64
 4   inflation_rate                        1108 non-null   float64
 5   unemployment_rate                     1166 non-null   float64
 6   trade_percent_gdp                     1057 non-null   float64
 7   exports_percent_gdp                   1066 non-null   float64
 8   imports_percent_gdp                   1076 non-null   float64
 9   gross_savings_percent_gdp             887 non-null    float64
 10  education_expenditure_percent_gdp     818 non-null    float64
 11  health_expenditur

# Объединение датасетов

In [103]:
# Collapse the yearly panel to one row per economy by averaging every numeric
# indicator over the available years; the year itself is not an indicator.
wb_grouped = (
    wb_full
    .drop(columns=['year'])
    .groupby('economy')
    .mean(numeric_only=True)
)

# Attach the WHO columns by economy code; the left join keeps every economy
# from the World Bank side even when WHO has no matching row.
who_by_economy = df_who.set_index('economy')
final_df = pd.merge(
    wb_grouped,
    who_by_economy,
    left_index=True,
    right_index=True,
    how='left',
)
final_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 266 entries, ABW to ZWE
Data columns (total 20 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   gdp_current_usd                       258 non-null    float64
 1   gdp_per_capita                        258 non-null    float64
 2   inflation_rate                        226 non-null    float64
 3   unemployment_rate                     235 non-null    float64
 4   trade_percent_gdp                     225 non-null    float64
 5   exports_percent_gdp                   226 non-null    float64
 6   imports_percent_gdp                   228 non-null    float64
 7   gross_savings_percent_gdp             202 non-null    float64
 8   education_expenditure_percent_gdp     231 non-null    float64
 9   health_expenditure_percent_gdp        240 non-null    float64
 10  population                            265 non-null    float64
 11  urban_population_perce

In [104]:
# to_csv does not create missing parent directories, so make sure data/
# exists before writing (a fresh checkout may not have it).
output_path = Path("data/wb_unido_who_dataset.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_path)