### instalar API e importar bibliotecas:

In [1]:
pip install wbgapi

Note: you may need to restart the kernel to use updated packages.


In [2]:
import wbgapi as wb
import pandas as pd

### Seleção das informações (series) que serão incluidas na tabela para análise:

##### Para encontrar os códigos referentes a cada indicador, basta ir em https://data.worldbank.org/indicator, selecionar o indicador e copiar o que tem escrito depois da ultima barra da url.

In [3]:
paises = ['BRA', 'ARG', 'BOL', 'CHL', 'COL', 'ECU', 'PRY', 'PER', 'URY', 'VEN']
periodo = range(2000, 2023)
indicadores = {
    "SP.POP.TOTL": "População Total",
    "SP.DYN.LE00.IN": "Expectativa de Vida",
    "SH.DYN.MORT": "Mortalidade Infantil (por 1000)",
    "SP.ADO.TFRT": "Taxa de fertilidade na adolescência (nascimentos por 1.000 mulheres de 15 a 19 anos)",
    "SH.XPD.CHEX.GD.ZS": "Gastos com Saúde (% do PIB)",
    "SN.ITK.DEFC.ZS": "Prevalência de desnutrição (% da população)",
    "SL.UEM.TOTL.FE.ZS": "Desemprego Feminino (%)",
    "SL.UEM.TOTL.MA.ZS": "Desemprego Masculino (%)",
    "SI.POV.DDAY" : "Taxa de pobreza de US$ 3,00 por dia (%)",
    "EG.ELC.ACCS.ZS": "Acesso à eletricidade (% da população)",
    "SH.STA.BASS.ZS": "Acesso ao Saneamento Básico (%)",
    "SH.H2O.BASW.ZS": "Acesso à Água Potável (%)",
    "NY.GDP.PCAP.KD": "PIB per capita",
    "E.ADT.LITR.ZS": "Taxa de alfabetização, total de adultos (% de pessoas com 15 anos ou mais)",
    "SE.PRM.NENR": "Matrícula escolar primária (% líquida)",
    "SE.SEC.NENR": "Matrícula escolar secundária (% líquida)",
    "SE.TER.ENRR": "Matrícula escolar, terciário (% bruto)",
}

df_data = wb.data.DataFrame(
    list(indicadores.keys()), 
    paises, 
    time=periodo,
    labels=True
)


df_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Country,Series,YR2000,YR2001,YR2002,YR2003,YR2004,YR2005,YR2006,YR2007,...,YR2013,YR2014,YR2015,YR2016,YR2017,YR2018,YR2019,YR2020,YR2021,YR2022
economy,series,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
VEN,SP.POP.TOTL,"Venezuela, RB","Population, total",24526708.0,24979922.0,25435671.0,25892703.0,26340838.0,26785804.0,27224686.0,27649363.0,...,29912198.0,30254079.0,30573972.0,30765720.0,30565323.0,29807592.0,28938098.0,28444077.0,28237826.0,28213017.0
URY,SP.POP.TOTL,Uruguay,"Population, total",3266206.0,3274251.0,3278867.0,3281186.0,3283148.0,3285771.0,3289506.0,3295153.0,...,3345337.0,3356334.0,3368017.0,3379283.0,3388438.0,3394534.0,3397206.0,3398968.0,3396695.0,3390913.0
PER,SP.POP.TOTL,Peru,"Population, total",26598331.0,26964601.0,27288217.0,27580490.0,27852190.0,28101180.0,28324491.0,28530699.0,...,29817919.0,30115826.0,30457600.0,30866494.0,31324637.0,31897584.0,32449303.0,32838579.0,33155882.0,33475438.0
PRY,SP.POP.TOTL,Paraguay,"Population, total",5100868.0,5187689.0,5261328.0,5326436.0,5388175.0,5448097.0,5505148.0,5559386.0,...,5982445.0,6069811.0,6159103.0,6249126.0,6338660.0,6427382.0,6515058.0,6603739.0,6684182.0,6760464.0
ECU,SP.POP.TOTL,Ecuador,"Population, total",12689206.0,12910740.0,13138472.0,13372306.0,13608701.0,13846163.0,14086131.0,14328773.0,...,15807128.0,16035124.0,16266225.0,16505139.0,16759519.0,17049547.0,17340021.0,17546065.0,17682454.0,17823897.0


In [None]:
df_data.shape

In [None]:
df_data.dtypes

In [None]:
df_data.columns.tolist()


### Tranformar os anos em linhas e o título das series em colunas e adaptar a escrita e o tipo dos anos:

In [4]:
df_long = df_data.melt(
    id_vars=['Country', 'Series'],   
    var_name='Ano',
    value_name='Valor'
)

df_long['Ano'] = df_long['Ano'].str.replace('YR', '').astype(int)


In [5]:
df_long.dtypes

Country     object
Series      object
Ano          int64
Valor      float64
dtype: object

In [6]:
df_data = df_long.pivot_table(
    index=['Country', 'Ano'],
    columns='Series',
    values='Valor'
).reset_index()

### Tabela pronta para análise:

In [7]:
df_data.head()

Series,Country,Ano,Access to electricity (% of population),"Adolescent fertility rate (births per 1,000 women ages 15-19)",Current health expenditure (% of GDP),GDP per capita (constant 2015 US$),"Life expectancy at birth, total (years)","Mortality rate, under-5 (per 1,000 live births)",People using at least basic drinking water services (% of population),People using at least basic sanitation services (% of population),"Population, total",Poverty headcount ratio at $3.00 a day (2021 PPP) (% of population),Prevalence of undernourishment (% of population),"School enrollment, primary (% net)","School enrollment, secondary (% net)","School enrollment, tertiary (% gross)","Unemployment, female (% of female labor force) (modeled ILO estimate)","Unemployment, male (% of male labor force) (modeled ILO estimate)"
0,Argentina,2000,95.7,65.808,8.220011,10631.650364,73.91,19.4,96.284026,87.666927,37213984.0,7.3,,98.92861,78.48781,54.333271,16.356,14.029
1,Argentina,2001,95.5,63.491,8.371799,10051.944846,74.154,18.8,96.52282,88.135939,37624825.0,10.7,2.9,99.0026,78.94955,58.791691,17.191,17.411
2,Argentina,2002,96.1,62.126,7.393312,8861.561993,74.312,18.3,96.755469,88.603394,38029349.0,17.1,3.7,98.95948,78.81052,62.059261,18.823,20.122
3,Argentina,2003,96.3,60.002,6.894516,9545.531941,74.307,17.8,96.982417,89.069563,38424282.0,8.2,4.3,98.87379,78.51365,64.402451,17.577,13.806
4,Argentina,2004,96.5,61.681,7.228683,10302.446532,74.871,17.3,97.204346,89.535068,38815916.0,6.0,3.9,98.57855,78.53653,64.887848,15.823,11.893


In [8]:
df_data.shape

(230, 18)

In [None]:
df_data.columns.tolist()

In [None]:
df_data.isna().sum()

In [None]:
df_data.dtypes