In [41]:
pip install wbgapi pandas

Note: you may need to restart the kernel to use updated packages.


In [52]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [109]:
pip install pandas pycountry

Collecting pycountry
  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pycountry
Successfully installed pycountry-24.6.1
Note: you may need to restart the kernel to use updated packages.


In [43]:
import wbgapi as wb
import pandas as pd
import altair as alt

In [53]:
import requests

In [110]:
import pycountry

# Chart 1

In [150]:
df = pd.read_csv('p1/p1.csv')

In [151]:
# 3. Función para obtener el código ISO-3 numérico (necesario para el mapa de Vega)
def get_iso3_numeric(country_name):
    # Correcciones manuales para nombres que pycountry no reconoce directamente
    manual_map = {
        "Congo, Dem. Rep.": "180", # COD
        "Congo, Rep.": "178",      # COG
        "Egypt, Arab Rep.": "818", # EGY
        "Hong Kong SAR, China": "344", # HKG
        "Iran, Islamic Rep.": "364", # IRN
        "Korea, Rep.": "410", # KOR
        "Kyrgyz Republic": "417", # KGZ
        "Macedonia, FYR": "807", # MKD
        "Russia": "643",
        "Russian Federation": "643",
        "Slovak Republic": "703",
        "Taiwan, China": "158",
        "Venezuela, RB": "862",
        "West Bank and Gaza": "275", # Palestina
        "Yemen, Rep.": "887",
        "Côte d'Ivoire": "384",
        "Laos": "418",
        "Syria": "760"
    }
    
    if country_name in manual_map:
        return int(manual_map[country_name])
        
    try:
        # Busqueda difusa (fuzzy) para encontrar el país
        match = pycountry.countries.search_fuzzy(country_name)
        if match:
            return int(match[0].numeric)
    except:
        return None

In [152]:
# Aplicar la función
df['id'] = df['Country'].apply(get_iso3_numeric)

In [None]:
# 4. Eliminar países que no se encontraron (si los hay)
df = df.dropna(subset=['id'])

In [156]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 142 entries, 0 to 142
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Country               142 non-null    object 
 1   financial knowledge   142 non-null    float64
 2   Risk Diversification  142 non-null    float64
 3   Inflation             142 non-null    float64
 4   Interest              142 non-null    float64
 5   Interest Compounding  142 non-null    float64
 6   id                    142 non-null    float64
dtypes: float64(6), object(1)
memory usage: 8.9+ KB


In [154]:
df

Unnamed: 0,Country,financial knowledge,Risk Diversification,Inflation,Interest,Interest Compounding,id
0,Afghanistan,0.14,0.19,0.35,0.40,0.26,4.0
1,Albania,0.14,0.11,0.50,0.33,0.30,8.0
2,Algeria,0.33,0.34,0.73,0.57,0.35,12.0
3,Angola,0.15,0.38,0.17,0.26,0.54,24.0
4,Argentina,0.28,0.33,0.65,0.45,0.31,32.0
...,...,...,...,...,...,...,...
138,Vietnam,0.24,0.25,0.55,0.31,0.46,704.0
139,West Bank and Gaza,0.25,0.35,0.59,0.37,0.31,275.0
140,"Yemen, Rep.",0.13,0.28,0.44,0.16,0.28,887.0
141,Zambia,0.40,0.54,0.51,0.45,0.56,894.0


In [157]:
# 1. Convertir las columnas de porcentajes a FLOAT (decimales)
# Usamos 'to_numeric' con errors='coerce' para forzar la conversión
metricas = [
    'financial knowledge', 
    'Risk Diversification', 
    'Inflation', 
    'Interest', 
    'Interest Compounding'
]

for col in metricas:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# 2. Arreglar la columna 'id' (CRÍTICO para el mapa)
# El mapa espera el ID 4, no 4.0.
# Primero lo convertimos a numérico
df['id'] = pd.to_numeric(df['id'], errors='coerce')

# Eliminamos filas que no tengan ID (si hay alguna basura)
df = df.dropna(subset=['id'])

# Convertimos a ENTERO (int) para quitar el decimal .0
# Esto hará que en el CSV se guarde como 4, 12, 32 en lugar de 4.0, 12.0
df['id'] = df['id'].astype(int)

# 3. Verificar los tipos de datos
# Deberías ver: float64 para las métricas e int64 para el id
print("Tipos de datos después de la conversión:")
print(df.dtypes)

Tipos de datos después de la conversión:
Country                  object
financial knowledge     float64
Risk Diversification    float64
Inflation               float64
Interest                float64
Interest Compounding    float64
id                        int64
dtype: object


In [158]:
# 6. Guardar el archivo final
output_filename = 'p1/p1_clean.csv'
df.to_csv(output_filename, index=False)

print(f"Archivo '{output_filename}' generado con éxito.")
print(df.head())

Archivo 'p1/p1_clean.csv' generado con éxito.
       Country  financial knowledge  Risk Diversification  Inflation  \
0  Afghanistan                 0.14                  0.19       0.35   
1      Albania                 0.14                  0.11       0.50   
2      Algeria                 0.33                  0.34       0.73   
3       Angola                 0.15                  0.38       0.17   
4    Argentina                 0.28                  0.33       0.65   

   Interest  Interest Compounding  id  
0      0.40                  0.26   4  
1      0.33                  0.30   8  
2      0.57                  0.35  12  
3      0.26                  0.54  24  
4      0.45                  0.31  32  


# Chart 2

In [38]:
# World Bank API wbgapi

# Ver todos los indicadores (son miles)
#wb.series.info()

# Buscar indicadores por palabra clave
#indicators = wb.series.list(q='account')

# Ver detalles de un indicador específico
#wb.series.metadata.get('NY.GDP.PCAP.CD')

In [3]:
# 1. Define the indicators from Global Findex and World Bank
# fx.acc.t.d: % of adults with a bank account
# fx.sav.fin.t.d: % of adults who saved at a financial institution
# NY.GDP.PCAP.CD: GDP per capita (current US$)
indicators = {
    'FX.OWN.TOTL.ZS': 'Account_Ownership',
    'NY.GNS.ICTR.ZS': 'Gross_Savings',
    'NY.GDP.PCAP.CD': 'GDP_per_Capita'
}

In [4]:
year_to_fetch = 2024

In [13]:
df = wb.data.DataFrame(indicators.keys(), time=year_to_fetch, labels=True, skipAggs=True).reset_index()

In [14]:
df

Unnamed: 0,economy,Country,FX.OWN.TOTL.ZS,NY.GDP.PCAP.CD,NY.GNS.ICTR.ZS
0,ZWE,Zimbabwe,49.521602,2497.203322,10.700213
1,ZMB,Zambia,72.702425,1187.109434,34.421066
2,YEM,"Yemen, Rep.",,,
3,PSE,West Bank and Gaza,39.620984,2592.305912,1.883263
4,VIR,Virgin Islands (U.S.),,,
...,...,...,...,...,...
212,AND,Andorra,,49303.649167,
213,ASM,American Samoa,,,
214,DZA,Algeria,35.290107,5752.990767,39.419856
215,ALB,Albania,46.069251,11377.775743,22.501612


In [27]:
df_regions = pd.DataFrame(wb.economy.list())[['id', 'value', 'region']]

In [None]:
# 3. Cleaning and renaming
df = df.rename(columns={
        'economy': 'ISO3',
        'Country': 'country_name',
        'NY.GNS.ICTR.ZS': 'gross_savings_GDP',
        'FX.OWN.TOTL.ZS': 'account_ownership',
        'NY.GDP.PCAP.CD': 'GDP_per_capita'
    })

In [29]:
df_final = pd.merge(df, df_regions[['id', 'region']], left_on='ISO3', right_on='id', how='left')

In [30]:
df

Unnamed: 0,ISO3,country_name,account_ownership,GDP_per_capita,gross_savings_GDP
0,ZWE,Zimbabwe,49.521602,2497.203322,10.700213
1,ZMB,Zambia,72.702425,1187.109434,34.421066
2,YEM,"Yemen, Rep.",,,
3,PSE,West Bank and Gaza,39.620984,2592.305912,1.883263
4,VIR,Virgin Islands (U.S.),,,
...,...,...,...,...,...
212,AND,Andorra,,49303.649167,
213,ASM,American Samoa,,,
214,DZA,Algeria,35.290107,5752.990767,39.419856
215,ALB,Albania,46.069251,11377.775743,22.501612


In [31]:
df_final

Unnamed: 0,ISO3,country_name,account_ownership,GDP_per_capita,gross_savings_GDP,id,region
0,ZWE,Zimbabwe,49.521602,2497.203322,10.700213,ZWE,SSF
1,ZMB,Zambia,72.702425,1187.109434,34.421066,ZMB,SSF
2,YEM,"Yemen, Rep.",,,,YEM,MEA
3,PSE,West Bank and Gaza,39.620984,2592.305912,1.883263,PSE,MEA
4,VIR,Virgin Islands (U.S.),,,,VIR,LCN
...,...,...,...,...,...,...,...
212,AND,Andorra,,49303.649167,,AND,ECS
213,ASM,American Samoa,,,,ASM,EAS
214,DZA,Algeria,35.290107,5752.990767,39.419856,DZA,MEA
215,ALB,Albania,46.069251,11377.775743,22.501612,ALB,ECS


In [35]:
df_final['region'].unique()

array(['SSF', 'MEA', 'LCN', 'EAS', 'ECS', 'NAC', 'SAS'], dtype=object)

In [32]:
# Limpieza: Eliminar nulos para asegurar que el Bubble Chart funcione
df_clean = df_final.dropna(subset=['gross_savings_GDP', 'account_ownership', 'GDP_per_capita'])
df_clean

Unnamed: 0,ISO3,country_name,account_ownership,GDP_per_capita,gross_savings_GDP,id,region
0,ZWE,Zimbabwe,49.521602,2497.203322,10.700213,ZWE,SSF
1,ZMB,Zambia,72.702425,1187.109434,34.421066,ZMB,SSF
3,PSE,West Bank and Gaza,39.620984,2592.305912,1.883263,PSE,MEA
5,VNM,Viet Nam,70.550753,4717.290287,36.690108,VNM,EAS
8,UZB,Uzbekistan,59.658383,3161.700106,28.297617,UZB,ECS
...,...,...,...,...,...,...,...
206,AUS,Australia,98.010378,64603.985631,22.922852,AUS,EAS
208,ARM,Armenia,71.373473,8556.214070,19.235686,ARM,ECS
209,ARG,Argentina,81.744245,13969.783660,16.697117,ARG,LCN
214,DZA,Algeria,35.290107,5752.990767,39.419856,DZA,MEA


In [36]:
# Guardar para Vega-Lite
df_clean.to_csv('ownership_savings.csv', index=False)
    
print(f"¡Éxito! Datos guardados para {len(df_clean)} países.")
print(df_clean[['country_name', 'gross_savings_GDP', 'account_ownership', 'GDP_per_capita', 'region']].head())

¡Éxito! Datos guardados para 111 países.
         country_name  gross_savings_GDP  account_ownership  GDP_per_capita  \
0            Zimbabwe          10.700213          49.521602     2497.203322   
1              Zambia          34.421066          72.702425     1187.109434   
3  West Bank and Gaza           1.883263          39.620984     2592.305912   
5            Viet Nam          36.690108          70.550753     4717.290287   
8          Uzbekistan          28.297617          59.658383     3161.700106   

  region  
0    SSF  
1    SSF  
3    MEA  
5    EAS  
8    ECS  


# Chart 3

In [103]:
# Cargar tus datos (asegúrate de que el nombre coincida con tu archivo)
df = pd.read_csv('p3/p3.csv') 

In [104]:
df

Unnamed: 0,Country,men,women,gender gap,adults in the richest households,adults in the poorest households,wealth gap,age 35-54,age 15-34,age gap
0,Argentina,32%,24%,8%,33%,21%,12%,31%,31%,-1%
1,Australia,72%,56%,15%,73%,50%,23%,67%,64%,4%
2,Brazil,41%,29%,13%,38%,29%,9%,34%,37%,-3%
3,Canada,77%,60%,17%,73%,61%,12%,75%,66%,10%
4,China,29%,27%,2%,32%,22%,11%,29%,35%,-6%
5,France,56%,48%,8%,55%,47%,8%,58%,46%,12%
6,Germany,72%,60%,12%,73%,55%,17%,82%,72%,10%
7,India,27%,20%,8%,26%,20%,5%,20%,27%,-7%
8,Indonesia,39%,25%,14%,38%,24%,13%,30%,38%,-8%
9,Italy,45%,30%,15%,44%,27%,17%,39%,47%,-8%


In [105]:
# 2. Definimos las columnas que queremos convertir en filas
# Incluimos los grupos y los valores del gap según tu solicitud
columnas_indicadores = [
    'men', 'women', 'gender gap',
    'adults in the richest households', 'adults in the poorest households', 'wealth gap',
    'age 35-54', 'age 15-34', 'age gap'
]

# 3. Convertir a formato Long
df_long = df.melt(
    id_vars=['Country'], 
    value_vars=columnas_indicadores,
    var_name='indicator', 
    value_name='value'
)

# 4. Crear una columna "Category" para facilitar los filtros en Vega-Lite
# Esta lógica agrupa los indicadores en sus respectivos pares de comparación
def asignar_categoria(row):
    ind = row['indicator']
    if ind in ['men', 'women', 'gender gap']:
        return 'Gender'
    elif ind in ['adults in the richest households', 'adults in the poorest households', 'wealth gap']:
        return 'Wealth'
    elif ind in ['age 35-54', 'age 15-34', 'age gap']:
        return 'Age'
    return 'Other'

df_long['category'] = df_long.apply(asignar_categoria, axis=1)



In [106]:
# 5. Guardar el archivo listo para usar en tu repositorio de GitHub
df_long.to_csv('p3/p3_long.csv', index=False)



In [107]:
print(df_long.head(10))

     Country indicator value category
0  Argentina       men   32%   Gender
1  Australia       men   72%   Gender
2     Brazil       men   41%   Gender
3     Canada       men   77%   Gender
4      China       men   29%   Gender
5     France       men   56%   Gender
6    Germany       men   72%   Gender
7      India       men   27%   Gender
8  Indonesia       men   39%   Gender
9      Italy       men   45%   Gender


# Chart 5

In [168]:
df = pd.read_csv('p5/p5.csv')

In [169]:
df = df.rename(columns={'Country or Economy': 'Country'})

In [170]:
# 3. Transformar de Wide a Long Format (Melt)
# Esto pone todos los conceptos en una sola columna llamada 'Concept'
df_long = df.melt(
    id_vars=['Country'], 
    var_name='Concept', 
    value_name='Score'
)

In [171]:
# 4. Asegurar que los Scores sean numéricos
df_long['Score'] = pd.to_numeric(df_long['Score'], errors='coerce')

In [172]:
df_long

Unnamed: 0,Country,Concept,Score
0,Albania,Time value of money,71.100765
1,Brazil,Time value of money,54.400003
2,Cambodia,Time value of money,23.051018
3,Chile,Time value of money,62.046206
4,Costa Rica,Time value of money,53.418803
...,...,...,...
307,Thailand,Risk diversification,68.118048
308,Uruguay,Risk diversification,60.916120
309,Yemen,Risk diversification,57.032996
310,Overall Average,Risk diversification,58.849994


In [173]:
# 5. Guardar el archivo listo para Vega-Lite
df_long.to_csv('p5/p5_long.csv', index=False)

print("Archivo 'p5_long.csv' creado exitosamente.")
print(df_long.head())

Archivo 'p5_long.csv' creado exitosamente.
      Country              Concept      Score
0     Albania  Time value of money  71.100765
1      Brazil  Time value of money  54.400003
2   Cambodia   Time value of money  23.051018
3       Chile  Time value of money  62.046206
4  Costa Rica  Time value of money  53.418803
