In [1]:
import pandas as pd
import requests

# Our World in Data grapher chart for GDP per capita; the CSV and its metadata
# share the same base URL and query string.
OWID_CHART_URL = "https://ourworldindata.org/grapher/gdp-per-capita-worldbank"
OWID_QUERY = "?v=1&csvType=full&useColumnShortNames=true"
REQUEST_TIMEOUT_S = 30  # seconds; without a timeout requests.get can block forever

# Fetch the data.
# For http(s) URLs pandas forwards `storage_options` as request headers.
df = pd.read_csv(
    f"{OWID_CHART_URL}.csv{OWID_QUERY}",
    storage_options={'User-Agent': 'Our World In Data data fetch/1.0'},
)

# Fetch the metadata
metadata_response = requests.get(
    f"{OWID_CHART_URL}.metadata.json{OWID_QUERY}",
    timeout=REQUEST_TIMEOUT_S,
)
# Fail loudly on a 4xx/5xx answer instead of trying to JSON-decode an error page.
metadata_response.raise_for_status()
metadata = metadata_response.json()

In [2]:
# Preview the GDP-per-capita frame fetched from Our World in Data
# (pandas truncates the middle rows in the rendered output).
df

Unnamed: 0,Entity,Code,Year,ny_gdp_pcap_pp_kd
0,Afghanistan,AFG,2000,1617.8264
1,Afghanistan,AFG,2001,1454.1108
2,Afghanistan,AFG,2002,1774.3087
3,Afghanistan,AFG,2003,1815.9282
4,Afghanistan,AFG,2004,1776.9182
...,...,...,...,...
7058,Zimbabwe,ZWE,2019,3294.8062
7059,Zimbabwe,ZWE,2020,2987.2698
7060,Zimbabwe,ZWE,2021,3184.7854
7061,Zimbabwe,ZWE,2022,3323.1218


In [37]:
# Load the OECD CSV export and narrow it down to the totals we analyse.
oecd_raw = pd.read_csv("OECD,DF_BLI,+all.csv")

# Keep only the columns we care about; the boolean mask preserves the
# column order of the file.
wanted_columns = {"LOCATION", "INDICATOR", "OBS_VALUE", "UNIT_MEASURE", "INEQUALITY"}
oecd_trimmed = oecd_raw.loc[:, oecd_raw.columns.isin(wanted_columns)]

# Only rows whose INEQUALITY value is "TOT" are used from here on.
df = oecd_trimmed.loc[oecd_trimmed["INEQUALITY"].eq("TOT")]

# Life expectancy (HS_LEB): check which unit it is reported in.
is_life_expectancy = df["INDICATOR"].eq("HS_LEB")
life_expectancy = df.loc[is_life_expectancy]
print(life_expectancy["UNIT_MEASURE"].unique())  # unit is YR (years)

# Homicide rate (PS_REPH): same check.
is_homicide_rate = df["INDICATOR"].eq("PS_REPH")
homicide_rate = df.loc[is_homicide_rate]
print(homicide_rate["UNIT_MEASURE"].unique())  # unit is RATIO

df

['YR']
['RATIO']


Unnamed: 0,LOCATION,INDICATOR,INEQUALITY,OBS_VALUE,UNIT_MEASURE
0,AUT,JE_LMIS,TOT,2.3,PC
1,BEL,JE_LMIS,TOT,2.4,PC
2,CAN,JE_LMIS,TOT,3.8,PC
3,CZE,JE_LMIS,TOT,2.3,PC
4,DNK,JE_LMIS,TOT,4.5,PC
...,...,...,...,...,...
2360,CRI,HO_NUMR,TOT,1.2,RATIO
2361,CRI,IW_HADI,TOT,16517.0,USD
2362,CRI,EQ_AIRP,TOT,17.5,MICRO_M3
2363,CRI,HS_LEB,TOT,80.5,YR


In [30]:
# Inspect the life-expectancy rows (INDICATOR == "HS_LEB").
# NOTE(review): this cell's execution count is lower than that of the cell
# that defines `life_expectancy`; confirm the notebook survives
# Restart Kernel -> Run All in top-to-bottom order.
life_expectancy

Unnamed: 0,LOCATION,INDICATOR,INEQUALITY,OBS_VALUE,UNIT_MEASURE
2032,AUS,HS_LEB,TOT,83.0,YR
2033,AUT,HS_LEB,TOT,82.0,YR
2034,BEL,HS_LEB,TOT,82.1,YR
2035,CAN,HS_LEB,TOT,82.1,YR
2036,CZE,HS_LEB,TOT,79.3,YR
2037,DNK,HS_LEB,TOT,81.5,YR
2038,FIN,HS_LEB,TOT,82.1,YR
2039,FRA,HS_LEB,TOT,82.9,YR
2040,DEU,HS_LEB,TOT,81.4,YR
2041,GRC,HS_LEB,TOT,81.7,YR


In [31]:
# Inspect the homicide-rate rows (INDICATOR == "PS_REPH").
# NOTE(review): this cell's execution count is lower than that of the cell
# that defines `homicide_rate`; confirm the notebook survives
# Restart Kernel -> Run All in top-to-bottom order.
homicide_rate

Unnamed: 0,LOCATION,INDICATOR,INEQUALITY,OBS_VALUE,UNIT_MEASURE
2150,AUS,PS_REPH,TOT,0.9,RATIO
2151,AUT,PS_REPH,TOT,0.5,RATIO
2152,BEL,PS_REPH,TOT,1.1,RATIO
2153,CAN,PS_REPH,TOT,1.2,RATIO
2154,CZE,PS_REPH,TOT,0.7,RATIO
2155,DNK,PS_REPH,TOT,0.5,RATIO
2156,FIN,PS_REPH,TOT,1.2,RATIO
2157,FRA,PS_REPH,TOT,0.4,RATIO
2158,DEU,PS_REPH,TOT,0.4,RATIO
2159,GRC,PS_REPH,TOT,1.0,RATIO


In [44]:
# Min-max normalize both indicators so their values fall in [0, 1].


def min_max_normalize(values, lower, upper):
    """Linearly rescale ``values`` so that ``lower`` maps to 0 and ``upper`` to 1.

    Parameters
    ----------
    values : pandas.Series
        Numeric observations to rescale.
    lower, upper : float
        Bounds of the original scale (here the series' own min and max).

    Returns
    -------
    pandas.Series
        ``(values - lower) / (upper - lower)``, with the index of ``values``.
        When ``upper == lower`` the result is all zeros rather than NaN.
    """
    span = upper - lower
    # A constant column has zero span; dividing by 1 instead avoids 0/0 = NaN.
    return (values - lower) / (span if span != 0 else 1)


# Life expectancy first.
min_LE = life_expectancy['OBS_VALUE'].min()
max_LE = life_expectancy['OBS_VALUE'].max()
print(min_LE, max_LE)

life_expectancy_n = min_max_normalize(life_expectancy['OBS_VALUE'], min_LE, max_LE)

# Then homicide rate.
min_HR = homicide_rate['OBS_VALUE'].min()
max_HR = homicide_rate['OBS_VALUE'].max()
print(min_HR, max_HR)

homicide_rate_n = min_max_normalize(homicide_rate['OBS_VALUE'], min_HR, max_HR)

# Sanity check: each normalized series should report max 1.0 and min 0.0.
print(f"life expectancy normalized: max -> {life_expectancy_n.max()}, min -> {life_expectancy_n.min()}, avg -> {life_expectancy_n.mean()}")
print(f"homicide rate normalized: max -> {homicide_rate_n.max()}, min -> {homicide_rate_n.min()}, avg -> {homicide_rate_n.mean()}")


64.2 84.4
0.2 26.8
life expectancy normalized: max -> 1.0, min -> 0.0, avg -> 0.7959688826025457
homicide rate normalized: max -> 1.0, min -> 0.0, avg -> 0.11555674901539562
