* Indicador: Proportion of adults who are obese, 20 years old and over
* ID para extracción: I555b0f6a 
* Fuente: https://gender-data-hub-2-undesa.hub.arcgis.com

In [25]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [26]:
# External source: United Nations Statistics Division (UNSD).
# The workbook is read from the notebook's working directory.
ruta_excel = "./Obese - United Nations Statistics Division.xlsx"
df_obesidad = pd.read_excel(ruta_excel)
df_obesidad.head()

Unnamed: 0,indicator_id,"Proportion of adults who are obese, 20 years old and over ,by sex",ref_area_desc,iso3,sex_desc,Unnamed: 5,Unnamed: 6,time_period
0,I555b0f6a,1.0,Afghanistan,AFG,Both sexes or no breakdown by sex,,,1990
1,I555b0f6a,1.1,Afghanistan,AFG,Both sexes or no breakdown by sex,,,1991
2,I555b0f6a,1.2,Afghanistan,AFG,Both sexes or no breakdown by sex,,,1992
3,I555b0f6a,1.2,Afghanistan,AFG,Both sexes or no breakdown by sex,,,1993
4,I555b0f6a,1.3,Afghanistan,AFG,Both sexes or no breakdown by sex,,,1994


In [27]:
# Remove the columns that are not needed: the indicator id and the two
# unnamed columns that come out of the spreadsheet.
df_obesidad = df_obesidad.drop(
    columns=["indicator_id", "Unnamed: 5", "Unnamed: 6"]
)
df_obesidad.head()

Unnamed: 0,"Proportion of adults who are obese, 20 years old and over ,by sex",ref_area_desc,iso3,sex_desc,time_period
0,1.0,Afghanistan,AFG,Both sexes or no breakdown by sex,1990
1,1.1,Afghanistan,AFG,Both sexes or no breakdown by sex,1991
2,1.2,Afghanistan,AFG,Both sexes or no breakdown by sex,1992
3,1.2,Afghanistan,AFG,Both sexes or no breakdown by sex,1993
4,1.3,Afghanistan,AFG,Both sexes or no breakdown by sex,1994


In [28]:
# Rename some columns so they match the World Bank (WB) dataframe.
df_obesidad = df_obesidad.rename(
    columns={
        "ref_area_desc": "Country",
        "time_period": "Year",
        "iso3": "Country_Code",
        "sex_desc": "Gender",
    }
)
df_obesidad

Unnamed: 0,"Proportion of adults who are obese, 20 years old and over ,by sex",Country,Country_Code,Gender,Year
0,1.0,Afghanistan,AFG,Both sexes or no breakdown by sex,1990
1,1.1,Afghanistan,AFG,Both sexes or no breakdown by sex,1991
2,1.2,Afghanistan,AFG,Both sexes or no breakdown by sex,1992
3,1.2,Afghanistan,AFG,Both sexes or no breakdown by sex,1993
4,1.3,Afghanistan,AFG,Both sexes or no breakdown by sex,1994
...,...,...,...,...,...
15385,3.1,Zimbabwe,ZWE,Male,2012
15386,3.2,Zimbabwe,ZWE,Male,2013
15387,3.4,Zimbabwe,ZWE,Male,2014
15388,3.5,Zimbabwe,ZWE,Male,2015


In [29]:
# Reorder the columns; the list below defines the new column order.
orden_columnas = [
    "Country",
    "Country_Code",
    "Year",
    "Proportion of adults who are obese, 20 years old and over ,by sex",
    "Gender",
]
df_obesidad = df_obesidad.loc[:, orden_columnas]
df_obesidad

Unnamed: 0,Country,Country_Code,Year,"Proportion of adults who are obese, 20 years old and over ,by sex",Gender
0,Afghanistan,AFG,1990,1.0,Both sexes or no breakdown by sex
1,Afghanistan,AFG,1991,1.1,Both sexes or no breakdown by sex
2,Afghanistan,AFG,1992,1.2,Both sexes or no breakdown by sex
3,Afghanistan,AFG,1993,1.2,Both sexes or no breakdown by sex
4,Afghanistan,AFG,1994,1.3,Both sexes or no breakdown by sex
...,...,...,...,...,...
15385,Zimbabwe,ZWE,2012,3.1,Male
15386,Zimbabwe,ZWE,2013,3.2,Male
15387,Zimbabwe,ZWE,2014,3.4,Male
15388,Zimbabwe,ZWE,2015,3.5,Male


In [30]:
# Keep only the selected countries, matched by ISO3 code
# (original note: "OAS countries, but without the Caribbean").
# NOTE(review): the list includes CUB, DOM, HTI, JAM and TTO, which looks at
# odds with "without the Caribbean" — confirm the intended country set.
Countries = [
    "ARG", "BRA", "CAN", "CHL", "COL", "CRI", "CUB", "DOM", "ECU", "SLV", "GTM",
    "GUY", "HTI", "HND", "JAM", "MEX", "NIC", "PAN", "PRY", "PER", "TTO", "URY",
]
es_pais_seleccionado = df_obesidad["Country_Code"].isin(Countries)
df_obesidad_filtrado = df_obesidad[es_pais_seleccionado]
df_obesidad_filtrado

Unnamed: 0,Country,Country_Code,Year,"Proportion of adults who are obese, 20 years old and over ,by sex",Gender
162,Argentina,ARG,1990,16.6,Both sexes or no breakdown by sex
163,Argentina,ARG,1991,17.0,Both sexes or no breakdown by sex
164,Argentina,ARG,1992,17.4,Both sexes or no breakdown by sex
165,Argentina,ARG,1993,17.8,Both sexes or no breakdown by sex
166,Argentina,ARG,1994,18.2,Both sexes or no breakdown by sex
...,...,...,...,...,...
15196,Uruguay,URY,2012,23.1,Male
15197,Uruguay,URY,2013,23.6,Male
15198,Uruguay,URY,2014,24.2,Male
15199,Uruguay,URY,2015,24.8,Male


In [31]:
# Count the missing values in each column.
df_obesidad_filtrado.isna().sum()

Country                                                              0
Country_Code                                                         0
Year                                                                 0
Proportion of adults who are obese, 20 years old and over ,by sex    0
Gender                                                               0
dtype: int64

In [32]:
# Rename the columns so their names coincide with the World Bank (WB) dataset
# when the two are joined later.
nombres_para_union = {
    'Country': 'País',
    'Proportion of adults who are obese, 20 years old and over ,by sex': 'Proportion of adults who are obese',
    'Year': "Año",
}
df_obesidad_filtrado = df_obesidad_filtrado.rename(columns=nombres_para_union)

In [33]:
# Keep only the columns needed to merge with the World Bank (WB) dataset.
#
# The frame holds several series per country and year, distinguished only by
# "Gender" (the displayed rows show "Both sexes or no breakdown by sex" and
# "Male"). Dropping "Gender" without filtering first would leave multiple
# indistinguishable rows per (País, Año) and multiply rows in the merge, so
# only the aggregate "both sexes" series is kept before the column is removed.
AMBOS_SEXOS = "Both sexes or no breakdown by sex"
df_obesidad_filtrado = (
    df_obesidad_filtrado
    .loc[df_obesidad_filtrado["Gender"] == AMBOS_SEXOS]
    .drop(columns=["Country_Code", "Gender"])
)
df_obesidad_filtrado.head()

Unnamed: 0,País,Año,Proportion of adults who are obese
162,Argentina,1990,16.6
163,Argentina,1991,17.0
164,Argentina,1992,17.4
165,Argentina,1993,17.8
166,Argentina,1994,18.2


In [13]:
# Write the prepared table to CSV, leaving out the DataFrame index.
df_obesidad_filtrado.to_csv(path_or_buf="obesidad.csv", index=False)