## **Librerías**

In [1]:
import pandas as pd
import numpy as np

In [2]:
import plotly.express as px
import plotly.graph_objects as go

## **Datos**

### **Información General**

In [194]:
# Load the life-expectancy vs. GDP-per-capita table from the local data folder.
raw_csv_path = '../../data/life-expectancy-vs-gdp-per-capita.csv'
df = pd.read_csv(raw_csv_path)

In [195]:
# Show one randomly chosen row of the raw table.
df.sample()

Unnamed: 0,Entity,Code,Year,Period life expectancy at birth - Sex: all - Age: 0,GDP per capita,417485-annotations,Population (historical estimates),Continent
21779,Guatemala,GTM,2006,69.8124,6305.8184,,13412401.0,


In [196]:
# Replace the verbose source column names with short ones.
column_aliases = {
    'Entity': 'Country',
    'Period life expectancy at birth - Sex: all - Age: 0': 'LifeExp',
    'GDP per capita': 'GDPperCap',
    'Population (historical estimates)': 'Population',
}
df = df.rename(columns=column_aliases)

In [197]:
# Show one random row after the rename.
df.sample()

Unnamed: 0,Country,Code,Year,LifeExp,GDPperCap,417485-annotations,Population,Continent
44128,Philippines,PHL,1953,55.0467,1999.0,,21073750.0,


In [198]:
# (number of rows, number of columns)
df.shape

(63370, 8)

In [199]:
# Print each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63370 entries, 0 to 63369
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Country             63370 non-null  object 
 1   Code                58762 non-null  object 
 2   Year                63370 non-null  int64  
 3   LifeExp             20755 non-null  float64
 4   GDPperCap           19876 non-null  float64
 5   417485-annotations  21 non-null     object 
 6   Population          58252 non-null  float64
 7   Continent           285 non-null    object 
dtypes: float64(3), int64(1), object(4)
memory usage: 3.9+ MB


In [200]:
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,Year,LifeExp,GDPperCap,Population
count,63370.0,20755.0,19876.0,58252.0
mean,1607.942875,61.617992,6707.67944,49010820.0
std,1371.507043,12.917682,10120.349215,292554900.0
min,-10000.0,11.9951,295.0,0.0
25%,1821.0,52.18585,1553.0,146084.0
50%,1895.0,64.06,2798.0,1388504.0
75%,1964.0,71.82225,7130.2985,6600998.0
max,2021.0,86.5424,156299.0,7909295000.0


In [201]:
# Remove the annotation column and the source 'Continent' column.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell re-runnable: a second execution no longer raises KeyError because
# the columns are already gone.
df = df.drop(columns=['417485-annotations', 'Continent'], errors='ignore')

### **Información de los Continentes**

In [202]:
# Read the country -> continent lookup table.
# keep_default_na=False with na_values=[''] treats only empty cells as missing;
# with the pandas defaults the literal code "NA" is parsed as NaN.
continents_url = "https://pkgstore.datahub.io/JohnSnowLabs/country-and-continent-codes-list/country-and-continent-codes-list-csv_csv/data/b7876b7f496677669644f3d1069d3121/country-and-continent-codes-list-csv_csv.csv"
continents = pd.read_csv(continents_url, keep_default_na=False, na_values=[''])

In [203]:
# Show one random row of the continent lookup table.
continents.sample()

Unnamed: 0,Continent_Name,Continent_Code,Country_Name,Two_Letter_Country_Code,Three_Letter_Country_Code,Country_Number
196,North America,,"Saint Kitts and Nevis, Federation of",KN,KNA,659.0


In [204]:
# Keep only the continent name and the three-letter country code, and give
# both columns shorter names.
continents = (
    continents
    .loc[:, ['Continent_Name', 'Three_Letter_Country_Code']]
    .rename(columns={
        'Continent_Name': 'Continent',
        'Three_Letter_Country_Code': 'Country_Code',
    })
)

In [205]:
# Show one random row of the trimmed lookup table.
continents.sample()

Unnamed: 0,Continent,Country_Code
171,Oceania,UMI


In [238]:
# Keep a single row per country code; the first occurrence is the one retained.
continents = continents.drop_duplicates(subset=['Country_Code'], keep='first')

### **Limpieza de los datos**

In [237]:
# Keep only the records whose year is greater than 1949.
after_1949 = df['Year'] > 1949
df = df[after_1949]

In [225]:
# Attach the continent to every record via the three-letter country code.
# A left join keeps every row of df; rows without a matching code get NaN in
# the continent columns.
# validate='many_to_one' makes pandas raise MergeError if the lookup table
# still contains repeated country codes, instead of silently duplicating
# records when the de-duplication step has not been run first.
wf = pd.merge(
    df,
    continents,
    how='left',
    left_on='Code',
    right_on='Country_Code',
    validate='many_to_one'
)

In [228]:
# Remove the 'Country_Code' column from the merged frame.
wf = wf.drop(columns='Country_Code')

In [232]:
# Discard every record that has at least one missing value in any column.
wf = wf.dropna(axis='index', how='any')

In [236]:
# Order the records by ascending year.
wf = wf.sort_values(by='Year', ascending=True)

## **Gráficos**

In [239]:
# Show one random row of the merged and cleaned table.
wf.sample()

Unnamed: 0,Country,Code,Year,LifeExp,GDPperCap,Population,Continent
18551,United Kingdom,GBR,1979,73.1622,20988.0,56265152.0,Europe


In [278]:
# Animated bubble chart: GDP per capita (x) against life expectancy (y), one
# frame per year, bubble area scaled by population, coloured by continent.

# Take the year span for the title from the data itself so the title cannot
# drift away from what is actually plotted.
first_year = int(wf['Year'].min())
last_year = int(wf['Year'].max())

fig = px.scatter(
    wf,
    x='GDPperCap',
    y='LifeExp',
    size='Population',
    size_max=45,
    color='Continent',
    animation_frame='Year',
    # Same country = same bubble from one frame to the next.
    animation_group='Country',
    # Given as a list, the form plotly express has long documented here.
    hover_data=['Country'],
    # Fixed ranges keep the axes identical in every frame.
    range_x=[-2000, 87000],
    range_y=[20, 100]
)

fig.update_traces(
    mode='markers',
    marker=dict(
        # Scale the bubble area (not the diameter) with the size value.
        sizemode='area'
    )
)

fig.update_layout(
    title=f'Esperanza de vida vs PIB per capita ({first_year}-{last_year})',
    xaxis=dict(
        title='PIB per Capita',
        gridcolor='white',
        gridwidth=2
    ),
    yaxis=dict(
        title='Esperanza de vida (Años)',
        gridcolor='white',
        gridwidth=2
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)'
)

# Override the per-frame duration stored in the arguments of the first button
# of the first update menu (the animation controls created by px).
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 100

fig.show()

In [291]:
# Same animated bubble chart, split into one panel per continent.
fig = px.scatter(
    wf,
    x='GDPperCap',
    y='LifeExp',
    size='Population',
    size_max=45,
    animation_frame='Year',
    # Same country = same bubble from one frame to the next.
    animation_group='Country',
    color='Continent',
    facet_col='Continent',
    hover_data=['Country'],
    # Without fixed ranges the axes are sized from the first frame only, and
    # points in later years can fall outside the visible area.
    range_x=[-2000, 87000],
    range_y=[20, 100],
    # Readable axis labels instead of the raw column names.
    labels={
        'GDPperCap': 'PIB per Capita',
        'LifeExp': 'Esperanza de vida (Años)'
    },
    title='Esperanza de vida vs PIB per capita por continente'
)

fig.show()