In [33]:
import pandas as pd
import numpy as np

# Read the population table from the CSV file under db/ and display it.
df_prob = pd.read_csv(filepath_or_buffer='db/poblacion.csv')
df_prob

Unnamed: 0,Country,year,pop
0,Afghanistan,2015,34413603.0
1,Albania,2015,2880703.0
2,Algeria,2015,39728025.0
3,American Samoa,2015,55812.0
4,Andorra,2015,78011.0
...,...,...,...
1035,Pre-demographic dividend,2018,919485393.0
1036,Small states,2018,40575321.0
1037,South Asia,2018,1814388744.0
1038,South Asia (IDA & IBRD),2018,1814388744.0


In [34]:
# Render floats with thousands separators and one decimal place in displayed output.
pd.set_option('display.float_format', '{:,.1f}'.format)

In [35]:
# Turn each year value into its string form and store the column as a
# categorical, then list the dtypes of every column to confirm the change.
df_prob['year'] = pd.Categorical(df_prob['year'].map(str))
df_prob.dtypes

Country      object
year       category
pop         float64
dtype: object

In [36]:
# Boolean mask: True for the rows whose Country is Aruba or Colombia.
id_filtro = df_prob.loc[:, 'Country'].isin(['Aruba', 'Colombia'])
id_filtro

0       False
1       False
2       False
3       False
4       False
        ...  
1035    False
1036    False
1037    False
1038    False
1039    False
Name: Country, Length: 1040, dtype: bool

In [37]:
# Keep only the rows selected by the boolean mask (Aruba and Colombia).
df_sample = df_prob.loc[id_filtro]
df_sample

Unnamed: 0,Country,year,pop
9,Aruba,2015,104341.0
42,Colombia,2015,47520667.0
269,Aruba,2016,104872.0
302,Colombia,2016,48171392.0
529,Aruba,2017,105366.0
562,Colombia,2017,48901066.0
789,Aruba,2018,105845.0
822,Colombia,2018,49648685.0


# Crear múltiples índices

In [38]:
# Build the two-level (Country, year) index for the Aruba/Colombia sample and
# sort it so that label-based selection on the MultiIndex works predictably.
#
# The sample is rebuilt from df_prob and the id_filtro mask instead of from
# df_sample itself: re-binding df_sample from its own previous value made the
# cell fail on a second run, because 'Country' and 'year' had already been
# moved from columns into the index.
df_sample = df_prob[id_filtro].set_index(['Country', 'year']).sort_index()
df_sample

Unnamed: 0_level_0,Unnamed: 1_level_0,pop
Country,year,Unnamed: 2_level_1
Aruba,2015,104341.0
Aruba,2016,104872.0
Aruba,2017,105366.0
Aruba,2018,105845.0
Colombia,2015,47520667.0
Colombia,2016,48171392.0
Colombia,2017,48901066.0
Colombia,2018,49648685.0


## loc: Selección de índices en una pequeña muestra

In [39]:
# Two-step label lookup: first take Colombia's rows (outer index level),
# then the row labelled '2016' from the remaining year index.
df_sample.loc['Colombia'].loc['2016']

pop   48,171,392.0
Name: 2016, dtype: float64

## xs: Selección de índices en una pequeña muestra

In [40]:
# Cross-section for one (Country, year) pair.
# The key is a tuple with one label per index level; passing a list as the key
# is deprecated in pandas 1.x and rejected by pandas 2.0.
df_sample.xs(('Aruba', '2018'))

pop   105,845.0
Name: (Aruba, 2018), dtype: float64

In [41]:
# Cross-section on the inner level: every country's row for the year '2018'.
df_sample.xs(key='2018', level='year')

Unnamed: 0_level_0,pop
Country,Unnamed: 1_level_1
Aruba,105845.0
Colombia,49648685.0


Seleccionar 

In [42]:
# Index the full table by (Country, year) and sort both levels ascending.
df_countries = (
    df_prob
    .set_index(keys=['Country', 'year'])
    .sort_index(ascending=[True, True])
)
df_countries

Unnamed: 0_level_0,Unnamed: 1_level_0,pop
Country,year,Unnamed: 2_level_1
Afghanistan,2015,34413603.0
Afghanistan,2016,35383128.0
Afghanistan,2017,36296400.0
Afghanistan,2018,37172386.0
Albania,2015,2880703.0
...,...,...
Zambia,2018,17351822.0
Zimbabwe,2015,13814629.0
Zimbabwe,2016,14030390.0
Zimbabwe,2017,14236745.0


## Crear un índice con los países deseados

Por ejemplo: de Aruba a Austria, del año 2015 al 2017.

In [43]:
# pd.IndexSlice lets us write one slice per index level.
ids = pd.IndexSlice
# Rows from 'Aruba' through 'Austria' (outer level) and years '2015' through
# '2017' (inner level); axis=0 applies the slicers to the row index only.
df_countries.loc(axis=0)[ids['Aruba':'Austria', '2015':'2017']].sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,pop
Country,year,Unnamed: 2_level_1
Aruba,2015,104341.0
Aruba,2016,104872.0
Aruba,2017,105366.0
Australia,2015,23815995.0
Australia,2016,24190907.0
Australia,2017,24601860.0
Austria,2015,8642699.0
Austria,2016,8736668.0
Austria,2017,8797566.0


## Acceder al valor del índice deseado

In [44]:
# Labels of the outer index level, addressed by name instead of position.
df_countries.index.get_level_values('Country')

Index(['Afghanistan', 'Afghanistan', 'Afghanistan', 'Afghanistan', 'Albania',
       'Albania', 'Albania', 'Albania', 'Algeria', 'Algeria',
       ...
       'Yemen, Rep.', 'Yemen, Rep.', 'Zambia', 'Zambia', 'Zambia', 'Zambia',
       'Zimbabwe', 'Zimbabwe', 'Zimbabwe', 'Zimbabwe'],
      dtype='object', name='Country', length=1040)

In [45]:
# Labels of the inner index level, addressed by name instead of position.
df_countries.index.get_level_values('year')

CategoricalIndex(['2015', '2016', '2017', '2018', '2015', '2016', '2017',
                  '2018', '2015', '2016',
                  ...
                  '2017', '2018', '2015', '2016', '2017', '2018', '2015',
                  '2016', '2017', '2018'],
                 categories=['2015', '2016', '2017', '2018'], ordered=False, name='year', dtype='category', length=1040)

Cuando trabajamos con un DataFrame con **múltiples índices**, podemos extraer un **dato en específico** con una **cadena de instrucciones**.

In [46]:
# Chain of lookups: pick the 'pop' column, then Colombia's rows on the outer
# index level, then the value labelled '2016' on the remaining year level.
df_countries['pop'].loc['Colombia'].loc['2016']

48171392.0

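Una de las **grandes utilidades** aparece cuando tenemos que aplicar **funciones** matemáticas sobre él.
Por ejemplo, la suma por año nos da la población total de todas las filas en cada año (ten en cuenta que el archivo también incluye agregados regionales como «South Asia», que se suman junto con los países).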

In [47]:
# Sum 'pop' over every row that shares the same 'year' index label.
# groupby(level=...).sum() replaces sum(level=...): the `level` argument of
# reductions was deprecated in pandas 1.3 and removed in pandas 2.0.
# observed=True keeps only year categories that actually occur in the index.
# NOTE: the table also holds aggregate rows (e.g. 'South Asia', 'Small states'),
# so each total adds those aggregates on top of the individual countries.
df_countries.groupby(level='year', observed=True).sum()

Unnamed: 0_level_0,pop
year,Unnamed: 1_level_1
2015,65679147019.0
2016,66487930677.0
2017,67294176701.0
2018,68087886692.0


In [48]:
# Show the Aruba/Colombia sample, indexed by (Country, year), again before it is reshaped.
df_sample

Unnamed: 0_level_0,Unnamed: 1_level_0,pop
Country,year,Unnamed: 2_level_1
Aruba,2015,104341.0
Aruba,2016,104872.0
Aruba,2017,105366.0
Aruba,2018,105845.0
Colombia,2015,47520667.0
Colombia,2016,48171392.0
Colombia,2017,48901066.0
Colombia,2018,49648685.0


# unstack: nos permite cambiar la estructura de nuestro DataFrame.

In [49]:
# Pivot the 'year' index level into columns: one row per country, one 'pop' column per year.
df_sample.unstack(level='year')

Unnamed: 0_level_0,pop,pop,pop,pop
year,2015,2016,2017,2018
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Aruba,104341.0,104872.0,105366.0,105845.0
Colombia,47520667.0,48171392.0,48901066.0,49648685.0


In [50]:
# Pivot the 'year' index level into columns (one row per country).
# NOTE(review): this repeats the previous cell's expression verbatim and yields
# the same table; confirm whether a different level was meant to be shown here.
df_sample.unstack('year')

Unnamed: 0_level_0,pop,pop,pop,pop
year,2015,2016,2017,2018
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Aruba,104341.0,104872.0,105366.0,105845.0
Colombia,47520667.0,48171392.0,48901066.0,49648685.0


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=40d7a071-7e6a-4668-9914-a7e81441c111' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>