## Índices multiníveis

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [12]:
# Index levels: an outer group label paired with an inner number
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
# Pair the labels position by position and build the two-level index in one step
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [13]:
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [14]:
inside

[1, 2, 3, 1, 2, 3]

In [15]:
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [16]:
# Rebind hier_index to a plain list of (group, number) pairs
hier_index = [(group, number) for group, number in zip(outside, inside)]

In [17]:
hier_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [18]:
# Turn the tuples held in hier_index into a pandas MultiIndex (one level per tuple position)
hier_index = pd.MultiIndex.from_tuples(hier_index)

In [19]:
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [23]:
# Fix the seed so the random values are identical on every Restart & Run All
np.random.seed(42)
# 6x2 frame of standard-normal samples, indexed by the two-level hier_index
df = pd.DataFrame(np.random.randn(6,2), index=hier_index, columns=['A', 'B'])

In [24]:
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.143464,-0.257745
G1,2,-0.038641,-0.128847
G1,3,1.422443,-0.657825
G2,1,0.234055,0.202994
G2,2,1.074065,0.531178
G2,3,0.530938,-0.705648


In [25]:
# Select every row under the outer label 'G1'; the result is indexed by the inner level only
df.loc['G1']

Unnamed: 0,A,B
1,-0.143464,-0.257745
2,-0.038641,-0.128847
3,1.422443,-0.657825


In [26]:
# Two-step (chained) selection: first the 'G1' sub-frame, then its column 'A'
df.loc['G1']['A']

1   -0.143464
2   -0.038641
3    1.422443
Name: A, dtype: float64

In [27]:
# Column 'A' for the rows under 'G1', selected in a single .loc call (row label, column label)
df.loc['G1','A']

1   -0.143464
2   -0.038641
3    1.422443
Name: A, dtype: float64

In [28]:
# Names of the index levels (each is None until a name is assigned)
df.index.names

FrozenList([None, None])

In [29]:
# Name the outer level 'Grupo' and the inner level 'Numero' (assigned in place on df's index)
df.index.names = ['Grupo','Numero']

In [30]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grupo,Numero,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.143464,-0.257745
G1,2,-0.038641,-0.128847
G1,3,1.422443,-0.657825
G2,1,0.234055,0.202994
G2,2,1.074065,0.531178
G2,3,0.530938,-0.705648


In [32]:
# Cross-section: rows whose outer-level ('Grupo') label is 'G1'; that level is dropped from the result
df.xs('G1')

Unnamed: 0_level_0,A,B
Numero,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.143464,-0.257745
2,-0.038641,-0.128847
3,1.422443,-0.657825


In [35]:
# Cross-section on the inner level: rows where 'Numero' == 1, one per group
df.xs(1, level='Numero')

Unnamed: 0_level_0,A,B
Grupo,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.143464,-0.257745
G2,0.234055,0.202994


In [36]:
# Cross-section on the inner level: rows where 'Numero' == 3, one per group
df.xs(3, level='Numero')

Unnamed: 0_level_0,A,B
Grupo,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,1.422443,-0.657825
G2,0.530938,-0.705648


In [40]:
# Sample data with missing values (NaN) in columns A and B; column C is complete
d = dict(
    A=[1, 2, np.nan],
    B=[5, np.nan, np.nan],
    C=[1, 2, 3],
)

In [41]:
d

{'A': [1, 2, nan], 'B': [5, nan, nan], 'C': [1, 2, 3]}

In [42]:
# Build a frame from the dict (keys become columns).
# NOTE: this rebinds df, replacing the multi-index frame used in the cells above.
df = pd.DataFrame(d)

In [43]:
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [44]:
# With default arguments, dropna() removes every row that contains at least one NaN.
# It returns a new frame; df itself is left unchanged.
df.dropna()

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [45]:
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [46]:
# thresh=2 keeps only the rows that have at least 2 non-NaN values;
# row 2 has a single non-NaN value, so it is dropped.
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [49]:
# Replace every NaN with the literal string 'fill na'
df.fillna(value='fill na')

Unnamed: 0,A,B,C
0,1,5,1
1,2,fill na,2
2,fill na,fill na,3


In [50]:
# Replace only the NaN entries (in any column) with the mean of column A,
# which is computed over A's non-NaN values (1.5 here).
df.fillna(value=df['A'].mean())

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,1.5,2
2,1.5,1.5,3


In [52]:
# Forward-fill: each NaN takes the last valid value above it in the same column.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument is deprecated in pandas.
df.ffill()

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,5.0,2
2,2.0,5.0,3
