In [1]:
import pandas as pd

In [8]:
# Load the sample stock quotes (Date, Close, Volume, Symbol) from a
# shortened URL; running this cell requires network access.
STOCKS_URL = 'http://bit.ly/smallstocks'
stocks = pd.read_csv(STOCKS_URL)
stocks

Unnamed: 0,Date,Close,Volume,Symbol
0,2016-10-03,31.5,14070500,CSCO
1,2016-10-03,112.52,21701800,AAPL
2,2016-10-03,57.42,19189500,MSFT
3,2016-10-04,113.0,29736800,AAPL
4,2016-10-04,57.24,20085900,MSFT
5,2016-10-04,31.35,18460400,CSCO
6,2016-10-05,57.64,16726400,MSFT
7,2016-10-05,31.59,11808600,CSCO
8,2016-10-05,113.05,21453100,AAPL


In [3]:
# Inspect the index pandas assigned on load (a default RangeIndex, one entry per row)
stocks.index

RangeIndex(start=0, stop=9, step=1)

In [6]:
# Group by ticker symbol and average the closing price of each group
stocks.groupby('Symbol')['Close'].mean()

Symbol
AAPL    112.856667
CSCO     31.480000
MSFT     57.433333
Name: Close, dtype: float64

In [9]:
# Grouping by two keys produces a Series indexed by a (Symbol, Date) MultiIndex
s = stocks.groupby(['Symbol', 'Date'])['Close'].mean()

In [10]:
# Display the grouped Series; its index combines Symbol and Date
s

Symbol  Date      
AAPL    2016-10-03    112.52
        2016-10-04    113.00
        2016-10-05    113.05
CSCO    2016-10-03     31.50
        2016-10-04     31.35
        2016-10-05     31.59
MSFT    2016-10-03     57.42
        2016-10-04     57.24
        2016-10-05     57.64
Name: Close, dtype: float64

In [11]:
# Note that this MultiIndex has two levels (Symbol and Date), so the Series
# can be reshaped into a DataFrame
s.index

MultiIndex([('AAPL', '2016-10-03'),
            ('AAPL', '2016-10-04'),
            ('AAPL', '2016-10-05'),
            ('CSCO', '2016-10-03'),
            ('CSCO', '2016-10-04'),
            ('CSCO', '2016-10-05'),
            ('MSFT', '2016-10-03'),
            ('MSFT', '2016-10-04'),
            ('MSFT', '2016-10-05')],
           names=['Symbol', 'Date'])

In [12]:
# Build a DataFrame from the two-level MultiIndex: the innermost level (Date)
# moves to the columns while Symbol stays as the row index
s.unstack(level='Date')

Date,2016-10-03,2016-10-04,2016-10-05
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,112.52,113.0,113.05
CSCO,31.5,31.35,31.59
MSFT,57.42,57.24,57.64


In [14]:
# Another way to arrive at the same Symbol x Date table of mean closing prices
df = stocks.pivot_table(
    index='Symbol',
    columns='Date',
    values='Close',
    aggfunc='mean',
)
df

Date,2016-10-03,2016-10-04,2016-10-05
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,112.52,113.0,113.05
CSCO,31.5,31.35,31.59
MSFT,57.42,57.24,57.64


In [15]:
# Slicing the MultiIndexed Series: a single outer-level label selects all of
# that symbol's rows, leaving Date as the index
s.loc['AAPL']

Date
2016-10-03    112.52
2016-10-04    113.00
2016-10-05    113.05
Name: Close, dtype: float64

In [16]:
# Works much like DataFrame slicing: one label per index level selects a single value
s.loc['AAPL', '2016-10-04']

113.0

In [17]:
# A bare ':' for the first level keeps every Symbol and picks out a single Date
s.loc[:, '2016-10-04']

Symbol
AAPL    113.00
CSCO     31.35
MSFT     57.24
Name: Close, dtype: float64

In [19]:
# Create a MultiIndex on the DataFrame from the Symbol and Date columns.
# The result is reassigned rather than using inplace=True, so the step is an
# explicit rebinding of `stocks` and stays chainable.
# NOTE: this cell moves Symbol and Date out of the columns, so re-running it
# requires reloading `stocks` first.
stocks = stocks.set_index(['Symbol', 'Date'])
stocks

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
CSCO,2016-10-03,31.5,14070500
AAPL,2016-10-03,112.52,21701800
MSFT,2016-10-03,57.42,19189500
AAPL,2016-10-04,113.0,29736800
MSFT,2016-10-04,57.24,20085900
CSCO,2016-10-04,31.35,18460400
MSFT,2016-10-05,57.64,16726400
CSCO,2016-10-05,31.59,11808600
AAPL,2016-10-05,113.05,21453100


In [20]:
# To put the rows in order, sort by the index (Symbol first, then Date).
# Reassign instead of using inplace=True so the step is an explicit rebinding.
stocks = stocks.sort_index()

In [21]:
# Display the frame again: rows are now ordered by Symbol, then Date
stocks

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-03,112.52,21701800
AAPL,2016-10-04,113.0,29736800
AAPL,2016-10-05,113.05,21453100
CSCO,2016-10-03,31.5,14070500
CSCO,2016-10-04,31.35,18460400
CSCO,2016-10-05,31.59,11808600
MSFT,2016-10-03,57.42,19189500
MSFT,2016-10-04,57.24,20085900
MSFT,2016-10-05,57.64,16726400


In [23]:
# Slicing the DataFrame: an outer-level label returns that symbol's rows indexed by Date
stocks.loc['AAPL']

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-10-03,112.52,21701800
2016-10-04,113.0,29736800
2016-10-05,113.05,21453100


In [24]:
# In the row position of .loc, the labels for the index levels are passed as one tuple.
# The single matching row comes back as a float64 Series, so Volume is shown as a float.
stocks.loc[('AAPL', '2016-10-04'), :]

Close          113.0
Volume    29736800.0
Name: (AAPL, 2016-10-04), dtype: float64

In [25]:
# To select several labels from one level, pass a list in that level's slot of the tuple
stocks.loc[(['AAPL', 'MSFT'], '2016-10-04'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-04,113.0,29736800
MSFT,2016-10-04,57.24,20085900


In [28]:
# To take every label of an index level, pass slice(None) in its slot;
# pd.IndexSlice[:, ...] builds that same (slice(None), ...) tuple using ':' syntax
stocks.loc[pd.IndexSlice[:, ['2016-10-04', '2016-10-03']], 'Close']

Symbol  Date      
AAPL    2016-10-03    112.52
        2016-10-04    113.00
CSCO    2016-10-03     31.50
        2016-10-04     31.35
MSFT    2016-10-03     57.42
        2016-10-04     57.24
Name: Close, dtype: float64