In [1]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [2]:
# Hierarchical indexing: several index levels on a single axis, which lets
# higher-dimensional data be handled in a lower-dimensional form.
data = pd.Series(
    np.random.randn(10),
    index=[list('aaabbbccdd'), [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]],
)
data

a  1   -0.868949
   2   -0.126999
   3    0.089926
b  1    1.778610
   2    0.755060
   3    0.634219
c  1   -0.649518
   2   -0.260536
d  2   -0.027350
   3   -0.022289
dtype: float64

In [5]:
# The index of `data` is a MultiIndex; its repr shows the unique values of
# each level plus, per level, one integer position for every row.
data.index

MultiIndex(levels=[['a', 'b', 'c', 'd'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 1, 2]])

In [6]:
# `levels` holds the unique labels of each index level.
data.index.levels

FrozenList([['a', 'b', 'c', 'd'], [1, 2, 3]])

In [10]:
# Per-level integer positions for every row. This attribute was called
# `MultiIndex.labels` before pandas 0.24; it was renamed to `codes` and the
# old name was removed in pandas 1.0.
data.index.codes

FrozenList([[0, 0, 0, 1, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 1, 2]])

In [11]:
# Each level's code array has one entry per row, so the lengths match.
# (`MultiIndex.labels` was renamed to `MultiIndex.codes` in pandas 0.24 and
# the old name was removed in pandas 1.0.)
len(data.index.codes[0]) == len(data.index.codes[1])

True

In [12]:
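# The first array of integer positions (labels[0]; named codes[0] in pandas >= 0.24)
# indexes into levels[0], e.g. position 1 -> 'b'.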

In [13]:
# Partial indexing: selecting an outer-level label returns the rows under it,
# indexed by the inner level only.
data['b']

1    1.778610
2    0.755060
3    0.634219
dtype: float64

In [15]:
# Label slice on the outer level; both endpoints ('b' and 'c') are included.
data['b':'c']

b  1    1.778610
   2    0.755060
   3    0.634219
c  1   -0.649518
   2   -0.260536
dtype: float64

In [20]:
# The result of a partial selection is itself a Series.
type(data['b'])

pandas.core.series.Series

In [21]:
# After selecting outer label 'b', the index is the remaining inner level.
data['b'].index

Int64Index([1, 2, 3], dtype='int64')

In [22]:
# Chained lookup: outer label 'b' first, then inner label 1.
data['b'][1]

1.7786097487233408

In [25]:
# Both levels can be given in a single lookup; this yields the same value as
# data['b'][1].
data['b', 1]

1.7786097487233408

In [23]:
# Same outer-level slice as in the earlier cell (this cell repeats it);
# rows for 'b' through 'c' inclusive are returned.
data['b':'c']

b  1    1.778610
   2    0.755060
   3    0.634219
c  1   -0.649518
   2   -0.260536
dtype: float64

In [26]:
# Select several outer-level labels at once. `.ix` was deprecated in
# pandas 0.20 and removed in pandas 1.0; `.loc` is the label-based indexer.
data.loc[['b', 'd']]

b  1    1.778610
   2    0.755060
   3    0.634219
d  2   -0.027350
   3   -0.022289
dtype: float64

In [27]:
# Select on the inner level: for every outer key, the entry whose inner
# label is 2.
data[:, 2]

a   -0.126999
b    0.755060
c   -0.260536
d   -0.027350
dtype: float64

In [28]:
# Hierarchical indexing plays an important role in reshaping data and in
# group-based operations. unstack() pivots the inner level into columns;
# (outer, inner) combinations absent from the index show up as missing values.
data.unstack()

Unnamed: 0,1,2,3
a,-0.868949,-0.126999,0.089926
b,1.77861,0.75506,0.634219
c,-0.649518,-0.260536,
d,,-0.02735,-0.022289


In [29]:
 data.unstack().stack()

a  1   -0.868949
   2   -0.126999
   3    0.089926
b  1    1.778610
   2    0.755060
   3    0.634219
c  1   -0.649518
   2   -0.260536
d  2   -0.027350
   3   -0.022289
dtype: float64

In [30]:
# With a DataFrame, either axis (rows or columns) can carry a hierarchical index.
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=[list('aabb'), [1, 2, 1, 2]],
    columns=[
        ['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green'],
    ],
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [31]:
# Name the two row-index levels (this mutates `frame`'s index in place).
frame.index.names = ['key1', 'key2']

In [32]:
# Display the frame; the row levels are now labelled key1 / key2.
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [33]:
# Name the two column levels as well, then display the frame.
frame.columns.names = ['state', 'color']
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [34]:
# Partial column indexing: selecting the outer column label 'Ohio' returns
# its sub-columns (Green, Red), keyed by the inner 'color' level.
frame['Ohio']

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10
