# 7 层级索引（hierarchical indexing）（机器学习，深度学习不重要）

In [2]:
import pandas as pd
import numpy as np

# A MultiIndex is pandas' hierarchical (multi-level) index type.
# Outer level "cloth": each of a-d repeated three times.
# Inner level "size": the run 0, 1, 2 repeated once per cloth label.
cloth_labels = [letter for letter in 'abcd' for _ in range(3)]
size_labels = [0, 1, 2] * 4
index1 = pd.MultiIndex.from_arrays([cloth_labels, size_labels],
                                   names=['cloth', 'size'])

# Twelve random values, one per (cloth, size) pair.
ser_obj = pd.Series(np.random.randn(12), index=index1)
print(ser_obj)
print(type(ser_obj))  # the container is still a plain Series
ser_index = ser_obj.index
print(type(ser_index))  # ...but its index is a MultiIndex
print(ser_index)
print(ser_index.levels)  # the distinct labels held by each level
# Integer positions into `levels` for every row (rarely needed directly).
ser_index.codes


cloth  size
a      0       0.704641
       1      -0.196776
       2       1.391466
b      0      -0.580849
       1       0.769705
       2      -0.937484
c      0       0.382047
       1      -0.812685
       2      -0.621520
d      0      -1.876351
       1      -1.027624
       2       2.732185
dtype: float64
<class 'pandas.Series'>
<class 'pandas.MultiIndex'>
MultiIndex([('a', 0),
            ('a', 1),
            ('a', 2),
            ('b', 0),
            ('b', 1),
            ('b', 2),
            ('c', 0),
            ('c', 1),
            ('c', 2),
            ('d', 0),
            ('d', 1),
            ('d', 2)],
           names=['cloth', 'size'])
[['a', 'b', 'c', 'd'], [0, 1, 2]]


FrozenList([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])

In [3]:
# Bare last expression: the notebook renders ser_obj as this cell's output.
ser_obj

cloth  size
a      0       0.704641
       1      -0.196776
       2       1.391466
b      0      -0.580849
       1       0.769705
       2      -0.937484
c      0       0.382047
       1      -0.812685
       2      -0.621520
d      0      -1.876351
       1      -1.027624
       2       2.732185
dtype: float64

In [4]:
# Selecting data from a Series that carries a hierarchical index.
divider = '-' * 50
print(divider)
outer_c = ser_obj['c']  # every row under outer label 'c'; the result is itself a Series
print(outer_c)
print(divider)
print(ser_obj.loc['a', 2])  # one label per level pins down a single value
print(divider)
inner_2 = ser_obj[:, 2]  # all outer labels, restricted to inner label 2
print(inner_2)

--------------------------------------------------
size
0    0.382047
1   -0.812685
2   -0.621520
dtype: float64
--------------------------------------------------
1.3914657732676707
--------------------------------------------------
cloth
a    1.391466
b   -0.937484
c   -0.621520
d    2.732185
dtype: float64


## 交换层级

In [5]:
# swaplevel() hands back a new Series with the two index levels exchanged;
# printing ser_obj right after shows that the original is left untouched.
swapped = ser_obj.swaplevel()
print(swapped)
print('-' * 50)
print(ser_obj)
print('-' * 50)
# Rebind the name so later cells work with the (size, cloth) ordering.
# NOTE: this makes the cell non-idempotent - running it a second time
# swaps the levels back to (cloth, size).
ser_obj = ser_obj.swaplevel()
print(ser_obj)

size  cloth
0     a        0.704641
1     a       -0.196776
2     a        1.391466
0     b       -0.580849
1     b        0.769705
2     b       -0.937484
0     c        0.382047
1     c       -0.812685
2     c       -0.621520
0     d       -1.876351
1     d       -1.027624
2     d        2.732185
dtype: float64
--------------------------------------------------
cloth  size
a      0       0.704641
       1      -0.196776
       2       1.391466
b      0      -0.580849
       1       0.769705
       2      -0.937484
c      0       0.382047
       1      -0.812685
       2      -0.621520
d      0      -1.876351
       1      -1.027624
       2       2.732185
dtype: float64
--------------------------------------------------
size  cloth
0     a        0.704641
1     a       -0.196776
2     a        1.391466
0     b       -0.580849
1     b        0.769705
2     b       -0.937484
0     c        0.382047
1     c       -0.812685
2     c       -0.621520
0     d       -1.876351
1     d       -1.027624
2     d        2.732185
dtype: float64

In [6]:
# sort_index(level=...) chooses which index level drives the ordering;
# level=0 is the outermost level. The result is a new Series.
sorted_by_outer = ser_obj.sort_index(level=0)
print(sorted_by_outer)

size  cloth
0     a        0.704641
      b       -0.580849
      c        0.382047
      d       -1.876351
1     a       -0.196776
      b        0.769705
      c       -0.812685
      d       -1.027624
2     a        1.391466
      b       -0.937484
      c       -0.621520
      d        2.732185
dtype: float64


In [7]:
# Show ser_obj again: the sort_index() call in the previous cell was only
# printed, not assigned, so the row order stored here is unchanged.
ser_obj

size  cloth
0     a        0.704641
1     a       -0.196776
2     a        1.391466
0     b       -0.580849
1     b        0.769705
2     b       -0.937484
0     c        0.382047
1     c       -0.812685
2     c       -0.621520
0     d       -1.876351
1     d       -1.027624
2     d        2.732185
dtype: float64

In [8]:
# Pivot one index level into the columns, turning the Series into a DataFrame.
# unstack() takes the level to move; -1 (its default) is the innermost level.
df_obj = ser_obj.unstack(level=-1)
print(df_obj)


cloth         a         b         c         d
size                                         
0      0.704641 -0.580849  0.382047 -1.876351
1     -0.196776  0.769705 -0.812685 -1.027624
2      1.391466 -0.937484 -0.621520  2.732185


In [9]:
print(df_obj)
# stack() folds the column labels back into the row index, so the DataFrame
# collapses into a Series. The column labels always land in the innermost
# index level; that placement cannot be changed.
stacked = df_obj.stack()
print(stacked)  # reverses the unstack() step that produced df_obj

cloth         a         b         c         d
size                                         
0      0.704641 -0.580849  0.382047 -1.876351
1     -0.196776  0.769705 -0.812685 -1.027624
2      1.391466 -0.937484 -0.621520  2.732185
size  cloth
0     a        0.704641
      b       -0.580849
      c        0.382047
      d       -1.876351
1     a       -0.196776
      b        0.769705
      c       -0.812685
      d       -1.027624
2     a        1.391466
      b       -0.937484
      c       -0.621520
      d        2.732185
dtype: float64
