In [1]:
# Notebook metadata banner, echoed to the cell output. The leading and
# trailing empty strings reproduce the blank line before and after the banner.
print(
    "",
    "@Description: Hierarchical Indexing",
    "@Author(s): Stephen CUI",
    "@LastEditor(s): Stephen CUI",
    "@CreatedTime: 2023-08-04 17:40:05",
    "",
    sep="\n",
)


@Description: Hierarchical Indexing
@Author(s): Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime: 2023-08-04 17:40:05



In [3]:
import pandas as pd
import numpy as np

# Hierarchical Indexing

In [6]:
# Build a Series with a two-level (hierarchical) index: an outer letter key
# and an inner integer key. Passing a list of two equal-length arrays as
# `index` yields a MultiIndex.
# NOTE: the values are random draws and no seed is set in the visible cells,
# so the numbers differ from run to run.
outer_key = ['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd']
inner_key = [1, 2, 3, 1, 3, 1, 2, 2, 3]
data = pd.Series(
    np.random.uniform(size=9),
    index=[outer_key, inner_key],
)
data

a  1    0.730813
   2    0.657424
   3    0.588851
b  1    0.731546
   3    0.656188
c  1    0.530644
   2    0.707888
d  2    0.855674
   3    0.536054
dtype: float64

In [7]:
# Inspect the index: it is a MultiIndex whose entries are (outer, inner) tuples.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [8]:
# Partial indexing: selecting by an outer-level label alone returns the
# sub-Series under that label, indexed by the remaining inner level.
data['b']

1    0.731546
3    0.656188
dtype: float64

In [10]:
# Slicing on the outer level is label-based and includes both endpoints,
# so every row under 'b' and 'c' is returned.
data['b':'c']

b  1    0.731546
   3    0.656188
c  1    0.530644
   2    0.707888
dtype: float64

In [11]:
# A list of outer-level labels selects every row under each listed label.
data.loc[['b', 'd']]

b  1    0.731546
   3    0.656188
d  2    0.855674
   3    0.536054
dtype: float64

In [14]:
# Selection by the inner index level also works: `:` keeps every outer label
# and `2` picks the rows whose inner label is 2.
data.loc[:, 2]

a    0.657424
c    0.707888
d    0.855674
dtype: float64

In [15]:
# Pivot the inner index level into columns, giving a DataFrame. Combinations
# of (outer, inner) labels that are absent from the Series become missing values.
data.unstack()

Unnamed: 0,1,2,3
a,0.730813,0.657424,0.588851
b,0.731546,,0.656188
c,0.530644,0.707888,
d,,0.855674,0.536054


In [16]:
# stack() is the inverse of unstack(): it moves the columns back into the
# inner index level. In the output recorded here the round trip reproduces
# the original Series.
data.unstack().stack()

a  1    0.730813
   2    0.657424
   3    0.588851
b  1    0.731546
   3    0.656188
c  1    0.530644
   2    0.707888
d  2    0.855674
   3    0.536054
dtype: float64

In [18]:
# Either axis can carry a hierarchical index, including the columns.
# Each axis is given a list of two arrays: outer labels first, inner second.
row_keys = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
column_keys = [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']]
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=row_keys,
    columns=column_keys,
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [23]:
# Each level of a hierarchical index can be given a name.
# Assign to the plural `names` attribute; `name` only applies to a
# single-level index.
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11
