# Hierarchical Indexing

In [1]:
import numpy as np
import pandas as pd

In [9]:
data = pd.Series(np.random.randn(9),
        index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 3, 1, 2, 2, 3] ] )
display(data)
#hierarchically indexed object partial indexing is possible

a  1   -0.915709
   2   -2.252896
   3    0.305133
b  1    2.504671
   3   -0.161717
c  1   -0.230194
   2    1.524644
d  2    1.384543
   3    1.001529
dtype: float64

In [10]:
display(data['a'])

1   -0.915709
2   -2.252896
3    0.305133
dtype: float64

In [16]:
display(data['b'])

1    2.504671
3   -0.161717
dtype: float64

In [17]:
display(data['c'])

1   -0.230194
2    1.524644
dtype: float64

In [11]:
data['b':'c']

b  1    2.504671
   3   -0.161717
c  1   -0.230194
   2    1.524644
dtype: float64

In [18]:
data['a':'b']

a  1   -0.915709
   2   -2.252896
   3    0.305133
b  1    2.504671
   3   -0.161717
dtype: float64

In [19]:
data['c':'d']

c  1   -0.230194
   2    1.524644
d  2    1.384543
   3    1.001529
dtype: float64

In [12]:
data.loc[['b', 'd']]

b  1    2.504671
   3   -0.161717
d  2    1.384543
   3    1.001529
dtype: float64

In [20]:
data.loc[['a', 'd']]

a  1   -0.915709
   2   -2.252896
   3    0.305133
d  2    1.384543
   3    1.001529
dtype: float64

In [21]:
data.loc[['b', 'c']]

b  1    2.504671
   3   -0.161717
c  1   -0.230194
   2    1.524644
dtype: float64

In [13]:
# i can access inner level of index, which will select all uper indexes at level 1
data.loc[:, 2]

a   -2.252896
c    1.524644
d    1.384543
dtype: float64

In [22]:
# i can access inner level of index, which will select all uper indexes at level 1
data.loc[:, 1]

a   -0.915709
b    2.504671
c   -0.230194
dtype: float64

In [23]:
# i can access inner level of index, which will select all uper indexes at level 1
data.loc[:, 3]

a    0.305133
b   -0.161717
d    1.001529
dtype: float64

In [27]:
#Hierarchical indexing plays an important role in reshaping data and group-based
#operations like forming a pivot table. For example, 
#you could rearrange the data into
#a DataFrame using its unstack method:
display(data)
df = data.unstack()
display(df)
df.stack()

a  1   -0.915709
   2   -2.252896
   3    0.305133
b  1    2.504671
   3   -0.161717
c  1   -0.230194
   2    1.524644
d  2    1.384543
   3    1.001529
dtype: float64

Unnamed: 0,1,2,3
a,-0.915709,-2.252896,0.305133
b,2.504671,,-0.161717
c,-0.230194,1.524644,
d,,1.384543,1.001529


a  1   -0.915709
   2   -2.252896
   3    0.305133
b  1    2.504671
   3   -0.161717
c  1   -0.230194
   2    1.524644
d  2    1.384543
   3    1.001529
dtype: float64

In [26]:
df.stack()

a  1   -0.915709
   2   -2.252896
   3    0.305133
b  1    2.504671
   3   -0.161717
c  1   -0.230194
   2    1.524644
d  2    1.384543
   3    1.001529
dtype: float64

In [29]:
data.unstack().stack()

a  1   -0.915709
   2   -2.252896
   3    0.305133
b  1    2.504671
   3   -0.161717
c  1   -0.230194
   2    1.524644
d  2    1.384543
   3    1.001529
dtype: float64

In [30]:
#With a DataFrame, either axis can have a hierarchical index
# [18]
import pandas as pd
import numpy as np
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
        index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
        columns=[['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green']])
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
display(frame)
#frame['Ohio']

#Be careful to distinguish the index names 'state' and 'color'
#from the row labels

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [32]:
#With a DataFrame, either axis can have a hierarchical index
# [18]
import pandas as pd
import numpy as np
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
        index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
        columns=[['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green']])
frame.index.names = ['key1', 'key2']

display(frame)
#frame['Ohio']

#Be careful to distinguish the index names 'state' and 'color'
#from the row labels

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [33]:
#With a DataFrame, either axis can have a hierarchical index
# [18]
import pandas as pd
import numpy as np
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
        index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
        columns=[['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green']])
frame.columns.names = ['state', 'color']
display(frame)
#frame['Ohio']

#Be careful to distinguish the index names 'state' and 'color'
#from the row labels

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [34]:
#With a DataFrame, either axis can have a hierarchical index
# [18]
import pandas as pd
import numpy as np
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
        index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
        columns=[['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green']])
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
display(frame)
frame['Ohio']

#Be careful to distinguish the index names 'state' and 'color'
#from the row labels

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [36]:
#With a DataFrame, either axis can have a hierarchical index
# [18]
import pandas as pd
import numpy as np
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
        index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
        columns=[['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green']])
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
display(frame)
frame['Colorado']

#Be careful to distinguish the index names 'state' and 'color'
#from the row labels

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


Unnamed: 0_level_0,color,Green
key1,key2,Unnamed: 2_level_1
a,1,2
a,2,5
b,1,8
b,2,11


In [37]:
#@@  Reordering and sorting Levels
frame.swaplevel('key1', 'key2')

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [39]:
#can save in the same frame variables 
# or another variables like
frame = frame.swaplevel('key1', 'key2')
display(frame)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [40]:
#sort_index , on the other hand, sorts the data using only the values in a single level.
#When swapping levels, it’s not uncommon to also use sort_index so that the result is
#lexicographically sorted by the indicated level:
frame.sort_index(level = 1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [42]:
# compare the above statement output with following statement output
frame.swaplevel(0, 1).sort_index(level = 0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
1,b,6,7,8
2,a,3,4,5
2,b,9,10,11
