In [3]:
#We now turn to hierarchical indexing, a feature that lets us label a value with multiple index levels along a single axis
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

In [11]:
#Build a 13-element Series of random normal values whose index has two levels:
#an outer letter label and an inner integer label
series_obj = Series(
    data=np.random.randn(13),
    index=[
        list('aaaabbbbcccdd'),
        [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 1, 2],
    ],
)
series_obj

a  1   -0.900546
   2    0.468318
   3    0.954627
   4    1.241746
b  1    0.762986
   2    0.177067
   3    0.894488
   4   -1.041670
c  1   -0.588357
   2    0.741074
   3   -2.934299
d  1    0.584868
   2    0.575339
dtype: float64

In [12]:
#An index with more than one level, like this one, is known as a MultiIndex
#In the printed Series, a blank in the outer label column means the row shares the outer label of the row above it

series_obj.index #Returns the MultiIndex of series_obj as (outer label, inner label) tuples

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('a', 4),
            ('b', 1),
            ('b', 2),
            ('b', 3),
            ('b', 4),
            ('c', 1),
            ('c', 2),
            ('c', 3),
            ('d', 1),
            ('d', 2)],
           )

In [13]:
#Partial indexing: giving only the outer label selects every row stored under that label
series_obj['b'] #Gives all values with 1st index label 'b'; the result is indexed by the inner labels only

1    0.762986
2    0.177067
3    0.894488
4   -1.041670
dtype: float64

In [14]:
series_obj['b',2] #Gives the single value stored at outer label 'b' and inner label 2

0.17706738014818962

In [15]:
series_obj['b':'c'] #Gives all values whose outer label lies between 'b' and 'c'; both end labels are included

b  1    0.762986
   2    0.177067
   3    0.894488
   4   -1.041670
c  1   -0.588357
   2    0.741074
   3   -2.934299
dtype: float64

In [16]:
series_obj.loc[['b','d']] #Gives all values whose outer label is in the list ['b','d']

b  1    0.762986
   2    0.177067
   3    0.894488
   4   -1.041670
d  1    0.584868
   2    0.575339
dtype: float64

In [17]:
#We can also index on the inner level, i.e. the 2nd label, by passing a full slice for the outer level
series_obj[:,2] #Gives all values with inner label 2; the result is indexed by the outer labels

a    0.468318
b    0.177067
c    0.741074
d    0.575339
dtype: float64

In [18]:
#Hierarchical indexing plays a vital role in data analysis because data labelled this way can easily be reshaped into pivot tables
#Suppose we want to turn series_obj into a DataFrame
#The unstack method does that: the outer labels stay as rows and the inner labels become the columns
#Combinations missing from the Series (e.g. 'c' with 4, 'd' with 3 and 4) show up as empty cells in the output

series_obj.unstack()

Unnamed: 0,1,2,3,4
a,-0.900546,0.468318,0.954627,1.241746
b,0.762986,0.177067,0.894488,-1.04167
c,-0.588357,0.741074,-2.934299,
d,0.584868,0.575339,,


In [20]:
#Conversely, given a DataFrame we can convert it back into a hierarchical Series using stack
#NOTE(review): the empty cells created by unstack are absent from the output recorded here; whether stack drops them may depend on the pandas version - confirm
series_obj.unstack().stack()

a  1   -0.900546
   2    0.468318
   3    0.954627
   4    1.241746
b  1    0.762986
   2    0.177067
   3    0.894488
   4   -1.041670
c  1   -0.588357
   2    0.741074
   3   -2.934299
d  1    0.584868
   2    0.575339
dtype: float64

In [23]:
#Build a 4x3 DataFrame of random normal values with two-level labels on both the rows and the columns
frame_obj = DataFrame(
    data=np.random.randn(12).reshape(4, 3),
    index=[list('aabb'), [1, 2, 1, 2]],
    columns=[
        ['data1', 'data2', 'data2'],
        ['first', 'first', 'second'],
    ],
)
frame_obj

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2,data2
Unnamed: 0_level_1,Unnamed: 1_level_1,first,first,second
a,1,-0.195094,0.637251,1.550994
a,2,-0.031725,0.741698,-1.453169
b,1,-0.261541,-0.531194,0.454459
b,2,-1.249596,0.48857,1.786483


In [25]:
#Each level of a hierarchical index can be given a name, on the rows and on the columns alike
frame_obj.index.names = ['key1','key2'] #Names the two row-index levels
frame_obj.columns.names = ['Datatype:', 'Dataset:'] #Names the two column-index levels
frame_obj

Unnamed: 0_level_0,Datatype:,data1,data2,data2
Unnamed: 0_level_1,Dataset:,first,first,second
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,-0.195094,0.637251,1.550994
a,2,-0.031725,0.741698,-1.453169
b,1,-0.261541,-0.531194,0.454459
b,2,-1.249596,0.48857,1.786483


In [26]:
#Like Series, we can apply partial indexing to hierarchical dataframes as well
frame_obj['data2'] #Gives all columns whose outer column label is 'data2'

Unnamed: 0_level_0,Dataset:,first,second
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0.637251,1.550994
a,2,0.741698,-1.453169
b,1,-0.531194,0.454459
b,2,0.48857,1.786483


In [29]:
frame_obj.index #Gives the MultiIndex of the DataFrame rows, including its level names

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['key1', 'key2'])

In [30]:
frame_obj.columns #Gives the MultiIndex of the DataFrame columns, including its level names

MultiIndex([('data1',  'first'),
            ('data2',  'first'),
            ('data2', 'second')],
           names=['Datatype:', 'Dataset:'])