Multi-Index and Index Hierarchy
What is a Multi Index Data Frame
Let us go over how to work with a Multi-Index. First, we'll create a quick example of what a Multi-Indexed DataFrame looks like:

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Index levels: two parallel label lists, one entry per row.
outside = ['G1'] * 3 + ['G2'] * 3   # outer level: the group each row belongs to
inside = [1, 2, 3] * 2              # inner level: the row number within its group

# Pair the labels position by position -> [('G1', 1), ('G1', 2), ...] and let
# pandas build the two-level index from that list of tuples.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [3]:
# Display the MultiIndex. It is an index with multiple levels:
#   - outer level: the group labels 'G1' and 'G2'
#   - inner level: the row numbers 1, 2, 3 within each group
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [4]:
# Build a 6x2 frame of standard-normal samples, indexed by the two-level
# hier_index and with columns A and B.
# Seed the global NumPy generator first so that Restart & Run All yields the
# same numbers on every run; values saved from an earlier, unseeded run will
# not match until the notebook is re-executed.
np.random.seed(101)
df = pd.DataFrame(np.random.randn(6, 2), index=hier_index, columns=['A', 'B'])

In [5]:
# Display the frame: every row is addressed by an (outer, inner) index pair,
# much like nested filters in Excel or a composite key in a database table.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.240582,0.397048
G1,2,-0.257001,0.122646
G1,3,0.354506,-0.771667
G2,1,-2.117169,0.173228
G2,2,0.372784,0.633386
G2,3,0.118631,0.218097


In [6]:
# Selecting by an outer-level label returns every row under 'G1'; the result
# is indexed by the remaining inner level only.
df.loc['G1']

Unnamed: 0,A,B
1,-0.240582,0.397048
2,-0.257001,0.122646
3,0.354506,-0.771667


In [7]:
# Chain a second .loc to drill into the inner level: the row labelled 1 within
# group 'G1', returned as a Series over columns A and B.
df.loc['G1'].loc[1]

A   -0.240582
B    0.397048
Name: 1, dtype: float64

In [8]:
# .loc slices by label and includes both endpoints, so 1:2 returns the rows
# labelled 1 and 2 within group 'G1'.
df.loc['G1'].loc[1:2]

Unnamed: 0,A,B
1,-0.240582,0.397048
2,-0.257001,0.122646


In [9]:
# Index levels can be given names. Check the current names first; a level
# that has not been named yet shows up as None.
df.index.names


FrozenList([None, None])

In [10]:
# Name the two index levels (outer first, then inner). An index can have as
# many levels as we want; supply one name per level.
df.index.names = ['Groups','Num']

In [11]:
# Display the frame again: the index levels are now labelled Groups and Num.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.240582,0.397048
G1,2,-0.257001,0.122646
G1,3,0.354506,-0.771667
G2,1,-2.117169,0.173228
G2,2,0.372784,0.633386
G2,3,0.118631,0.218097


In [12]:
# Drill down step by step and print each stage as plain text:
#   1. every row of group 'G2'
#   2. the row labelled 2 inside that group (a Series over columns A and B)
#   3. the single value in column 'B' of that row
# A separator of three newlines reproduces the spacing of printing '\n'
# between the stages.
print(
    df.loc['G2'],
    df.loc['G2'].loc[2],
    df.loc['G2'].loc[2]['B'],
    sep='\n\n\n',
)

            A         B
Num                    
1   -2.117169  0.173228
2    0.372784  0.633386
3    0.118631  0.218097


A    0.372784
B    0.633386
Name: 2, dtype: float64


0.6333860390521187


In [None]:
#### CROSS SECTION FUNCTION (xs) - returns a cross-section of a DataFrame: the rows (or columns) that match a key at a chosen index level

In [13]:
# Select every row under the outer label 'G1' with .loc; the result keeps only
# the 'Num' level as its index.
df.loc['G1']

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.240582,0.397048
2,-0.257001,0.122646
3,0.354506,-0.771667


In [17]:
# xs() with an outer-level key selects the same rows here as df.loc['G1'].
df.xs('G1')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.240582,0.397048
2,-0.257001,0.122646
3,0.354506,-0.771667


In [18]:
# Show the full frame again for reference before selecting on the inner level.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.240582,0.397048
G1,2,-0.257001,0.122646
G1,3,0.354506,-0.771667
G2,1,-2.117169,0.173228
G2,2,0.372784,0.633386
G2,3,0.118631,0.218097


In [20]:
# Extract the rows whose inner ('Num') label is 1. That label occurs under both
# G1 and G2, so level='Num' tells xs() which index level to match; the result
# is indexed by the remaining 'Groups' level.
df.xs(1,level='Num')


Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.240582,0.397048
G2,-2.117169,0.173228
