In [32]:
import numpy as np
import pandas as pd

## Creating Hierarchical Indexes

In [37]:
# Build a two-level index from two parallel arrays: one array per level,
# one position per row.
a1 = [1, 1, 2, 2, 3, 3]
a2 = ['a', 'b'] * 3
idx = pd.MultiIndex.from_arrays([a1, a2])
idx

MultiIndex(levels=[[1, 2, 3], [u'a', u'b']],
           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

In [47]:
# Series carrying the two-level index: each value is addressed by a
# (number, letter) pair.
s1 = pd.Series([2,4,6,8,10,12], index=idx)
s1

1  a     2
   b     4
2  a     6
   b     8
3  a    10
   b    12
dtype: int64

In [53]:
s1.index

MultiIndex(levels=[[1, 2, 3], [u'a', u'b']],
           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

In [51]:
# Sort the rows by the second index level (the letters); within each letter
# the rows stay ordered by the first level.
# Only the row order changes: the index keeps the same levels, and just the
# per-row positions into those levels are rearranged.
s2 = s1.sort_index(level=1)
s2

1  a     2
2  a     6
3  a    10
1  b     4
2  b     8
3  b    12
dtype: int64

In [52]:
s2.index

MultiIndex(levels=[[1, 2, 3], [u'a', u'b']],
           labels=[[0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1]])

In [60]:
# Cartesian product of the per-level values: every combination of an entry
# from a1 with an entry from a2, with a name attached to each level.
a1 = [1, 2]
a2 = list('abc')

pidx = pd.MultiIndex.from_product([a1, a2], names=['1st', '2nd'])

#### Note
The **levels attribute** contains the unique values in each level of the index.<br>
The **labels attribute** (renamed `codes` in pandas 0.24 and later) contains, for each "tuple" in the index, the position of its value within the corresponding level.

In [61]:
# The level names ('1st', '2nd') are shown as a header above the index columns.
pd.Series(np.arange(6), index = pidx)

1st  2nd
1    a      0
     b      1
     c      2
2    a      3
     b      4
     c      5
dtype: int32

In [64]:
# Passing a list of arrays as `index` builds the MultiIndex directly inside
# the Series constructor: the first array is level 0, the second is level 1.
s3 = pd.Series(
    np.arange(6),
    index=[
        [1, 2, 1, 2, 1, 2],
        ['a', 'a', 'b', 'b', 'c', 'c'],
    ],
)
s3

1  a    0
2  a    1
1  b    2
2  b    3
1  c    4
2  c    5
dtype: int32

### Get Level Values

In [80]:
# Label of every row at index level 1 (the letters), in row order.
s3.index.get_level_values(1)

Index([u'a', u'a', u'b', u'b', u'c', u'c'], dtype='object')

In [84]:
# Every (letter, number) combination: 3 letters x 2 numbers = 6 index entries.
# Note: this rebinds `idx`, which earlier held the index used to build s1.
idx = pd.MultiIndex.from_product([['A','B','C'], [1,2]])

In [85]:
# Six-row frame on the (letter, number) MultiIndex held in `idx`.
# NOTE(review): column 'a' is drawn from the unseeded global NumPy RNG, so its
# values differ on every run.
df = pd.DataFrame(
    {
        'a': np.random.randint(0, 10, 6),
        'b': [1, 3.5, 7, 2, 1, 8],
        'c': np.arange(6),
    },
    index=idx,
)

df

Unnamed: 0,Unnamed: 1,a,b,c
A,1,0,1.0,0
A,2,8,3.5,1
B,1,0,7.0,2
B,2,9,2.0,3
C,1,2,1.0,4
C,2,1,8.0,5


In [112]:
# Partial indexing: a single label selects on the outermost level (level 0),
# and that level is dropped from the result.
df.loc['B']

Unnamed: 0,a,b,c
1,0,7.0,2
2,9,2.0,3


In [114]:
# Alternative way to index by level 0: xs() with a single key gives the same
# rows as df.loc['B'].
df.xs('B')

Unnamed: 0,a,b,c
1,0,7.0,2
2,9,2.0,3


In [139]:
# Index by multiple levels: one label per level picks out a single row.
# The tuple form df.loc[('B', 2)] is equivalent and leaves room for a column
# indexer as well (see next cell).
# The row comes back as a Series, so the mixed int/float columns are shown
# with a common float64 dtype.
df.loc['B', 2]  # same as df.loc[('B', 2)]

a    9.0
b    2.0
c    3.0
Name: (B, 2), dtype: float64

In [138]:
# The tuple selects the row; the second indexer slices the columns by label,
# with both ends of the slice included.
df.loc[('B', 2), 'b':'c']

b    2.0
c    3.0
Name: (B, 2), dtype: float64

In [141]:
# Slice using tuples of the multi-index levels: rows from ('A', 2) through
# ('C', 1), both endpoints included, and columns 'a' through 'b'.
df.loc[('A', 2):('C', 1), 'a':'b']

Unnamed: 0,Unnamed: 1,a,b
A,2,8,3.5
B,1,0,7.0
B,2,9,2.0
C,1,2,1.0


In [145]:
# Can specify a list of index labels
df.loc[[('A',1), ('B', 2)]]

Unnamed: 0,Unnamed: 1,a,b,c
A,1,0,1.0,0
B,2,9,2.0,3


In [125]:
# Can select specific rows and columns at once; the columns come back in the
# order they are listed ('c' before 'a').
df.loc[[('A',1), ('B', 2)], ['c', 'a']]

Unnamed: 0,Unnamed: 1,c,a
A,1,0,0
B,2,3,9


In [131]:
df

Unnamed: 0,Unnamed: 1,a,b,c
A,1,0,1.0,0
A,2,8,3.5,1
B,1,0,7.0,2
B,2,9,2.0,3
C,1,2,1.0,4
C,2,1,8.0,5


### Cross-Section - df.xs()

Makes getting a cross section at a particular level easier

In [159]:
# Get rows labeled 1 in the index level 1; the selected level is dropped, so
# the result is indexed by level 0 only.
df.xs(1, level=1)

Unnamed: 0,a,b,c
A,0,1.0,0
B,0,7.0,2
C,2,1.0,4


In [160]:
# A full-depth key selects a single row; identical to df.loc[('B', 1)].
# The key must be a tuple: passing a list to xs() is deprecated and rejected
# by current pandas releases.
df.xs(('B', 1))

a    0.0
b    7.0
c    2.0
Name: (B, 1), dtype: float64

In [161]:
# Same key, but with the levels named explicitly; the matching row is
# returned as a one-row DataFrame with both index levels kept.
# The key is a tuple because xs() does not accept list keys in current pandas.
df.xs(('B', 1), level=[0, 1])

Unnamed: 0,Unnamed: 1,a,b,c
B,1,0,7.0,2


In [162]:
# xs() can also be used on columns (axis=1), but only with a single key such
# as df.xs('a', axis=1). List keys are rejected by current pandas, so several
# columns are selected with plain column indexing, which yields the same
# frame the list-key call used to produce.
df[['a', 'b']]

Unnamed: 0,Unnamed: 1,a,b
A,1,0,1.0
A,2,8,3.5
B,1,0,7.0
B,2,9,2.0
C,1,2,1.0
C,2,1,8.0


In [92]:
# Two-level *column* index: 'first' is the outer level, 'second' the inner one.
# (A pasted IPython continuation prompt was removed from the list literal so
# the cell is valid plain Python.)
arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
          ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second'])
# Values come from the unseeded global NumPy RNG, so they change on every run.
df2 = pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index)
df2

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.091746,0.967865,0.393782,1.035353,0.077268,-1.894062,1.196955,0.676424
B,-0.497547,0.066628,-0.335888,-0.898971,-1.497678,-2.225031,1.436616,-1.317467
C,1.417635,0.058437,-1.016382,0.867199,1.933101,0.558878,1.24535,0.481493


In [96]:
# A (first, second) pair selects a single column from the two-level column
# index; the result is a Series named after that pair.
df2['bar', 'one']

A    0.091746
B   -0.497547
C    1.417635
Name: (bar, one), dtype: float64

In [97]:
# Two NumPy arrays of labels, one per index level, handed straight to the
# Series constructor as a list to form a two-level index.
# (Pasted IPython prompts were removed so the cell is valid plain Python.)
arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']),
          np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'])]

# Values come from the unseeded global NumPy RNG, so they change on every run.
s = pd.Series(np.random.randn(8), index=arrays)

In [100]:
# Partial indexing on the outer level: returns the entries under 'qux',
# indexed by the remaining inner level ('one', 'two').
s['qux']

one   -0.919204
two   -2.264550
dtype: float64