In [33]:
import pandas as pd
import numpy as np

In [34]:
# Hierarchical (two-level) index: the first list supplies the outer level,
# the second list the inner level, paired element by element.
# The generator is seeded so every Restart & Run All yields the same values;
# re-run the notebook to refresh the stored outputs.
data = pd.Series(
    np.random.default_rng(0).standard_normal(9),
    index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
           [1, 2, 3, 1, 3, 1, 2, 2, 3]],
)
data

a  1    0.099446
   2    0.013740
   3    1.425988
b  1   -1.443415
   3    0.450973
c  1   -0.145701
   2   -1.413169
d  2    0.466063
   3    0.529243
dtype: float64

In [35]:
# The index is a MultiIndex: one (outer, inner) tuple per element of the Series.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [36]:
# Partial indexing: selecting an outer-level label returns every row under it,
# indexed by the remaining inner level.
data['a']

1    0.099446
2    0.013740
3    1.425988
dtype: float64

In [37]:
# Label slice on the outer level: both endpoints ('b' and 'c') are included in the result.
data['b':'c']

b  1   -1.443415
   3    0.450973
c  1   -0.145701
   2   -1.413169
dtype: float64

In [38]:
# A list of outer labels passed to .loc selects every row under each listed label.
data.loc[['b','c']]

b  1   -1.443415
   3    0.450973
c  1   -0.145701
   2   -1.413169
dtype: float64

In [39]:
# .loc[outer, inner]: keep every outer label (:) and only inner label 2.
# The result is indexed by the outer labels that have an inner label 2.
data.loc[:,2]

a    0.013740
c   -1.413169
d    0.466063
dtype: float64

In [40]:
# unstack() pivots the inner index level into columns and keeps the outer level as rows,
# producing a DataFrame; (outer, inner) pairs missing from the Series show up as NaN.
data.unstack()

Unnamed: 0,1,2,3
a,0.099446,0.01374,1.425988
b,-1.443415,,0.450973
c,-0.145701,-1.413169,
d,,0.466063,0.529243


In [41]:
# stack() reverses unstack(): the columns are folded back into the inner index level.
# The round trip below returns the same nine rows as the original Series.
data.unstack().stack()

a  1    0.099446
   2    0.013740
   3    1.425988
b  1   -1.443415
   3    0.450973
c  1   -0.145701
   2   -1.413169
d  2    0.466063
   3    0.529243
dtype: float64

In [42]:
# Either axis of a DataFrame can carry a hierarchical index (nested list: [outer, inner]).
# The generator is seeded so every Restart & Run All yields the same values;
# re-run the notebook to refresh the stored outputs.
df = pd.DataFrame(
    np.random.default_rng(1).standard_normal((4, 4)),
    index=[['a', 'b', 'b', 'c'], [1, 1, 2, 1]],
    columns=[['Ohio', 'Ohio', 'Colorado', 'Colorado'],
             ['Red', 'Green', 'Yellow', 'Red']],
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Red,Green,Yellow,Red
a,1,1.776587,-0.173815,0.741636,-0.394238
b,1,-1.074837,0.861589,-0.218459,-1.594308
b,2,-0.923523,-0.281822,0.806316,0.44088
c,1,-0.914742,0.579145,-0.134468,0.441971


In [43]:
# Give each level of the row and column indexes a name so that later cells
# can refer to levels by name instead of by position.
df.index.names = ['key1', 'key2']
df.columns.names = ['state', 'color']
df

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado,Colorado
Unnamed: 0_level_1,color,Red,Green,Yellow,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,1.776587,-0.173815,0.741636,-0.394238
b,1,-1.074837,0.861589,-0.218459,-1.594308
b,2,-0.923523,-0.281822,0.806316,0.44088
c,1,-0.914742,0.579145,-0.134468,0.441971


In [44]:
# Partial column indexing: an outer column label returns the sub-frame of its inner columns.
df['Colorado']

Unnamed: 0_level_0,color,Yellow,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0.741636,-0.394238
b,1,-0.218459,-1.594308
b,2,0.806316,0.44088
c,1,-0.134468,0.441971


## Reordering and Sorting Levels

In [45]:
# swaplevel() exchanges two index levels (here the row levels key1 and key2, given by name)
# and returns a new frame; the row order of the data is not changed.
# NOTE(review): this is not limited to the row index — swaplevel() also takes an
# axis argument (axis=1 for column levels); confirm against the pandas docs.
df.swaplevel('key1','key2')

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado,Colorado
Unnamed: 0_level_1,color,Red,Green,Yellow,Red
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,a,1.776587,-0.173815,0.741636,-0.394238
1,b,-1.074837,0.861589,-0.218459,-1.594308
2,b,-0.923523,-0.281822,0.806316,0.44088
1,c,-0.914742,0.579145,-0.134468,0.441971


In [46]:
# Sort the rows by index level 1 (key2); a new frame is returned and df is left as it was.
df.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado,Colorado
Unnamed: 0_level_1,color,Red,Green,Yellow,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,1.776587,-0.173815,0.741636,-0.394238
b,1,-1.074837,0.861589,-0.218459,-1.594308
c,1,-0.914742,0.579145,-0.134468,0.441971
b,2,-0.923523,-0.281822,0.806316,0.44088


In [47]:
# Levels can also be given by position; level 0 is the outermost.
# After the swap key2 is level 0, so sorting on level 0 orders the rows by key2.
df.swaplevel(0,1).sort_index(level=0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado,Colorado
Unnamed: 0_level_1,color,Red,Green,Yellow,Red
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,a,1.776587,-0.173815,0.741636,-0.394238
1,b,-1.074837,0.861589,-0.218459,-1.594308
1,c,-0.914742,0.579145,-0.134468,0.441971
2,b,-0.923523,-0.281822,0.806316,0.44088


In [48]:
# df still has its original level order and row order: the swaplevel()/sort_index()
# calls in the cells above returned new objects.
df

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado,Colorado
Unnamed: 0_level_1,color,Red,Green,Yellow,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,1.776587,-0.173815,0.741636,-0.394238
b,1,-1.074837,0.861589,-0.218459,-1.594308
b,2,-0.923523,-0.281822,0.806316,0.44088
c,1,-0.914742,0.579145,-0.134468,0.441971


## Summary Statistics by Level

In [52]:
# Aggregate by index level.
# df.sum(level=...) no longer works: the `level` keyword of the reductions was
# deprecated in pandas 1.3 and removed in 2.0. Group on the level and aggregate instead.
key2_totals = df.groupby(level='key2').sum()          # rows summed per key2 value
# For a column level, group the transposed frame and transpose back.
color_totals = df.T.groupby(level='color').sum().T    # columns summed per color
df

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado,Colorado
Unnamed: 0_level_1,color,Red,Green,Yellow,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,1.776587,-0.173815,0.741636,-0.394238
b,1,-1.074837,0.861589,-0.218459,-1.594308
b,2,-0.923523,-0.281822,0.806316,0.44088
c,1,-0.914742,0.579145,-0.134468,0.441971


In [56]:
# Small deterministic frame (values 0..11) with named two-level indexes on both axes:
# rows are (key1, key2), columns are (state, color).
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=pd.MultiIndex.from_arrays(
        [['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
        names=['key1', 'key2'],
    ),
    columns=pd.MultiIndex.from_arrays(
        [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
        names=['state', 'color'],
    ),
)
frame


Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


## Indexing with a DataFrame's Columns


In [59]:
# Columns and the row index are interchangeable:
#   set_index([col1, col2]) moves columns into the row index
#       (drop=False keeps them as columns too);
#   reset_index() moves the index levels back into columns.
frame = pd.DataFrame(
    {
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one'] * 3 + ['two'] * 4,
        'd': [0, 1, 2, 0, 1, 2, 3],
    }
)
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [65]:
# Move columns c and d into a two-level row index (c outer, d inner);
# as the output shows, they no longer appear among the columns.
f2 = frame.set_index(['c','d'])
f2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [66]:
# reset_index() turns the index levels c and d back into ordinary columns
# and the rows get a plain 0..n-1 integer index again.
f2.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1
