In [1]:
import pandas as pd
import numpy as np

In [3]:
arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
          ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
arrays

[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

In [4]:
tuples = list(zip(*arrays))
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [5]:
index = pd.MultiIndex.from_tuples(tuples, names= ['first', 'second'])
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [32]:
s = pd.Series(np.random.randn(8), index = index)
s

first  second
bar    one       0.784658
       two       0.145302
baz    one      -0.414267
       two       0.315707
foo    one      -0.032590
       two      -0.558228
qux    one      -0.279213
       two      -0.900212
dtype: float64

In [6]:
#can use iterables for pairig elements
iterables = [['bar', 'baz', 'foo', 'qux'],['one','two']]
pd.MultiIndex.from_product(iterables, names =['first','second'])

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [16]:
#A convenience way is to create a list of arrays directly into series and dataframe
arrays = [np.array(['bar','bar','baz','baz','foo','foo','qux','qux']),
          np.array(['one','two','one','two','one','two','one','two'])]
s = pd.Series(np.random.randn(8), index = arrays)
s

bar  one   -0.486173
     two    1.760803
baz  one   -0.293384
     two   -1.087603
foo  one    1.268892
     two    1.913520
qux  one   -1.968694
     two    0.210413
dtype: float64

In [6]:
df = pd.DataFrame(np.random.randn(8,4), index = arrays)
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.151452,-0.263409,-0.531524,-2.003837
bar,two,-0.118683,0.543722,0.276653,0.348292
baz,one,0.048091,-0.924114,0.026066,0.657569
baz,two,0.933125,2.430677,0.445485,1.68591
foo,one,-0.919424,0.990266,0.26122,-0.44254
foo,two,0.749374,0.683256,2.096148,-0.969614
qux,one,-0.261739,0.727836,-0.126897,0.033019
qux,two,-0.978719,-1.494016,1.526114,1.758304


In [7]:
df.index.names

FrozenList([None, None])

In [33]:
df = pd.DataFrame(np.random.randn(3,8),index = ['A','B','C'], columns= index)
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,2.540509,-0.232595,0.540136,-1.57319,-0.664569,1.806389,-0.978332,-0.492675
B,-1.338722,-1.28312,2.427546,-0.228754,1.651015,-0.756029,2.006049,0.339074
C,1.035052,1.823462,0.071748,-0.453054,0.073654,0.029799,-0.109934,0.41627


In [8]:
df = pd.DataFrame(np.random.randn(6,6), index = index[:6], columns= index[:6])
df

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,1.176547,0.69461,0.896406,-1.549669,1.010353,-1.693472
bar,two,-0.188712,2.304357,-1.097033,-0.405818,-0.492924,-1.033673
baz,one,0.394627,-0.670016,0.497758,0.256942,-0.499886,-0.053103
baz,two,-0.651958,0.476078,0.790749,1.668343,0.689039,-2.183258
foo,one,0.775216,1.97576,-0.947833,2.323301,-1.954765,-0.083656
foo,two,0.809295,-1.381631,0.835066,-1.654472,-1.077802,0.317173


In [8]:
pd.Series(np.random.randn(8), index=tuples)

(bar, one)    0.483367
(bar, two)    0.187785
(baz, one)   -0.350017
(baz, two)   -0.308330
(foo, one)   -1.090919
(foo, two)   -0.709490
(qux, one)    1.120639
(qux, two)    0.164297
dtype: float64

In [11]:
pd.set_option('display.multi_sparse', False)

In [12]:
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.151452,-0.263409,-0.531524,-2.003837
bar,two,-0.118683,0.543722,0.276653,0.348292
baz,one,0.048091,-0.924114,0.026066,0.657569
baz,two,0.933125,2.430677,0.445485,1.68591
foo,one,-0.919424,0.990266,0.26122,-0.44254
foo,two,0.749374,0.683256,2.096148,-0.969614
qux,one,-0.261739,0.727836,-0.126897,0.033019
qux,two,-0.978719,-1.494016,1.526114,1.758304


In [13]:
pd.set_option('display.multi_sparse', True)

In [15]:
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.151452,-0.263409,-0.531524,-2.003837
bar,two,-0.118683,0.543722,0.276653,0.348292
baz,one,0.048091,-0.924114,0.026066,0.657569
baz,two,0.933125,2.430677,0.445485,1.68591
foo,one,-0.919424,0.990266,0.26122,-0.44254
foo,two,0.749374,0.683256,2.096148,-0.969614
qux,one,-0.261739,0.727836,-0.126897,0.033019
qux,two,-0.978719,-1.494016,1.526114,1.758304


In [18]:
#reconstructing level
index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [20]:
index.get_level_values(1)#same as below 

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

In [19]:
index.get_level_values('second')

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

In [23]:
#basic indexing
df['bar']

second,one,two
A,0.923736,0.522532
B,0.180661,-0.029935
C,-1.786865,1.242733


In [24]:
df['bar', 'one']

A    0.923736
B    0.180661
C   -1.786865
Name: (bar, one), dtype: float64

In [25]:
df['bar']['one']

A    0.923736
B    0.180661
C   -1.786865
Name: one, dtype: float64

In [26]:
s['qux']

second
one    1.189891
two    0.235638
dtype: float64

In [None]:
i

In [27]:
df.columns

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [28]:
df[['foo','qux']].columns

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[2, 2, 3, 3], [0, 1, 0, 1]],
           names=['first', 'second'])

In [22]:
index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [23]:
index.get_level_values(1)

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

In [29]:
df[['foo','qux']].columns.get_level_values(0)

Index(['foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [30]:
df[['foo','qux']].columns.get_level_values(1)

Index(['one', 'two', 'one', 'two'], dtype='object', name='second')

In [31]:
#to get used values
df[['foo','qux']].columns.values

array([('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')],
      dtype=object)

In [32]:
df[['foo','qux']].columns.remove_unused_levels()

MultiIndex(levels=[['foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
           names=['first', 'second'])

In [None]:
#reindexing for data alignment in the axes

In [9]:
s

first  second
bar    one      -0.079916
       two      -1.233097
baz    one      -0.427179
       two       0.533570
foo    one      -0.162166
       two      -0.168130
qux    one       0.017568
       two      -1.565765
dtype: float64

In [10]:
s[::2]

first  second
bar    one      -0.079916
baz    one      -0.427179
foo    one      -0.162166
qux    one       0.017568
dtype: float64

In [12]:
s + s[::2]

first  second
bar    one      -0.159832
       two            NaN
baz    one      -0.854357
       two            NaN
foo    one      -0.324331
       two            NaN
qux    one       0.035136
       two            NaN
dtype: float64

In [29]:
s[::3]

bar  one    1.207353
baz  two   -1.046503
qux  one   -0.693169
dtype: float64

In [14]:
df.reindex(index[:3])

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,1.176547,0.69461,0.896406,-1.549669,1.010353,-1.693472
bar,two,-0.188712,2.304357,-1.097033,-0.405818,-0.492924,-1.033673
baz,one,0.394627,-0.670016,0.497758,0.256942,-0.499886,-0.053103


In [13]:
s.reindex(index[:3])#pasing multiindex.reindex is used to access the rows

first  second
bar    one      -0.079916
       two      -1.233097
baz    one      -0.427179
dtype: float64

In [15]:
s.reindex([('foo', 'two'), ('bar', 'one'), ('qux', 'one'), ('baz', 'one')])#passing list of tuples

first  second
foo    two      -0.168130
bar    one      -0.079916
qux    one       0.017568
baz    one      -0.427179
dtype: float64

In [17]:
s.reindex(arrays)#passing arrays

bar  one   -0.486173
     two    1.760803
baz  one   -0.293384
     two   -1.087603
foo  one    1.268892
     two    1.913520
qux  one   -1.968694
     two    0.210413
dtype: float64

In [18]:
df1 = pd.DataFrame(np.random.randn(3,8),index = ['A','B','C'], columns= index)
df1

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.99715,1.589528,-1.111643,-1.430474,-0.590206,1.228181,0.567001,-1.411329
B,-0.393107,1.424214,-2.07361,-1.191693,-0.27028,1.65885,1.855833,-0.09078
C,-1.026809,0.435933,0.358121,0.680504,1.680684,-0.255204,-0.168779,0.540767


In [19]:
df1 = df1.T
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.99715,-0.393107,-1.026809
bar,two,1.589528,1.424214,0.435933
baz,one,-1.111643,-2.07361,0.358121
baz,two,-1.430474,-1.191693,0.680504
foo,one,-0.590206,-0.27028,1.680684
foo,two,1.228181,1.65885,-0.255204
qux,one,0.567001,1.855833,-0.168779
qux,two,-1.411329,-0.09078,0.540767


In [34]:
df1.loc['bar']

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.611478,-0.898818,-1.130583
two,-0.372327,0.61179,1.205405


In [20]:
df1.loc['bar','two']

A    1.589528
B    1.424214
C    0.435933
Name: (bar, two), dtype: float64

In [21]:
df1.loc[('baz','two'):('qux','one')]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,-1.430474,-1.191693,0.680504
foo,one,-0.590206,-0.27028,1.680684
foo,two,1.228181,1.65885,-0.255204
qux,one,0.567001,1.855833,-0.168779


In [22]:
df1.loc['baz':'foo']

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,one,-1.111643,-2.07361,0.358121
baz,two,-1.430474,-1.191693,0.680504
foo,one,-0.590206,-0.27028,1.680684
foo,two,1.228181,1.65885,-0.255204


In [23]:
df1.loc[('baz', 'two'):'foo']

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,-1.430474,-1.191693,0.680504
foo,one,-0.590206,-0.27028,1.680684
foo,two,1.228181,1.65885,-0.255204


In [24]:
#Passing a list of labels or tuples works similar to reindexing:
df1.loc[[('bar', 'two'), ('qux', 'one')]]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,two,1.589528,1.424214,0.435933
qux,one,0.567001,1.855833,-0.168779


In [2]:
#using slicing for multiple indexing and pd.indexslicer
def mklbl(prefix,n):
    return ["%s%s" % (prefix,i) for i in range(n)]


In [3]:
miindex = pd.MultiIndex.from_product([mklbl('A',4),mklbl('B',2),
                                      mklbl('C',4),mklbl('D',2)]);miindex

MultiIndex(levels=[['A0', 'A1', 'A2', 'A3'], ['B0', 'B1'], ['C0', 'C1', 'C2', 'C3'], ['D0', 'D1']],
           labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]])

In [4]:
miindex.get_level_values(3)

Index(['D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1',
       'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1',
       'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1',
       'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1',
       'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1', 'D0', 'D1',
       'D0', 'D1', 'D0', 'D1'],
      dtype='object')

In [5]:
micolumns = pd.MultiIndex.from_tuples([('a','foo'),('a','bar'),
 ('b','foo'),('b','bah')],
  names=['lvl0', 'lvl1'])

In [6]:
len(miindex)

64

In [7]:
len(micolumns)

4

In [8]:
dfmi = pd.DataFrame(np.arange(len(miindex)*len(micolumns)).reshape((len(miindex),len(micolumns))),
                   index = miindex,
                   columns = micolumns).sort_index().sort_index(axis=1);dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
A0,B0,C2,D1,21,20,23,22
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C0,D0,33,32,35,34
A0,B1,C0,D1,37,36,39,38


In [9]:
#Basic multi-index slicing using slices, lists, and labels.
dfmi.loc[(slice('A1','A3'),slice(None),['C1','C2']),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C2,D0,81,80,83,82
A1,B0,C2,D1,85,84,87,86
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C2,D0,113,112,115,114
A1,B1,C2,D1,117,116,119,118
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [12]:
dfmi.loc[(slice('A1','A3'),slice(None),['C1','C2'],slice('D0')),:]#sice up to A1:A3, all B ,C1:C2 and D0 indexes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C2,D0,81,80,83,82
A1,B1,C1,D0,105,104,107,106
A1,B1,C2,D0,113,112,115,114
A2,B0,C1,D0,137,136,139,138
A2,B0,C2,D0,145,144,147,146
A2,B1,C1,D0,169,168,171,170
A2,B1,C2,D0,177,176,179,178
A3,B0,C1,D0,201,200,203,202
A3,B0,C2,D0,209,208,211,210


In [13]:
dfmi.loc[(slice('A1','A2'),slice(None),['C1','C2']),:]#slice upto A2,all B index and C1 up to C2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C2,D0,81,80,83,82
A1,B0,C2,D1,85,84,87,86
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C2,D0,113,112,115,114
A1,B1,C2,D1,117,116,119,118
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [15]:
#alternatively you can use pd.indexslicer to have a more natural slicing
idx = pd.IndexSlice

In [16]:
dfmi.loc[idx[:,:,['C1','C2']], idx[:,'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C2,D0,16,18
A0,B0,C2,D1,20,22
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C2,D0,48,50
A0,B1,C2,D1,52,54
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [21]:
#example of complicated selectors
dfmi.loc['A1',(slice(None),'foo')]

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,foo,foo
B0,C0,D0,64,66
B0,C0,D1,68,70
B0,C1,D0,72,74
B0,C1,D1,76,78
B0,C2,D0,80,82
B0,C2,D1,84,86
B0,C3,D0,88,90
B0,C3,D1,92,94
B1,C0,D0,96,98
B1,C0,D1,100,102


In [22]:
dfmi.loc[idx[:,:,:,:],:]#selects all 4 multiindexes and columns

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
A0,B0,C2,D1,21,20,23,22
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C0,D0,33,32,35,34
A0,B1,C0,D1,37,36,39,38


In [23]:
#slicing using boolean
mask = dfmi[('a','foo')] > 200

In [25]:
dfmi.loc[idx[mask,:,['C1','C3']],idx[:,'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A3,B0,C1,D1,204,206
A3,B0,C3,D0,216,218
A3,B0,C3,D1,220,222
A3,B1,C1,D0,232,234
A3,B1,C1,D1,236,238
A3,B1,C3,D0,248,250
A3,B1,C3,D1,252,254


In [26]:
#axis argument can also be used
dfmi.loc(axis=0)[:,:,['C1','C3']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C1,D0,41,40,43,42
A0,B1,C1,D1,45,44,47,46
A0,B1,C3,D0,57,56,59,58
A0,B1,C3,D1,61,60,63,62
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78


In [27]:
df2 = dfmi.copy()

In [28]:
df2.loc[idx[:,:,['C1','C2']],:] = df2 * 1000
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9000,8000,11000,10000
A0,B0,C1,D1,13000,12000,15000,14000
A0,B0,C2,D0,17000,16000,19000,18000
A0,B0,C2,D1,21000,20000,23000,22000
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C0,D0,33,32,35,34
A0,B1,C0,D1,37,36,39,38


In [35]:
#cross section using the xs method to get the specific items in a level
df1 = df.copy();df1

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,2.540509,-0.232595,0.540136,-1.57319,-0.664569,1.806389,-0.978332,-0.492675
B,-1.338722,-1.28312,2.427546,-0.228754,1.651015,-0.756029,2.006049,0.339074
C,1.035052,1.823462,0.071748,-0.453054,0.073654,0.029799,-0.109934,0.41627


In [43]:
df1 = df1.T;df1

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,2.540509,-1.338722,1.035052
bar,two,-0.232595,-1.28312,1.823462
baz,one,0.540136,2.427546,0.071748
baz,two,-1.57319,-0.228754,-0.453054
foo,one,-0.664569,1.651015,0.073654
foo,two,1.806389,-0.756029,0.029799
qux,one,-0.978332,2.006049,-0.109934
qux,two,-0.492675,0.339074,0.41627


In [39]:
#using xs to get the specific values in a level
df1.xs('one', level='second')#this level will be dropped

Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,2.540509,-1.338722,1.035052
baz,0.540136,2.427546,0.071748
foo,-0.664569,1.651015,0.073654
qux,-0.978332,2.006049,-0.109934


In [44]:
df1.loc[(slice(None),'one'),:] #an alternative to the above xs

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,2.540509,-1.338722,1.035052
baz,one,0.540136,2.427546,0.071748
foo,one,-0.664569,1.651015,0.073654
qux,one,-0.978332,2.006049,-0.109934


In [45]:
df1 = df1.T;df1

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,2.540509,-0.232595,0.540136,-1.57319,-0.664569,1.806389,-0.978332,-0.492675
B,-1.338722,-1.28312,2.427546,-0.228754,1.651015,-0.756029,2.006049,0.339074
C,1.035052,1.823462,0.071748,-0.453054,0.073654,0.029799,-0.109934,0.41627


In [46]:
df1.xs('one',level='second',axis=1)

first,bar,baz,foo,qux
A,2.540509,0.540136,-0.664569,-0.978332
B,-1.338722,2.427546,1.651015,2.006049
C,1.035052,0.071748,0.073654,-0.109934


In [47]:
#using slicing
df1.loc[:,(slice(None),'one')]

first,bar,baz,foo,qux
second,one,one,one,one
A,2.540509,0.540136,-0.664569,-0.978332
B,-1.338722,2.427546,1.651015,2.006049
C,1.035052,0.071748,0.073654,-0.109934


In [48]:
#selecting multiple keys
df1.xs(('one','bar'),level=('second','first'), axis=1)

first,bar
second,one
A,2.540509
B,-1.338722
C,1.035052


In [49]:
#so as not to drop the levels using xs use drop_level=False;default = True
df1.xs('one',level='second',axis=1,drop_level=False)

first,bar,baz,foo,qux
second,one,one,one,one
A,2.540509,0.540136,-0.664569,-0.978332
B,-1.338722,2.427546,1.651015,2.006049
C,1.035052,0.071748,0.073654,-0.109934


In [50]:
df1.xs('one',level='second',axis=1,drop_level=True)

first,bar,baz,foo,qux
A,2.540509,0.540136,-0.664569,-0.978332
B,-1.338722,2.427546,1.651015,2.006049
C,1.035052,0.071748,0.073654,-0.109934


In [51]:
#advanced reindexong and alignment
midx = pd.MultiIndex(levels=[['zero','one'],['x','y']],labels=[[1,1,0,0],[1,0,1,0]]);midx

MultiIndex(levels=[['zero', 'one'], ['x', 'y']],
           labels=[[1, 1, 0, 0], [1, 0, 1, 0]])

In [52]:
df = pd.DataFrame(np.random.rand(4,2), index=midx);df

Unnamed: 0,Unnamed: 1,0,1
one,y,0.869394,0.542491
one,x,0.448291,0.663847
zero,y,0.097791,0.837894
zero,x,0.063235,0.292631


In [54]:
df2 = df.mean(level=0)
df2

Unnamed: 0,0,1
zero,0.080513,0.565262
one,0.658842,0.603169


In [55]:
df2.reindex(df.index,level=0)#just get the multiindex results rowwise.alternatively you can use loc
df2

Unnamed: 0,0,1
zero,0.080513,0.565262
one,0.658842,0.603169


In [56]:
#aligning
df_aligned,df2_aligned = df.align(df2,level=0)

In [57]:
df_aligned

Unnamed: 0,Unnamed: 1,0,1
one,y,0.869394,0.542491
one,x,0.448291,0.663847
zero,y,0.097791,0.837894
zero,x,0.063235,0.292631


In [58]:
df2_aligned

Unnamed: 0,Unnamed: 1,0,1
one,y,0.658842,0.603169
one,x,0.658842,0.603169
zero,y,0.080513,0.565262
zero,x,0.080513,0.565262


In [59]:
#swapping levels ie level 0 goes to 1 and 1 goes to 0
df[:5]

Unnamed: 0,Unnamed: 1,0,1
one,y,0.869394,0.542491
one,x,0.448291,0.663847
zero,y,0.097791,0.837894
zero,x,0.063235,0.292631


In [60]:
df[:5].swaplevel(0,1, axis = 0)

Unnamed: 0,Unnamed: 1,0,1
y,one,0.869394,0.542491
x,one,0.448291,0.663847
y,zero,0.097791,0.837894
x,zero,0.063235,0.292631


In [62]:
#reordering is just the same as swapevels.generalizes the swaplevel function
df[:5].reorder_levels([0,1], axis=0)

Unnamed: 0,Unnamed: 1,0,1
one,y,0.869394,0.542491
one,x,0.448291,0.663847
zero,y,0.097791,0.837894
zero,x,0.063235,0.292631


In [4]:
import random

In [5]:
random.shuffle(tuples)

In [6]:
s = pd.Series(np.random.randn(8), index = pd.MultiIndex.from_tuples(tuples));s

qux  one   -1.353345
baz  one    0.203875
bar  two    0.254615
qux  two    1.444647
baz  two   -0.402758
bar  one    1.011335
foo  two   -0.595313
     one    0.843896
dtype: float64

In [7]:
s.sort_index()

bar  one    1.011335
     two    0.254615
baz  one    0.203875
     two   -0.402758
foo  one    0.843896
     two   -0.595313
qux  one   -1.353345
     two    1.444647
dtype: float64

In [8]:
s.sort_index(level=0)

bar  one    1.011335
     two    0.254615
baz  one    0.203875
     two   -0.402758
foo  one    0.843896
     two   -0.595313
qux  one   -1.353345
     two    1.444647
dtype: float64

In [9]:
s.sort_index(level=1)

bar  one    1.011335
baz  one    0.203875
foo  one    0.843896
qux  one   -1.353345
bar  two    0.254615
baz  two   -0.402758
foo  two   -0.595313
qux  two    1.444647
dtype: float64

In [7]:
dfm = pd.DataFrame({'jim': [0, 0, 1, 1],
                    'joe': ['x', 'x', 'z', 'y'],
                    'jolie': np.random.rand(4)});dfm

Unnamed: 0,jim,joe,jolie
0,0,x,0.280052
1,0,x,0.557462
2,1,z,0.453465
3,1,y,0.953632


In [8]:
dfm.set_index(['jim','joe'])

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.280052
0,x,0.557462
1,z,0.453465
1,y,0.953632


In [None]:
dfm.loc[(1, 'z')]#gives me akey error

In [20]:
dfm.index.is_lexsorted_for_tuple(tuples)

True

In [None]:
dfm.index.lexsorted_depth()

In [10]:
dfm.sort_index()

Unnamed: 0,jim,joe,jolie
0,0,x,0.280052
1,0,x,0.557462
2,1,z,0.453465
3,1,y,0.953632


In [11]:
#take methods can be applied to index,series and dataframe
index = pd.Index(np.random.randint(0,1000,10));index

Int64Index([332, 460, 598, 815, 4, 671, 801, 394, 938, 537], dtype='int64')

In [12]:
positions = [0,9,3]

In [13]:
index[positions]

Int64Index([332, 537, 815], dtype='int64')

In [14]:
index.take(positions)

Int64Index([332, 537, 815], dtype='int64')

In [15]:
#series take
ser = pd.Series(np.random.randn(10))

In [16]:
ser.iloc[positions]

0    0.275995
9   -1.934645
3    1.169062
dtype: float64

In [17]:
ser.take(positions)

0    0.275995
9   -1.934645
3    1.169062
dtype: float64

In [18]:
frm = pd.DataFrame(np.random.randn(5,3));frm

Unnamed: 0,0,1,2
0,0.487721,-0.747114,0.088118
1,1.13772,-0.568732,-0.3956
2,-0.055563,-0.712744,-0.233242
3,-0.299188,-0.206602,0.629501
4,0.719603,0.499515,0.486259


In [19]:
frm.take([1,4,3])

Unnamed: 0,0,1,2
1,1.13772,-0.568732,-0.3956
4,0.719603,0.499515,0.486259
3,-0.299188,-0.206602,0.629501


In [20]:
frm.take([0,2],axis=1)

Unnamed: 0,0,2
0,0.487721,0.088118
1,1.13772,-0.3956
2,-0.055563,-0.233242
3,-0.299188,0.629501
4,0.719603,0.486259


In [21]:
#types of indexes categoricalindex,int64index,rangeindex,float64index and intervalindex
df = pd.DataFrame({'A': np.arange(6),
                  'B': list('aabbca')});df

Unnamed: 0,A,B
0,0,a
1,1,a
2,2,b
3,3,b
4,4,c
5,5,a


In [22]:
df['B'] = df['B'].astype('category', categories=list('cab'))

In [23]:
df

Unnamed: 0,A,B
0,0,a
1,1,a
2,2,b
3,3,b
4,4,c
5,5,a


In [24]:
df.dtypes

A       int32
B    category
dtype: object

In [25]:
df.B.cat.categories

Index(['c', 'a', 'b'], dtype='object')

In [27]:
df2 = df.set_index('B')

In [28]:
df2.index

CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category')

In [29]:
df2

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0
a,1
b,2
b,3
c,4
a,5


In [30]:
df2.loc['a']

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0
a,1
a,5


In [31]:
#thisone preserves the index
df2.loc['a'].index

CategoricalIndex(['a', 'a', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category')

In [32]:
#sorting this index
df2.sort_index()

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
c,4
a,0
a,1
a,5
b,2
b,3


In [33]:
#groupby on index
df2.groupby(level=0).sum()

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
c,4
a,6
b,5


In [34]:
df2.groupby(level=0).sum().index

CategoricalIndex(['c', 'a', 'b'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category')

In [35]:
#reindexing this operations
df2.reindex(['a','e'])

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0.0
a,1.0
a,5.0
e,


In [36]:
df2.reindex(['a','e']).index

Index(['a', 'a', 'a', 'e'], dtype='object', name='B')

In [37]:
df2.reindex(pd.Categorical(['a','c'], categories=list('abcde')))

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0
a,1
a,5
c,4


In [38]:
df2.reindex(pd.Categorical(['a','c'], categories=list('abcde'))).index

CategoricalIndex(['a', 'a', 'a', 'c'], categories=['a', 'b', 'c', 'd', 'e'], ordered=False, name='B', dtype='category')