In [1]:
import numpy as np
import pandas as pd

In [2]:
### Build a 4x3 frame whose rows and columns are both two-level MultiIndexes.
### The values come from a locally seeded generator so that every Restart & Run All
### reproduces the same numbers (outputs saved from earlier unseeded runs will differ).
df = pd.DataFrame(
    np.random.RandomState(0).randn(4, 3),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
)

In [3]:
### Display the freshly built frame: the row index and the columns each have two levels.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0.300754,2.161724,-1.021407
a,2,0.53295,-1.016011,-0.564654
b,1,0.211788,-1.351669,1.976249
b,2,-0.799274,0.136439,0.989248


In [4]:
### Label the two row-index levels (outer level first).
df.index = df.index.set_names(['key1', 'key2'])

In [5]:
### Label the two column levels (outer level first).
df.columns = df.columns.set_names(['state', 'color'])

In [6]:
### Display df again: the row levels (key1, key2) and column levels (state, color) now carry names.
df

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0.300754,2.161724,-1.021407
a,2,0.53295,-1.016011,-0.564654
b,1,0.211788,-1.351669,1.976249
b,2,-0.799274,0.136439,0.989248


In [14]:
### Exchange the two row-index levels, addressing them by name; the data rows keep their order.
df.swaplevel(i='key1', j='key2')

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0.300754,2.161724,-1.021407
2,a,0.53295,-1.016011,-0.564654
1,b,0.211788,-1.351669,1.976249
2,b,-0.799274,0.136439,0.989248


In [None]:
### As you can see, the first and second index levels are swapped, and each level keeps its original name

In [8]:
### Display df again: it still has key1 as the outer level, so the swap above did not alter df itself.
df

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0.300754,2.161724,-1.021407
a,2,0.53295,-1.016011,-0.564654
b,1,0.211788,-1.351669,1.976249
b,2,-0.799274,0.136439,0.989248


In [15]:
### sortlevel(), on the other hand, sorts the data using only the values in a single level.
### When swapping levels, it's not uncommon to also use sortlevel so that the result is lexicographically sorted.
### (In newer pandas releases, sortlevel is deprecated in favour of sort_index(level=...).)

In [20]:
### Sort the rows by index level 1 (key2). sort_index(level=...) is the supported replacement
### for DataFrame.sortlevel, which is deprecated since pandas 0.20 and gone from current releases.
df.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0.300754,2.161724,-1.021407
b,1,0.211788,-1.351669,1.976249
a,2,0.53295,-1.016011,-0.564654
b,2,-0.799274,0.136439,0.989248


In [22]:
### Swap the two row levels, then sort by what is now level 1 (key1).
### sort_index(level=...) replaces the deprecated sortlevel(), which current pandas no longer provides.
df.swaplevel(0, 1).sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0.300754,2.161724,-1.021407
2,a,0.53295,-1.016011,-0.564654
1,b,0.211788,-1.351669,1.976249
2,b,-0.799274,0.136439,0.989248


In [None]:
### The index levels have been swapped, and index level 1 (key1) is now sorted as a, a, b, b

In [23]:
### To sort on index level 0 instead (key2 after the swap):
### sort_index(level=...) replaces the deprecated sortlevel(), which current pandas no longer provides.

df.swaplevel(0, 1).sort_index(level=0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0.300754,2.161724,-1.021407
1,b,0.211788,-1.351669,1.976249
2,a,0.53295,-1.016011,-0.564654
2,b,-0.799274,0.136439,0.989248


In [24]:
### Data selection performance is much better on hierarchically indexed objects if the index is lexicographically sorted
### starting with the outermost level, i.e. the result of calling sortlevel(0)

In [25]:
### Many descriptive and summary statistics on DataFrame and Series have a level option, which lets us specify the level
### on which we want to perform the operation.

In [26]:
### Sum the rows that share the same key2 value. The level= argument of sum() was deprecated
### in pandas 1.3 and removed in 2.0; grouping by the index level gives the same result.
df.groupby(level='key2').sum()

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0.512542,0.810054,0.954842
2,-0.266324,-0.879572,0.424594


In [27]:
### In the example above, rows that share the same key2 value are added together
### (a.1 + b.1 and a.2 + b.2), because the level given is key2.

### Let's try key1. The level= argument of sum() was removed in pandas 2.0,
### so the rows are grouped by the index level instead; the result is the same.

df.groupby(level='key1').sum()

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,0.833704,1.145713,-1.586061
b,-0.587486,-1.21523,2.965497


In [28]:
### Display the original frame again for reference when reading the per-level sums.
df

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0.300754,2.161724,-1.021407
a,2,0.53295,-1.016011,-0.564654
b,1,0.211788,-1.351669,1.976249
b,2,-0.799274,0.136439,0.989248


In [2]:
### The way it works is that a.1 and a.2 are added together; likewise, b.1 and b.2 are added together.

In [3]:
### One or more columns of a DataFrame can serve as its row index,
### and conversely the row index can be moved back into the DataFrame's columns.

df_ex = pd.DataFrame({
    'a': list(range(7)),
    'b': list(range(7, 0, -1)),
    'c': ['one'] * 3 + ['two'] * 4,
    'd': [0, 1, 2, 3, 3, 4, 1],
})

In [5]:
### Display the example frame; it has a default integer row index (0-6).
df_ex

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,3
4,4,3,two,3
5,5,2,two,4
6,6,1,two,1


In [6]:
### the set_index function will create a new DataFrame using one or more of its columns as the Index

In [7]:
### Use columns c and d as a two-level row index.
df_e = df_ex.set_index(keys=['c', 'd'])

In [8]:
### Display the result: c and d now form the row index and no longer appear among the columns.
df_e

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,3,3,4
two,3,4,3
two,4,5,2
two,1,6,1


In [9]:
### set_index removes the chosen columns from the frame by default; pass drop=False to keep them as columns too.

df_e = df_ex.set_index(keys=['c', 'd'], drop=False)

In [10]:
### Display the result: c and d form the row index and are also still present as columns.
df_e

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,3,3,4,two,3
two,3,4,3,two,3
two,4,5,2,two,4
two,1,6,1,two,1


In [11]:
### reset_index does the opposite: the hierarchical index levels are removed from the index and moved into the columns

In [14]:
### Rebuild the indexed frame with c and d dropped from the columns (drop=True is the default).
df_e = df_ex.set_index(['c', 'd'])

In [15]:
### Move the index levels c and d back into ordinary columns; the output shows a default integer index again.
df_e.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,3,3,4
4,two,3,4,3
5,two,4,5,2
6,two,1,6,1


In [16]:
### This is what the new DataFrame looks like after reset_index is performed