# Hierarchical Indexing in Pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed a dedicated generator so "Restart Kernel -> Run All" reproduces the
# same values on every run (the legacy np.random.randn call was unseeded).
SEED = 12345
rng = np.random.default_rng(SEED)

# Series with a two-level index: outer keys "a"/"b"/"c", inner keys 1..3.
# Note that outer key "c" only has inner keys 1 and 2.
data = pd.Series(
    rng.standard_normal(8),
    index=[
        ["a", "a", "a", "b", "b", "b", "c", "c"],
        [1, 2, 3, 1, 2, 3, 1, 2],
    ],
)
data

a  1    0.022235
   2    0.007393
   3   -3.081152
b  1   -0.673017
   2   -0.034024
   3    0.679701
c  1    1.175051
   2    0.916181
dtype: float64

In [3]:
# Show the index built from the two label lists: one (outer, inner) tuple per row.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 2),
            ('b', 3),
            ('c', 1),
            ('c', 2)],
           )

In [4]:
# Partial indexing: pick every row under outer key "a"; the outer level is dropped.
data.loc["a"]

1    0.022235
2    0.007393
3   -3.081152
dtype: float64

In [5]:
# Label slice on the outer level; both endpoints ("b" and "c") are included.
data.loc["b":"c"]

b  1   -0.673017
   2   -0.034024
   3    0.679701
c  1    1.175051
   2    0.916181
dtype: float64

In [6]:
# Select several outer keys at once by passing a list of labels.
outer_keys = ["a", "c"]
data.loc[outer_keys]

a  1    0.022235
   2    0.007393
   3   -3.081152
c  1    1.175051
   2    0.916181
dtype: float64

In [7]:
# Cross-section on the inner level: rows whose inner key is 1, indexed by outer key.
data.xs(1, level=1)

a    0.022235
b   -0.673017
c    1.175051
dtype: float64

In [8]:
# Pivot the innermost index level into columns; the absent ("c", 3) entry shows up as missing.
data.unstack(level=-1)

Unnamed: 0,1,2,3
a,0.022235,0.007393,-3.081152
b,-0.673017,-0.034024,0.679701
c,1.175051,0.916181,


In [9]:
# Round trip: widen the inner level into columns, then fold the columns back into the index.
wide = data.unstack()
wide.stack()

a  1    0.022235
   2    0.007393
   3   -3.081152
b  1   -0.673017
   2   -0.034024
   3    0.679701
c  1    1.175051
   2    0.916181
dtype: float64

In [10]:
# Frame with hierarchical labels on BOTH axes.
# Each axis is given as a list of parallel label lists (outer level first).
row_labels = [
    ["a", "a", "b", "b"],
    [1, 2, 1, 2],
]
column_labels = [
    ["num", "num", "ver"],
    ["math", "stat", "geo"],
]
values = np.arange(12).reshape(4, 3)  # 0..11 laid out as 4 rows x 3 columns

df = pd.DataFrame(values, index=row_labels, columns=column_labels)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,num,num,ver
Unnamed: 0_level_1,Unnamed: 1_level_1,math,stat,geo
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [11]:
# Give every level on both axes a name so later cells can refer to levels by name.
df.index = df.index.set_names(["class", "exam"])
df.columns = df.columns.set_names(["field", "lesson"])
df

Unnamed: 0_level_0,field,num,num,ver
Unnamed: 0_level_1,lesson,math,stat,geo
class,exam,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [12]:
# Partial column selection: every column under the outer label "num" (math, stat).
df.loc[:, "num"]

Unnamed: 0_level_0,lesson,math,stat
class,exam,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [13]:
# Swap the order of the two row-index levels; this returns a new frame and leaves row order as is.
df.swaplevel(i="class", j="exam", axis=0)

Unnamed: 0_level_0,field,num,num,ver
Unnamed: 0_level_1,lesson,math,stat,geo
exam,class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [14]:
# Sort the rows by the second row-index level (position 1) instead of the outer one.
df.sort_index(axis=0, level=1)

Unnamed: 0_level_0,field,num,num,ver
Unnamed: 0_level_1,lesson,math,stat,geo
class,exam,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


## Summary Statistics by Level

In [15]:
# Sum the rows that share the same "exam" label.
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0,
# so aggregate through groupby on the index level instead.
df.groupby(level="exam").sum()

field,num,num,ver
lesson,math,stat,geo
exam,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,6,8,10
2,12,14,16


In [16]:
# Sum the columns that share the same "field" label, row by row.
# DataFrame.sum(level=..., axis=1) was removed in pandas 2.0 and
# groupby(axis=1) is deprecated, so transpose, group on the (now row) level,
# and transpose back.
df.T.groupby(level="field").sum().T

Unnamed: 0_level_0,field,num,ver
class,exam,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,2
a,2,7,5
b,1,13,8
b,2,19,11


## Indexing with a DataFrame’s Columns

In [17]:
# Flat frame whose "a"/"b" columns will be used as index levels in the next cells.
# NOTE: this rebinds `data`, which earlier in the notebook referred to a Series.
frame_spec = {
    "x": range(8),                      # 0..7
    "y": range(8, 0, -1),               # 8..1
    "a": ["one"] * 4 + ["two"] * 4,     # outer key
    "b": [0, 1, 2, 3] * 2,              # inner key, repeats within each outer key
}
data = pd.DataFrame(frame_spec)
data

Unnamed: 0,x,y,a,b
0,0,8,one,0
1,1,7,one,1
2,2,6,one,2
3,3,5,one,3
4,4,4,two,0
5,5,3,two,1
6,6,2,two,2
7,7,1,two,3


In [18]:
# Promote columns "a" and "b" to a two-level row index; they no longer appear as columns.
index_cols = ["a", "b"]
data2 = data.set_index(index_cols)
data2

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y
a,b,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,8
one,1,1,7
one,2,2,6
one,3,3,5
two,0,4,4
two,1,5,3
two,2,6,2
two,3,7,1


In [19]:
# Same index as before, but drop=False keeps "a" and "b" as regular columns too.
data3 = data.set_index(keys=["a", "b"], drop=False)
data3

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y,a,b
a,b,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,8,one,0
one,1,1,7,one,1
one,2,2,6,one,2
one,3,3,5,one,3
two,0,4,4,two,0
two,1,5,3,two,1
two,2,6,2,two,2
two,3,7,1,two,3


In [20]:
# Re-display data2 (indexed by "a" and "b") before its index is reset.
data2

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y
a,b,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,8
one,1,1,7
one,2,2,6
one,3,3,5
two,0,4,4
two,1,5,3
two,2,6,2
two,3,7,1


In [21]:
# Inverse of set_index: move the index levels "a" and "b" back into ordinary columns.
data2.reset_index(drop=False)

Unnamed: 0,a,b,x,y
0,one,0,0,8
1,one,1,1,7
2,one,2,2,6
3,one,3,3,5
4,two,0,4,4
5,two,1,5,3
6,two,2,6,2
7,two,3,7,1
