# Hierarchical index

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Nine random values labelled by a two-level index:
# outer level 'a'/'b'/'c', inner level 1/2/3.
data = pd.Series(
    np.random.randn(9),
    index=pd.MultiIndex.from_product([['a', 'b', 'c'], [1, 2, 3]]),
)
data

a  1   -0.258267
   2   -0.943756
   3   -1.130199
b  1   -0.082841
   2   -0.487710
   3    0.473391
c  1    0.470184
   2   -0.656375
   3   -1.248528
dtype: float64

In [3]:
# Pivot the innermost index level (1/2/3) into columns.
data.unstack(level=-1)

Unnamed: 0,1,2,3
a,-0.258267,-0.943756,-1.130199
b,-0.082841,-0.48771,0.473391
c,0.470184,-0.656375,-1.248528


In [4]:
# Round trip: unstack the inner level into columns, then stack it back.
data.unstack(level=-1).stack(level=-1)

a  1   -0.258267
   2   -0.943756
   3   -1.130199
b  1   -0.082841
   2   -0.487710
   3    0.473391
c  1    0.470184
   2   -0.656375
   3   -1.248528
dtype: float64

In [5]:
# Partial indexing: select every row whose outer label is 'a'.
data.loc['a']

1   -0.258267
2   -0.943756
3   -1.130199
dtype: float64

In [6]:
# Select inner label 1 across all outer labels.
data.loc[:, 1]

a   -0.258267
b   -0.082841
c    0.470184
dtype: float64

### Setting index names and swapping levels

In [7]:
# Exchange the two index levels, then sort lexicographically on the new order.
data.swaplevel(0, 1).sort_index()

1  a   -0.258267
   b   -0.082841
   c    0.470184
2  a   -0.943756
   b   -0.487710
   c   -0.656375
3  a   -1.130199
   b    0.473391
   c   -1.248528
dtype: float64

In [8]:
# Exchange the two index levels, then sort starting from level 1
# (the letters after the swap).
data.swaplevel(0, 1).sort_index(level=1)

1  a   -0.258267
2  a   -0.943756
3  a   -1.130199
1  b   -0.082841
2  b   -0.487710
3  b    0.473391
1  c    0.470184
2  c   -0.656375
3  c   -1.248528
dtype: float64

In [9]:
# Name the two index levels; this mutates data's index in place.
data.index.names = ["level1", "level2"]

In [10]:
# Display the level names currently attached to the index.
data.index.names

FrozenList(['level1', 'level2'])

In [11]:
# Display the series; the index level names appear as a header row.
data

level1  level2
a       1        -0.258267
        2        -0.943756
        3        -1.130199
b       1        -0.082841
        2        -0.487710
        3         0.473391
c       1         0.470184
        2        -0.656375
        3        -1.248528
dtype: float64

In [12]:
# Levels can be swapped by name as well as by position.
data.swaplevel(i='level2', j='level1')

level2  level1
1       a        -0.258267
2       a        -0.943756
3       a        -1.130199
1       b        -0.082841
2       b        -0.487710
3       b         0.473391
1       c         0.470184
2       c        -0.656375
3       c        -1.248528
dtype: float64

In [13]:
# 4x4 frame of random values with two-level labels on both rows and columns.
frame = pd.DataFrame(
    np.random.randn(4, 4),
    index=[[1, 2, 2, 1], ['a', 'a', 'b', 'b']],
    columns=[['mdu', 'mdu', 'maa', 'maa'], [1, 2, 1, 2]],
)

In [14]:
# Display the frame with its hierarchical row and column labels.
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,mdu,mdu,maa,maa
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2
1,a,-0.227817,0.294865,0.528213,0.024642
2,a,-0.540878,1.296235,-0.33895,-0.4639
2,b,-1.433836,-0.068398,0.51913,0.963867
1,b,0.442383,-0.640651,-0.254035,-2.978992


In [15]:
# Swap the two row-index levels (the defaults, spelled out).
frame.swaplevel(i=-2, j=-1, axis=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,mdu,mdu,maa,maa
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2
a,1,-0.227817,0.294865,0.528213,0.024642
a,2,-0.540878,1.296235,-0.33895,-0.4639
b,2,-1.433836,-0.068398,0.51913,0.963867
b,1,0.442383,-0.640651,-0.254035,-2.978992


In [16]:
# Swap the two column-index levels instead of the row levels.
frame.swaplevel(axis='columns')

Unnamed: 0_level_0,Unnamed: 1_level_0,1,2,1,2
Unnamed: 0_level_1,Unnamed: 1_level_1,mdu,mdu,maa,maa
1,a,-0.227817,0.294865,0.528213,0.024642
2,a,-0.540878,1.296235,-0.33895,-0.4639
2,b,-1.433836,-0.068398,0.51913,0.963867
1,b,0.442383,-0.640651,-0.254035,-2.978992


In [17]:
# Name the row-index levels (mutates frame's index in place), then display.
# NOTE(review): level 0 holds the integers and level 1 the letters, so
# 'alpha' / 'rnumber' look swapped relative to the contents — confirm intent.
frame.index.names  = ['alpha','rnumber']
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,mdu,mdu,maa,maa
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2
alpha,rnumber,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,a,-0.227817,0.294865,0.528213,0.024642
2,a,-0.540878,1.296235,-0.33895,-0.4639
2,b,-1.433836,-0.068398,0.51913,0.963867
1,b,0.442383,-0.640651,-0.254035,-2.978992


In [18]:
# Name the column-index levels (mutates frame's columns in place), then display.
frame.columns.names = ['city', 'cnumber']
frame

Unnamed: 0_level_0,city,mdu,mdu,maa,maa
Unnamed: 0_level_1,cnumber,1,2,1,2
alpha,rnumber,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,a,-0.227817,0.294865,0.528213,0.024642
2,a,-0.540878,1.296235,-0.33895,-0.4639
2,b,-1.433836,-0.068398,0.51913,0.963867
1,b,0.442383,-0.640651,-0.254035,-2.978992


### Aggregation

In [19]:
# Sum rows that share the same label in row level 0.
# `DataFrame.sum(level=...)` was deprecated in pandas 1.3 and removed in 2.0;
# groupby on the level is the supported spelling. sort=False keeps groups in
# order of first appearance.
frame.groupby(level=0, sort=False).sum()

city,mdu,mdu,maa,maa
cnumber,1,2,1,2
alpha,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,0.214566,-0.345785,0.274178,-2.95435
2,-1.974713,1.227837,0.18018,0.499967


In [20]:
# Sum columns that share the same label in column level 1.
# `DataFrame.sum(level=...)` was removed in pandas 2.0 and column-wise
# groupby (axis=1) is deprecated, so transpose, group the rows, and transpose
# back. sort=False keeps groups in order of first appearance.
frame.T.groupby(level=1, sort=False).sum().T

Unnamed: 0_level_0,cnumber,1,2
alpha,rnumber,Unnamed: 2_level_1,Unnamed: 3_level_1
1,a,0.300396,0.319508
2,a,-0.879827,0.832335
2,b,-0.914706,0.895469
1,b,0.188348,-3.619643


In [21]:
# Sum rows that share the same label in row level 1.
# `DataFrame.sum(level=...)` was deprecated in pandas 1.3 and removed in 2.0;
# groupby on the level is the supported spelling. sort=False keeps groups in
# order of first appearance.
frame.groupby(level=1, sort=False).sum()

city,mdu,mdu,maa,maa
cnumber,1,2,1,2
rnumber,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,-0.768695,1.591101,0.189264,-0.439258
b,-0.991453,-0.709049,0.265095,-2.015125


In [22]:
# Sum columns that share the same 'city' label.
# `DataFrame.sum(level=...)` was removed in pandas 2.0 and column-wise
# groupby (axis=1) is deprecated, so transpose, group the rows, and transpose
# back. sort=False keeps the cities in order of first appearance (mdu, maa)
# rather than sorting them alphabetically.
frame.T.groupby(level='city', sort=False).sum().T

Unnamed: 0_level_0,city,mdu,maa
alpha,rnumber,Unnamed: 2_level_1,Unnamed: 3_level_1
1,a,0.067048,0.552856
2,a,0.755358,-0.80285
2,b,-1.502234,1.482996
1,b,-0.198268,-3.233027


In [23]:
# Plain row-wise total across every column, ignoring the column hierarchy.
frame.sum(axis='columns')

alpha  rnumber
1      a          0.619904
2      a         -0.047492
       b         -0.019237
1      b         -3.431295
dtype: float64

### Resetting index

In [24]:
# Rebind `frame` to a flat four-column table used for the index examples below.
frame = pd.DataFrame(
    {
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one'] * 3 + ['two'] * 4,
        'd': [0, 1, 2, 3, 0, 1, 2],
    }
)
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,3
4,4,3,two,0
5,5,2,two,1
6,6,1,two,2


In [25]:
# Promote columns 'b' and 'c' to a two-level row index; they leave the columns.
frame.set_index(keys=['b', 'c'], drop=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,d
b,c,Unnamed: 2_level_1,Unnamed: 3_level_1
7,one,0,0
6,one,1,1
5,one,2,2
4,two,3,3
3,two,4,0
2,two,5,1
1,two,6,2


In [26]:
# Same index as above, but keep 'b' and 'c' as ordinary columns too.
frame.set_index(keys=['b', 'c'], drop=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
b,c,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,one,0,7,one,0
6,one,1,6,one,1
5,one,2,5,one,2
4,two,3,4,two,3
3,two,4,3,two,0
2,two,5,2,two,1
1,two,6,1,two,2


In [27]:
# reset_index moves the index levels back into columns, placed first.
frame.set_index(keys=['b', 'c']).reset_index(drop=False)

Unnamed: 0,b,c,a,d
0,7,one,0,0
1,6,one,1,1
2,5,one,2,2
3,4,two,3,3
4,3,two,4,0
5,2,two,5,1
6,1,two,6,2
