In [1]:
import pandas as pd
import numpy as np

### Reshaping with hierarchical indexing: stack and unstack

In [2]:
# stack   : pivots column labels into the row index   (col -> row)
# unstack : pivots row-index labels into the columns  (row -> col)
#
# Small 2x3 demo frame: rows are labelled by 'state', columns by 'number'.
data = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=pd.Index(['Ohio', 'Colorado'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number'),
)
data

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [5]:
# Pivot the column labels into the innermost level of the row index; the
# result is a Series carrying a multi-level (state, number) index.
res = data.stack(level=-1)  # -1 (innermost column level) is the default
res

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

In [6]:
# unstack() turns the stacked Series back into a DataFrame; when no level is
# given, the innermost index level is the one moved into the columns.
res.unstack(level=-1)


number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [7]:
# Pass a level number (or a level name) to unstack a different level;
# here the outer level is moved -- equivalent to .unstack('state').
res.unstack(level=0)

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [9]:
# Unstacking can introduce NaNs when a label of the inner level is not present
# in every group of the outer level. Build two Series whose labels overlap
# only partly ('c' and 'd' are shared) to set up that situation.
s1 = pd.Series([0, 1, 2, 3], index=list('abcd'))
s2 = pd.Series([4, 5, 6], index=list('cde'))
print(s1, '\n', s2)

a    0
b    1
c    2
d    3
dtype: int64 
 c    4
d    5
e    6
dtype: int64


In [10]:
# Glue the two Series end to end; each entry of `keys` labels one piece and
# shows up as the outer level of the resulting two-level index.
res = pd.concat(objs=[s1, s2], keys=["one", "two"])
res

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

In [12]:
# Default: the innermost level is unstacked. Labels missing from one of the
# groups show up as NaN in the resulting frame.
res.unstack(level=-1)

Unnamed: 0,a,b,c,d,e
one,0.0,1.0,2.0,3.0,
two,,,4.0,5.0,6.0


In [13]:
# Round trip: stacking the unstacked frame again removes the NaN entries
# (see the recorded output below).
(
    res
    .unstack()
    .stack()
)


one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

In [14]:
# Same round trip, but dropna=False keeps the NaN entries when stacking.
# NOTE(review): recent pandas releases deprecate the `dropna` argument in
# favour of the `future_stack=True` implementation -- confirm against the
# pandas version this notebook is meant to run on.
(
    res
    .unstack()
    .stack(dropna=False)
)


one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64

In [20]:
# DataFrames
# When a DataFrame is unstacked, the level that is moved becomes the lowest
# (innermost) level of the resulting columns.
#
# data.unstack() here produces a Series indexed by (number, state); it is
# used as the 'left' column, and the same values shifted by 5 as 'right'.
res = data.unstack()
df = pd.DataFrame(
    data={'left': res, 'right': res + 5},
    columns=pd.Index(['left', 'right'], name='side'),
)
df

Unnamed: 0_level_0,side,left,right
number,state,Unnamed: 2_level_1,Unnamed: 3_level_1
one,Ohio,0,5
one,Colorado,3,8
two,Ohio,1,6
two,Colorado,4,9
three,Ohio,2,7
three,Colorado,5,10


In [22]:
# Move the 'state' row level into the columns; it lands beneath 'side'.
df.unstack(level='state')

side,left,left,right,right
state,Ohio,Colorado,Ohio,Colorado
number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,0,3,5,8
two,1,4,6,9
three,2,5,7,10


In [23]:
# stack() also accepts the name of the level to move: after unstacking
# 'state' into the columns, push the 'side' column level back into the rows.
(
    df
    .unstack(level='state')
    .stack(level='side')
)

Unnamed: 0_level_0,state,Ohio,Colorado
number,side,Unnamed: 2_level_1,Unnamed: 3_level_1
one,left,0,3
one,right,5,8
two,left,1,4
two,right,6,9
three,left,2,5
three,right,7,10
