## Stack and unstack

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Build a 2x4 frame holding the integers 0..7, with a named row index
# ('student') and a named column index ('letter')
dframe1 = pd.DataFrame(
    data=np.arange(8).reshape(2, 4),
    index=pd.Index(['Tom', 'Bob'], name='student'),
    columns=pd.Index(['A', 'B', 'C', 'D'], name='letter'),
)

In [3]:
# Display the frame: rows are labelled by 'student', columns by 'letter'
dframe1

letter,A,B,C,D
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tom,0,1,2,3
Bob,4,5,6,7


In [4]:
# Pivot the 'letter' column labels into the innermost level of the row index
dframe_st = dframe1.stack(level=-1)

In [5]:
# Show the stacked result: a Series indexed by (student, letter)
dframe_st 

student  letter
Tom      A         0
         B         1
         C         2
         D         3
Bob      A         4
         B         5
         C         6
         D         7
dtype: int64

In [6]:
# Undo the stack: the innermost index level ('letter') becomes the columns again
dframe_st.unstack(level=-1)

letter,A,B,C,D
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tom,0,1,2,3
Bob,4,5,6,7


In [7]:
# Pick the level to pivot by position: level 0 ('student') becomes the columns
dframe_st.unstack(level=0)

student,Tom,Bob
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [8]:
# A level can also be selected by its name instead of its position
dframe_st.unstack(level='letter')

letter,A,B,C,D
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tom,0,1,2,3
Bob,4,5,6,7


In [9]:
# Selecting the outer level by name: the 'student' labels become the columns
dframe_st.unstack(level='student')

student,Tom,Bob
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [10]:
# Explore how unstack and stack treat missing values (NaN)

# Two small Series whose index labels do not overlap
ser1 = Series({'A': 1, 'B': 2, 'C': 3})
ser2 = Series({'X': 7, 'Y': 6, 'Z': 5})

# Concatenating under the keys 'Alpha' and 'Beta' yields one Series with a
# two-level index (the name `dframe` is kept because later cells refer to it)
dframe = pd.concat({'Alpha': ser1, 'Beta': ser2})

# Unstacking the inner level produces a DataFrame; label combinations that
# do not exist in the Series show up as NaN
dframe.unstack(level=-1)

Unnamed: 0,A,B,C,X,Y,Z
Alpha,1.0,2.0,3.0,,,
Beta,,,,7.0,6.0,5.0


In [11]:
# Round trip: stack drops the NaN entries by default here, so only the six
# original values come back (see the output below)
# NOTE(review): this is the legacy stack behaviour; pandas' newer
# `future_stack` implementation handles NaN differently - confirm for your version
dframe.unstack(level=-1).stack(level=-1)

Alpha  A    1.0
       B    2.0
       C    3.0
Beta   X    7.0
       Y    6.0
       Z    5.0
dtype: float64

In [12]:
# To keep the NaN entries in the stacked result, pass dropna=False
# NOTE(review): `dropna` belongs to the legacy stack implementation; check
# the pandas version in use before relying on it
dframe.unstack(level=-1).stack(level=-1, dropna=False)

Alpha  A    1.0
       B    2.0
       C    3.0
       X    NaN
       Y    NaN
       Z    NaN
Beta   A    NaN
       B    NaN
       C    NaN
       X    7.0
       Y    6.0
       Z    5.0
dtype: float64