In [1]:
### Pandas Reshaping

## In this activity, we will learn what "stacking" means and what "pivot tables" are.
## Both are important pandas reshaping tools that most relational database management systems do not offer out of the box.

In [2]:
### Stack


## The stack() method "compresses" a level in the DataFrame's columns. Let's give it a try.

## First, we create the DataFrame that we will work with.

In [5]:
import pandas as pd

In [6]:
import numpy as np

In [7]:
# Pair every first-level label with every second-level label, in row order:
# ('bar', 'one'), ('bar', 'two'), ('baz', 'one'), ...
tuples = [(first, second)
          for first in ['bar', 'baz', 'foo', 'qux']
          for second in ['one', 'two']]


In [8]:
 # Two-level row index built from the label pairs; the levels are named
 # 'first' and 'second'.
 index = pd.MultiIndex.from_tuples(
     tuples,
     names=['first', 'second'],
 )


In [11]:
# Draw the 8x2 block of standard-normal values from a locally seeded generator
# so that this frame -- and everything derived from it -- is identical on every
# Restart & Run All, without touching NumPy's global random state.
# NOTE: the outputs recorded in this notebook were produced before seeding;
# re-run the cells to refresh them.
rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(8, 2), index=index, columns=['A', 'B'])

In [12]:
 # Keep only the first four rows by position (the 'bar' and 'baz' groups).
 df2 = df.iloc[:4]

In [13]:
## Now, we are going to use the stack() method to "compress" the columns into the index.

In [14]:
# stack() moves the column labels 'A' and 'B' into a new innermost index
# level; the result displayed below is a Series with a three-level index.
stacked = df2.stack()

In [15]:
# Bare expression as the last line of the cell: displays `stacked`.
stacked

first  second   
bar    one     A    0.874181
               B   -0.137061
       two     A   -0.836224
               B    0.821686
baz    one     A    0.135790
               B   -0.673129
       two     A   -0.511226
               B   -0.250141
dtype: float64

In [16]:
## With a "stacked" DataFrame or Series (having a MultiIndex as the index), the inverse operation of stack() is unstack(), which by default unstacks the last level:

In [17]:
# No level argument: the innermost index level (the 'A'/'B' labels) becomes
# the columns again.
stacked.unstack()

                     A         B
first second
bar   one     0.874181 -0.137061
      two    -0.836224  0.821686
baz   one     0.135790 -0.673129
      two    -0.511226 -0.250141


In [18]:
# Pivot index level 1 ('second': one/two) into the columns.
stacked.unstack(level=1)

second        one       two
first
bar   A  0.874181 -0.836224
      B -0.137061  0.821686
baz   A  0.135790 -0.511226
      B -0.673129 -0.250141


In [19]:
# Pivot index level 0 ('first': bar/baz) into the columns.
stacked.unstack(level=0)

first          bar       baz
second
one    A  0.874181  0.135790
       B -0.137061 -0.673129
two    A -0.836224 -0.511226
       B  0.821686 -0.250141


In [20]:
### Pivot Tables


## Once more, let's create the data that we'll work with:


In [21]:
# Source data for the pivot-table example: three categorical key columns
# ('A', 'B', 'C') and two numeric columns ('D', 'E') of standard-normal values.
# The numeric columns come from a locally seeded generator so that the frame,
# and any pivot table built from it, is identical on every Restart & Run All.
# NOTE: the outputs recorded in this notebook were produced before seeding;
# re-run the cells to refresh them.
rng = np.random.RandomState(0)
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
                   'B': ['A', 'B', 'C'] * 4,
                   'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                   'D': rng.randn(12),
                   'E': rng.randn(12)})

In [22]:
## We can produce pivot tables from this data very easily:

In [23]:
# Rows are keyed by (A, B), with one column per distinct value of C; each cell
# aggregates the matching 'D' values (pivot_table's default aggregation is the
# mean).
df.pivot_table(values='D', index=['A', 'B'], columns=['C'])

C             bar       foo
A     B
one   A  0.242472  0.396235
      B -0.919339  0.817883
      C  1.041946  1.739112
three A  0.179580       NaN
      B       NaN -1.360772
      C -1.090638       NaN
two   A       NaN  0.272215
      B -1.444612       NaN
      C       NaN  0.688391


In [26]:
## [More info on creating pivot tables here](https://www.lumeer.io/pivot-table-complete-guide/)