<img src="http://imgur.com/1ZcRyrc.png" style="float: left; margin: 20px; height: 55px">

## A Simple `.melt()` and `.pivot_table()` Example




In [83]:
import pandas as pd
import numpy as np

#### Some reviews first:

With the following pandas DataFrame:

In [84]:
df1 = pd.DataFrame({'A': [1,2,3,4],
                    'B': ['b','b','b','b'],
                    'C': ['C0', 'C1', 'C2', 'C3'],
                    'D': ['D0', 'D1', 'D2', 'D3']},
                    index=[0, 1, 2, 3])

df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                    'B': ['B4', 'B5', 'B6', 'B7'],
                    'C': ['C4', 'C5', 'C6', 'C7'],
                    'D': ['D4', 'D5', 'D6', 'D7']},
                    index=[4, 5, 6, 7])

In groups of 2, I'd like each group to tackle one question (if you are done, with your question, try yourself on a different one:
    - delete column 'B' of df1 and sort by column 'A'
    - add a columns '007' with 0's on df1 and sort by column 'A'
    - merge df1, df2 horizontally on a same index base and sort by column 'A'
    - rename df2's columns to lowercase 
    - groupby df1 on 'B' with the mean of 'A' (and return a dataframe)
    - add a row with 1's on df2
    

#### Create some fake data about beer quality and ratings.

In [85]:
beers = {
    'name':['coors','bud','natural light','keystone ice',
            'sierra nevada', 'sam adams', 'new belgium',
            'odouls',
            'pbr','stella','chimay','magnolia','21amendment'],
    'class':['crap','crap','crap','crap',
             'mid','mid','mid',
             'notabeer',
             'pretentious','pretentious','pretentious','pretentious','pretentious']
}
beers['price'] = np.array([1.5,  1.7,  1.2,  1.2,  2.,  1.9,  2.1,  3.,  0.5,  3.5, 10.,
                           15.,  2.])
beers['rating'] = np.random.normal(5, 2, size=13)

In [86]:
beers

{'name': ['coors',
  'bud',
  'natural light',
  'keystone ice',
  'sierra nevada',
  'sam adams',
  'new belgium',
  'odouls',
  'pbr',
  'stella',
  'chimay',
  'magnolia',
  '21amendment'],
 'class': ['crap',
  'crap',
  'crap',
  'crap',
  'mid',
  'mid',
  'mid',
  'notabeer',
  'pretentious',
  'pretentious',
  'pretentious',
  'pretentious',
  'pretentious'],
 'price': array([ 1.5,  1.7,  1.2,  1.2,  2. ,  1.9,  2.1,  3. ,  0.5,  3.5, 10. ,
        15. ,  2. ]),
 'rating': array([6.60463883, 6.23019692, 4.84476183, 4.49433225, 4.54012793,
        5.39192845, 3.40673461, 4.99367552, 1.52642376, 7.1914847 ,
        2.07770209, 1.42844456, 5.88567694])}

In [87]:
# let's turn this dictionary into a dataframe
beers = pd.DataFrame(beers)
beers

Unnamed: 0,class,name,price,rating
0,crap,coors,1.5,6.604639
1,crap,bud,1.7,6.230197
2,crap,natural light,1.2,4.844762
3,crap,keystone ice,1.2,4.494332
4,mid,sierra nevada,2.0,4.540128
5,mid,sam adams,1.9,5.391928
6,mid,new belgium,2.1,3.406735
7,notabeer,odouls,3.0,4.993676
8,pretentious,pbr,0.5,1.526424
9,pretentious,stella,3.5,7.191485


In [88]:
beers.to_clipboard()

#### 1. Example of melting the beer data into long format using the name of the beer as the ID.

In [89]:
beers_long = pd.melt(beers, id_vars=['name'])
beers_long.sort_values('name', axis=0, inplace=True)
beers_long.head()

Unnamed: 0,name,variable,value
38,21amendment,rating,5.88568
25,21amendment,price,2
12,21amendment,class,pretentious
1,bud,class,crap
27,bud,rating,6.2302


#### 2. Use `.pivot_table()` on the original beer DataFrame to summarize the mean and standard deviation for rating and price by class.

In [113]:
beers

Unnamed: 0,class,name,price,rating
0,crap,coors,1.5,6.604639
1,crap,bud,1.7,6.230197
2,crap,natural light,1.2,4.844762
3,crap,keystone ice,1.2,4.494332
4,mid,sierra nevada,2.0,4.540128
5,mid,sam adams,1.9,5.391928
6,mid,new belgium,2.1,3.406735
7,notabeer,odouls,3.0,4.993676
8,pretentious,pbr,0.5,1.526424
9,pretentious,stella,3.5,7.191485


In [131]:
beer_class_summary = pd.pivot_table(beers, index='class', values=['rating','price'],
                                   aggfunc=[np.mean])
#                                    aggfunc=[np.mean, np.std])

In [132]:
beer_class_summary

Unnamed: 0_level_0,mean,mean
Unnamed: 0_level_1,price,rating
class,Unnamed: 1_level_2,Unnamed: 2_level_2
crap,1.4,5.543482
mid,2.0,4.446264
notabeer,3.0,4.993676
pretentious,6.2,3.621946


#### 3. Go from long format back to wide format using `.pivot_table()` and a custom aggregate function.

In [133]:
# from #1.
beers_long.head()

Unnamed: 0,name,variable,value
38,21amendment,rating,5.88568
25,21amendment,price,2
12,21amendment,class,pretentious
1,bud,class,crap
27,bud,rating,6.2302


In [134]:
mask = beers_long.variable == 'rating'

In [137]:
beers_long_msk = beers_long[mask].copy()

In [138]:
beers_long_msk.dtypes

name        object
variable    object
value       object
dtype: object

In [139]:
beers_long_msk

Unnamed: 0,name,variable,value
38,21amendment,rating,5.88568
27,bud,rating,6.2302
36,chimay,rating,2.0777
26,coors,rating,6.60464
29,keystone ice,rating,4.49433
37,magnolia,rating,1.42844
28,natural light,rating,4.84476
32,new belgium,rating,3.40673
33,odouls,rating,4.99368
34,pbr,rating,1.52642


In [140]:
beer_value = beers_long_msk.pop('value')

In [141]:
beers_long_msk['value'] = beer_value.astype(float)

In [142]:
beers_long_msk

Unnamed: 0,name,variable,value
38,21amendment,rating,5.885677
27,bud,rating,6.230197
36,chimay,rating,2.077702
26,coors,rating,6.604639
29,keystone ice,rating,4.494332
37,magnolia,rating,1.428445
28,natural light,rating,4.844762
32,new belgium,rating,3.406735
33,odouls,rating,4.993676
34,pbr,rating,1.526424


In [143]:
new_beers_wide = pd.pivot_table(beers_long_msk, index='name', values='value',
                                columns=['variable'], aggfunc=np.mean)
new_beers_wide.reset_index(drop=False, inplace=True)

In [144]:
new_beers_wide


variable,name,rating
0,21amendment,5.885677
1,bud,6.230197
2,chimay,2.077702
3,coors,6.604639
4,keystone ice,4.494332
5,magnolia,1.428445
6,natural light,4.844762
7,new belgium,3.406735
8,odouls,4.993676
9,pbr,1.526424


In [145]:
new_beers_wide.rating.sum()

58.61612839382007

In [146]:
beers.rating.sum()

58.61612839382007