<img src="http://imgur.com/1ZcRyrc.png" style="float: left; margin: 20px; height: 55px">

## A simple melt and pivot table example

_Authors: Kiefer Katovich (SF)_

---


In [2]:
import pandas as pd
import numpy as np

#### Create some fake data about beer quality and ratings.

In [3]:
# Collect the raw columns in a plain dict first. Every column must end up with
# one entry per beer (13 here) before the dict can become a DataFrame.
beer_columns = {
    'name': ['coors', 'bud', 'natural light', 'keystone ice',
             'sierra nevada', 'sam adams', 'rolling rock',
             'odouls',
             'pbr', 'stella', 'chimay', 'magnolia', '21amendment'],
    'class': ['crap', 'crap', 'crap', 'crap',
              'mid', 'mid', 'mid',
              'notabeer',
              'pretentious', 'pretentious', 'pretentious', 'pretentious', 'pretentious'],
}

# Prices are written one array per class, in the same order as the 'class'
# column above, then joined into a single 13-element array.
beer_columns['price'] = np.concatenate([np.array([1.5, 1.7, 1.2, 1.2]),
                                        np.array([2., 1.9, 2.1]),
                                        np.array([3.]),
                                        np.array([0.5, 3.5, 10., 15., 2.])])

# Ratings are random draws from Normal(mean=5, sd=2). A dedicated, seeded
# generator makes the notebook reproducible under Restart & Run All without
# touching NumPy's global random state.
rating_rng = np.random.RandomState(42)
beer_columns['rating'] = rating_rng.normal(5, 2, size=len(beer_columns['name']))

# Sanity check: show each column name with its length. Formatting the line
# ourselves prints "name 13" identically on Python 2 and Python 3.
for column_name, column_values in beer_columns.items():
    print('{} {}'.format(column_name, len(column_values)))

# `beers` is only ever bound to the DataFrame, never to the intermediate dict.
beers = pd.DataFrame(beer_columns)
beers

rating 13
price 13
name 13
class 13


Unnamed: 0,class,name,price,rating
0,crap,coors,1.5,6.468463
1,crap,bud,1.7,7.010511
2,crap,natural light,1.2,2.969393
3,crap,keystone ice,1.2,9.833387
4,mid,sierra nevada,2.0,5.929486
5,mid,sam adams,1.9,3.461477
6,mid,rolling rock,2.1,3.709538
7,notabeer,odouls,3.0,5.49213
8,pretentious,pbr,0.5,4.290215
9,pretentious,stella,3.5,5.375522


#### Melt the beer data into long format, with the name of the beer as the id variable.

In [4]:
# Reshape wide -> long: 'name' stays as the identifier column, and every other
# column is stacked into (variable, value) pairs.
beers_long = pd.melt(frame=beers, id_vars=['name'])

# Show the long table ordered by beer name so each beer's rows sit together.
beers_long.sort_values(by='name')

Unnamed: 0,name,variable,value
38,21amendment,rating,3.68992
25,21amendment,price,2
12,21amendment,class,pretentious
1,bud,class,crap
27,bud,rating,7.01051
14,bud,price,1.7
36,chimay,rating,3.13491
23,chimay,price,10
10,chimay,class,pretentious
26,coors,rating,6.46846


#### Use a pivot table on the original beer DataFrame to summarize the mean and standard deviation of rating and price by class.

In [5]:
# Summarize price and rating per beer class: one row per class, with a
# (statistic, column) MultiIndex across the top.
#
# The aggregations are given by name rather than as np.mean / np.std. Recent
# pandas releases warn when NumPy callables are passed here and plan to call
# them directly in the future, which would switch std from the sample
# standard deviation (ddof=1) to NumPy's population default (ddof=0).
# The string names keep the sample std, so a class with a single beer still
# yields NaN for std, and the column labels stay 'mean' and 'std'.
beer_class_summary = pd.pivot_table(beers, index='class', values=['rating', 'price'],
                                    aggfunc=['mean', 'std'])

In [6]:
# Display the per-class mean/std summary table built by the pivot_table call.
beer_class_summary

Unnamed: 0_level_0,mean,mean,std,std
Unnamed: 0_level_1,price,rating,price,rating
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
crap,1.4,6.570438,0.244949,2.817707
mid,2.0,4.366834,0.1,1.358969
notabeer,3.0,5.49213,,
pretentious,6.2,4.252578,6.109419,0.880103


#### Go from long format back to wide format using a pivot table and a custom aggregation function.

In [7]:
# Peek at the first three rows of the long-format table before pivoting it back.
beers_long.head(n=3)

Unnamed: 0,name,variable,value
0,coors,class,crap
1,bud,class,crap
2,natural light,class,crap


In [8]:
def first_item(series):
    """Return the first element of ``series`` by position.

    Used as a pivot_table aggregation function: when a group holds a single
    value, "aggregating" it just means picking that value out unchanged.

    Parameters
    ----------
    series : pandas.Series
        The values of one group; must contain at least one element.

    Returns
    -------
    object
        The element at position 0, regardless of the index labels.

    Raises
    ------
    IndexError
        If ``series`` is empty.
    """
    return series.iloc[0]

# Rebuild one row per beer: the distinct 'variable' labels become columns and
# the matching 'value' entries fill the cells. first_item picks the value out
# of each (name, variable) group.
new_beers_wide = beers_long.pivot_table(
    values='value',
    index='name',
    columns=['variable'],
    aggfunc=first_item,
)

# Move 'name' from the index back into an ordinary column for display.
new_beers_wide.reset_index()

variable,name,class,price,rating
0,21amendment,pretentious,2.0,3.68992
1,bud,crap,1.7,7.01051
2,chimay,pretentious,10.0,3.13491
3,coors,crap,1.5,6.46846
4,keystone ice,crap,1.2,9.83339
5,magnolia,pretentious,15.0,4.77233
6,natural light,crap,1.2,2.96939
7,odouls,notabeer,3.0,5.49213
8,pbr,pretentious,0.5,4.29022
9,rolling rock,mid,2.1,3.70954
