<img src="http://imgur.com/1ZcRyrc.png" style="float: left; margin: 20px; height: 55px">

## A Simple `.melt()` and `.pivot_table()` Example




In [1]:
import pandas as pd
import numpy as np

C:\Install\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.CSRRD7HKRKC3T3YXA7VY7TAZGLSWDKW6.gfortran-win_amd64.dll
C:\Install\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.IPBC74C7KURV7CB2PKT5Z5FNR3SIBV4J.gfortran-win_amd64.dll
  stacklevel=1)


## Today: Wide and Long formats with `pandas`:

#### Create some fake data about beer quality and ratings.

In [2]:
beers = {
    'name':['coors','bud','natural light','keystone ice',
            'sierra nevada', 'sam adams', 'new belgium',
            'odouls',
            'pbr','stella','chimay','magnolia','21amendment'],
    'class':['crap','crap','crap','crap',
             'mid','mid','mid',
             'notabeer',
             'pretentious','pretentious','pretentious','pretentious','pretentious']
}
beers['price'] = np.array([1.5,  1.7,  1.2,  1.2,  2.,  1.9,  2.1,  3.,  0.5,  3.5, 10.,
                           15.,  2.])
beers['rating'] = np.random.normal(5, 2, size=13)

In [3]:
beers

{'name': ['coors',
  'bud',
  'natural light',
  'keystone ice',
  'sierra nevada',
  'sam adams',
  'new belgium',
  'odouls',
  'pbr',
  'stella',
  'chimay',
  'magnolia',
  '21amendment'],
 'class': ['crap',
  'crap',
  'crap',
  'crap',
  'mid',
  'mid',
  'mid',
  'notabeer',
  'pretentious',
  'pretentious',
  'pretentious',
  'pretentious',
  'pretentious'],
 'price': array([ 1.5,  1.7,  1.2,  1.2,  2. ,  1.9,  2.1,  3. ,  0.5,  3.5, 10. ,
        15. ,  2. ]),
 'rating': array([4.91827439, 4.29741841, 4.8541658 , 3.99176656, 2.12780488,
        4.5598422 , 2.03051626, 4.07670714, 5.99961407, 6.27158141,
        6.38731188, 6.06179088, 5.04082174])}

In [4]:
# let's turn this dictionary into a dataframe
beers = pd.DataFrame(beers)
beers.head()

Unnamed: 0,name,class,price,rating
0,coors,crap,1.5,4.918274
1,bud,crap,1.7,4.297418
2,natural light,crap,1.2,4.854166
3,keystone ice,crap,1.2,3.991767
4,sierra nevada,mid,2.0,2.127805


In [5]:
beers.to_clipboard()

In [25]:
beers

Unnamed: 0,name,class,price,rating
0,coors,crap,1.5,4.918274
1,bud,crap,1.7,4.297418
2,natural light,crap,1.2,4.854166
3,keystone ice,crap,1.2,3.991767
4,sierra nevada,mid,2.0,2.127805
5,sam adams,mid,1.9,4.559842
6,new belgium,mid,2.1,2.030516
7,odouls,notabeer,3.0,4.076707
8,pbr,pretentious,0.5,5.999614
9,stella,pretentious,3.5,6.271581


#### 1. Example of melting the beer data into long format using the name of the beer as the ID.

In [7]:
beers_long = pd.melt(beers, id_vars=['name'])
beers_long.sort_values('name', axis=0, inplace=True)
beers_long.head()

Unnamed: 0,name,variable,value
38,21amendment,rating,5.04082
25,21amendment,price,2
12,21amendment,class,pretentious
1,bud,class,crap
27,bud,rating,4.29742


#### 2. Use `.pivot_table()` on the original beer DataFrame to summarize the mean and standard deviation for rating and price by class.

In [27]:
beers

Unnamed: 0,name,class,price,rating
0,coors,crap,1.5,4.918274
1,bud,crap,1.7,4.297418
2,natural light,crap,1.2,4.854166
3,keystone ice,crap,1.2,3.991767
4,sierra nevada,mid,2.0,2.127805
5,sam adams,mid,1.9,4.559842
6,new belgium,mid,2.1,2.030516
7,odouls,notabeer,3.0,4.076707
8,pbr,pretentious,0.5,5.999614
9,stella,pretentious,3.5,6.271581


In [9]:
beer_class_summary = pd.pivot_table(beers, index='class', values=['rating','price'],
                                   aggfunc=[np.mean])
#                                    aggfunc=[np.mean, np.std])

In [10]:
beer_class_summary

Unnamed: 0_level_0,mean,mean
Unnamed: 0_level_1,price,rating
class,Unnamed: 1_level_2,Unnamed: 2_level_2
crap,1.4,4.515406
mid,2.0,2.906054
notabeer,3.0,4.076707
pretentious,6.2,5.952224


#### 3. Go from long format back to wide format using `.pivot_table()` and a custom aggregate function.

In [11]:
# from #1.
beers_long.head()

Unnamed: 0,name,variable,value
38,21amendment,rating,5.04082
25,21amendment,price,2
12,21amendment,class,pretentious
1,bud,class,crap
27,bud,rating,4.29742


In [12]:
mask = beers_long.variable == 'rating'

In [13]:
beers_long_msk = beers_long[mask].copy()

In [14]:
beers_long_msk.dtypes

name        object
variable    object
value       object
dtype: object

In [15]:
beers_long_msk

Unnamed: 0,name,variable,value
38,21amendment,rating,5.04082
27,bud,rating,4.29742
36,chimay,rating,6.38731
26,coors,rating,4.91827
29,keystone ice,rating,3.99177
37,magnolia,rating,6.06179
28,natural light,rating,4.85417
32,new belgium,rating,2.03052
33,odouls,rating,4.07671
34,pbr,rating,5.99961


In [16]:
beer_value = beers_long_msk.pop('value')
beer_value

38    5.04082
27    4.29742
36    6.38731
26    4.91827
29    3.99177
37    6.06179
28    4.85417
32    2.03052
33    4.07671
34    5.99961
31    4.55984
30     2.1278
35    6.27158
Name: value, dtype: object

In [17]:
beers_long_msk['value'] = beer_value.astype(float)

In [18]:
beers_long_msk

Unnamed: 0,name,variable,value
38,21amendment,rating,5.040822
27,bud,rating,4.297418
36,chimay,rating,6.387312
26,coors,rating,4.918274
29,keystone ice,rating,3.991767
37,magnolia,rating,6.061791
28,natural light,rating,4.854166
32,new belgium,rating,2.030516
33,odouls,rating,4.076707
34,pbr,rating,5.999614


In [20]:
new_beers_wide = pd.pivot_table(beers_long_msk, index='name', values='value',
                                columns=['variable'], aggfunc=np.mean)
new_beers_wide

variable,rating
name,Unnamed: 1_level_1
21amendment,5.040822
bud,4.297418
chimay,6.387312
coors,4.918274
keystone ice,3.991767
magnolia,6.061791
natural light,4.854166
new belgium,2.030516
odouls,4.076707
pbr,5.999614


In [21]:
new_beers_wide.reset_index(drop=False, inplace=True)
new_beers_wide

variable,name,rating
0,21amendment,5.040822
1,bud,4.297418
2,chimay,6.387312
3,coors,4.918274
4,keystone ice,3.991767
5,magnolia,6.061791
6,natural light,4.854166
7,new belgium,2.030516
8,odouls,4.076707
9,pbr,5.999614
