# pandas.pivot_table

In [1]:
import platform

# Report the version of the interpreter that runs this kernel. The shell
# command `Python -V` relied on a case-insensitive filesystem to resolve the
# capitalised executable name and reported whichever interpreter was on PATH.
python_version = platform.python_version()
print(f"Python {python_version}")

Python 3.7.4


## Import

In [2]:
import pandas as pd
import numpy as np

## Pivot Tableの作成

In [3]:
# Sample frame for the pivot-table examples: two string keys (A, B), a size
# label (C) and two integer value columns (D, E).
df = pd.DataFrame(
    {
        "A": ["foo"] * 5 + ["bar"] * 4,
        "B": ["one"] * 3 + ["two"] * 2 + ["one"] * 2 + ["two"] * 2,
        "C": [
            "small", "large", "large",
            "small", "small", "large",
            "small", "small", "large",
        ],
        "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
        "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
    }
)
df

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
1,foo,one,large,2,4
2,foo,one,large,2,5
3,foo,two,small,3,5
4,foo,two,small,3,6
5,bar,one,large,4,6
6,bar,one,small,5,8
7,bar,two,small,6,9
8,bar,two,large,7,9


In [4]:
# Sum D for each (A, B) row group and each C column; (A, B, C) combinations
# absent from df come out as NaN. The aggregation is named by string because
# newer pandas releases warn when the NumPy callable np.sum is passed instead.
df.pivot_table(values="D", index=["A", "B"], columns=["C"], aggfunc="sum")

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4.0,5.0
bar,two,7.0,6.0
foo,one,4.0,1.0
foo,two,,6.0


In [5]:
# Same sum of D per (A, B) x C as the previous cell, but (A, B, C)
# combinations that are absent from df are filled with 0 instead of NaN.
# The aggregation is named by string because newer pandas releases warn when
# the NumPy callable np.sum is passed instead.
pivot_df = df.pivot_table(
    values="D",
    index=["A", "B"],
    columns=["C"],
    aggfunc="sum",
    fill_value=0,
)
pivot_df

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4,5
bar,two,7,6
foo,one,4,1
foo,two,0,6


In [6]:
# Count the distinct D values inside each (A, B) x C cell by passing a custom
# callable as the aggregation.
df.pivot_table(
    values="D",
    index=["A", "B"],
    columns=["C"],
    aggfunc=lambda group: len(np.unique(group)),
)

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,1.0,1.0
bar,two,1.0,1.0
foo,one,1.0,1.0
foo,two,,1.0


In [7]:
# Aggregate several value columns at once with a per-column spec: the mean of
# D, and the min, max and mean of E, for every (A, C) group. Aggregations are
# named by string because newer pandas releases warn when the builtins min/max
# or the NumPy callable np.mean are passed instead.
table = df.pivot_table(
    values=["D", "E"],
    index=["A", "C"],
    aggfunc={"D": "mean", "E": ["min", "max", "mean"]},
)
table

Unnamed: 0_level_0,Unnamed: 1_level_0,D,E,E,E
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,max,mean,min
A,C,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,large,5.5,9.0,7.5,6.0
bar,small,5.5,9.0,8.5,8.0
foo,large,2.0,5.0,4.5,4.0
foo,small,2.333333,6.0,4.333333,2.0


## Multiindexを用いたpivot tableの作成

In [8]:
# Pair each first-level label with each second-level label (first-level label
# varying slowest) to obtain the eight (first, second) index tuples.
tuples = [
    (outer, inner)
    for outer in ["bar", "baz", "foo", "qux"]
    for inner in ["one", "two"]
]

# Two-level row index whose levels are named 'first' and 'second'.
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [9]:
# Draw the sample values from a generator with a fixed seed so that a fresh
# Restart & Run All reproduces the same 8x2 frame of standard-normal numbers.
rng = np.random.RandomState(0)

# NOTE: this rebinds `df`; the sample frame used by the pivot_table cells above
# is replaced, and the stack/unstack and Excel cells below use this new frame.
df = pd.DataFrame(rng.randn(8, 2), index=index, columns=["A", "B"])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,1.178915,-0.421027
bar,two,0.362499,-0.227014
baz,one,-1.081949,0.62297
baz,two,-0.008162,0.503971
foo,one,-0.085287,0.838095
foo,two,0.389447,-0.693295
qux,one,-1.869498,-0.025119
qux,two,0.482544,-0.154233


### stackを用いた変換

In [10]:
# Move the innermost column level (the labels A and B) into the row index.
# level=-1 is stack's default and is spelled out only for readability.
stacked = df.stack(level=-1)
stacked

first  second   
bar    one     A    1.178915
               B   -0.421027
       two     A    0.362499
               B   -0.227014
baz    one     A   -1.081949
               B    0.622970
       two     A   -0.008162
               B    0.503971
foo    one     A   -0.085287
               B    0.838095
       two     A    0.389447
               B   -0.693295
qux    one     A   -1.869498
               B   -0.025119
       two     A    0.482544
               B   -0.154233
dtype: float64

In [11]:
# Inspect which container type df.stack() produced for `stacked`.
type(stacked)

pandas.core.series.Series

### unstack

In [12]:
# Pivot the innermost index level back into columns.
# level=-1 is unstack's default and is spelled out only for readability.
stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,1.178915,-0.421027
bar,two,0.362499,-0.227014
baz,one,-1.081949,0.62297
baz,two,-0.008162,0.503971
foo,one,-0.085287,0.838095
foo,two,0.389447,-0.693295
qux,one,-1.869498,-0.025119
qux,two,0.482544,-0.154233


In [13]:
# Pivot the outermost index level into columns. That level sits at position 0
# and was named 'first' when the MultiIndex was created.
stacked.unstack(level="first")

Unnamed: 0_level_0,first,bar,baz,foo,qux
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,A,1.178915,-1.081949,-0.085287,-1.869498
one,B,-0.421027,0.62297,0.838095,-0.025119
two,A,0.362499,-0.008162,0.389447,0.482544
two,B,-0.227014,0.503971,-0.693295,-0.154233


In [14]:
# Pivot the middle index level into columns. That level sits at position 1
# and was named 'second' when the MultiIndex was created.
stacked.unstack(level="second")

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,1.178915,0.362499
bar,B,-0.421027,-0.227014
baz,A,-1.081949,-0.008162
baz,B,0.62297,0.503971
foo,A,-0.085287,0.389447
foo,B,0.838095,-0.693295
qux,A,-1.869498,0.482544
qux,B,-0.025119,-0.154233


## Pivot Tableの読み込みから変換

In [15]:
# Write the MultiIndex frame to an Excel workbook in the working directory,
# then show the frame through the notebook's rich display rather than print().
df.to_excel("pivot_table.xlsx")
df

                     A         B
first second                    
bar   one     1.178915 -0.421027
      two     0.362499 -0.227014
baz   one    -1.081949  0.622970
      two    -0.008162  0.503971
foo   one    -0.085287  0.838095
      two     0.389447 -0.693295
qux   one    -1.869498 -0.025119
      two     0.482544 -0.154233


In [16]:
# Load the first sheet of the workbook back, using its first row as the
# header. No index columns are requested, so 'first' and 'second' come back as
# ordinary columns, and repeated 'first' labels are read as missing values.
df_excel = pd.read_excel("pivot_table.xlsx", sheet_name=0, header=0)
df_excel

Unnamed: 0,first,second,A,B
0,bar,one,1.178915,-0.421027
1,,two,0.362499,-0.227014
2,baz,one,-1.081949,0.62297
3,,two,-0.008162,0.503971
4,foo,one,-0.085287,0.838095
5,,two,0.389447,-0.693295
6,qux,one,-1.869498,-0.025119
7,,two,0.482544,-0.154233


In [17]:
# Forward-fill the missing 'first' labels with the value from the row above.
# DataFrame.ffill() replaces fillna(method='ffill'), which newer pandas
# releases deprecate. Note that it fills missing values in every column.
df_excel.ffill()

Unnamed: 0,first,second,A,B
0,bar,one,1.178915,-0.421027
1,bar,two,0.362499,-0.227014
2,baz,one,-1.081949,0.62297
3,baz,two,-0.008162,0.503971
4,foo,one,-0.085287,0.838095
5,foo,two,0.389447,-0.693295
6,qux,one,-1.869498,-0.025119
7,qux,two,0.482544,-0.154233
