## 任务4.5　创建透视表与交叉表

## 代码4-67　使用订单号作为透视表索引制作透视表

In [1]:
import pandas
import numpy
from sqlalchemy import create_engine
# Connect to the SQL Server database that holds the order-detail table.
engine = create_engine('mssql+pymssql://sa:123456@localhost:1433/testdb?charset=utf8')
# Load the complete order-detail table into a DataFrame.
detail = pandas.read_sql_table('meal_order_detail1', con=engine)
# Group by order_id; no aggfunc is given, so pivot_table falls back to its
# default aggregation (the mean of each remaining column).
detailPivot = detail[['order_id', 'counts', 'amounts']].pivot_table(index='order_id')
print('以order_id作为分组键创建的订单透视表为：')
print(detailPivot.head())

以order_id作为分组键创建的订单透视表为：
          amounts  counts
order_id                 
1002       32.000  1.0000
1003       30.125  1.2500
1004       43.875  1.0625
1008       63.000  1.0000
1011       57.700  1.0000


## 代码4-68　修改聚合函数后的透视表

In [2]:
# Total counts and amounts per order.
# The aggregation is named with the string 'sum' instead of numpy.sum: the
# result is the same, but recent pandas releases emit a FutureWarning when the
# NumPy callable is passed as aggfunc.
detailPivot1 = pandas.pivot_table(
    detail[['order_id', 'counts', 'amounts']], index='order_id', aggfunc='sum'
)
print('以order_id作为分组键创建的订单销量与售价总和透视表为：', detailPivot1.head(), sep='\n')

以order_id作为分组键创建的订单销量与售价总和透视表为：
          amounts  counts
order_id                 
1002        224.0     7.0
1003        241.0    10.0
1004        702.0    17.0
1008        315.0     5.0
1011        577.0    10.0


## 代码4-69　使用订单号和菜品名称作为索引的透视表

In [3]:
# Two-level row index (order_id, dishes_name): totals per dish within each order.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
detailPivot2 = pandas.pivot_table(
    detail[['order_id', 'dishes_name', 'counts', 'amounts']],
    index=['order_id', 'dishes_name'], aggfunc='sum'
)
print('以order_id和dishes_name作为分组键创建的订单销量与售价总和透视表为：', detailPivot2.head(), sep='\n')

以order_id和dishes_name作为分组键创建的订单销量与售价总和透视表为：
                      amounts  counts
order_id dishes_name                 
1002     凉拌菠菜            27.0     1.0
         南瓜枸杞小饼干         19.0     1.0
         焖猪手             58.0     1.0
         独家薄荷鲜虾牛肉卷       45.0     1.0
         白胡椒胡萝卜羊肉汤       35.0     1.0


## 代码4-70　指定菜品名称为列分组键的透视表

In [4]:
# Rows are orders, columns are dishes; each cell is the summed value for that
# (order, dish) pair, and NaN where the order does not contain the dish.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
detailPivot3 = pandas.pivot_table(
    detail[['order_id', 'dishes_name', 'counts', 'amounts']],
    index='order_id', columns='dishes_name', aggfunc='sum'
)
print('以order_id和dishes_name作为行列分组键创建的透视表前5行4列为：', detailPivot3.iloc[:5, :4], sep='\n')

以order_id和dishes_name作为行列分组键创建的透视表前5行4列为：
            amounts                        
dishes_name  42度海之蓝  北冰洋汽水  38度剑南春  50度古井贡酒
order_id                                   
1002            NaN     NaN     NaN     NaN
1003            NaN     NaN     NaN     NaN
1004            NaN     NaN     NaN     NaN
1008            NaN     NaN     NaN     NaN
1011           99.0     NaN     NaN     NaN


## 代码4-71　指定某些列制作透视表

In [5]:
# values='counts' restricts the aggregation to the counts column only, even
# though the input frame carries more columns.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
detailPivot4 = pandas.pivot_table(
    detail[['order_id', 'dishes_name', 'counts', 'amounts']],
    index='order_id', values='counts', aggfunc='sum'
)
print('以order_id作为行分组键counts作为值创建的透视表前5行为：', detailPivot4.head(), sep='\n')

以order_id作为行分组键counts作为值创建的透视表前5行为：
          counts
order_id        
1002         7.0
1003        10.0
1004        17.0
1008         5.0
1011        10.0


## 代码4-72　对透视表中的缺失值进行填充

In [6]:
# Same order-by-dish layout as before, but fill_value=0 replaces the NaN cells
# of (order, dish) pairs that never occur.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
detailPivot5 = pandas.pivot_table(
    detail[['order_id', 'dishes_name', 'counts', 'amounts']],
    index='order_id', columns='dishes_name', aggfunc='sum', fill_value=0
)
print('空值填0后以order_id和dishes_name为行列分组键创建透视表前5行4列为：', detailPivot5.iloc[:5, :4], sep='\n')

空值填0后以order_id和dishes_name为行列分组键创建透视表前5行4列为：
            amounts                        
dishes_name  42度海之蓝  北冰洋汽水  38度剑南春  50度古井贡酒
order_id                                   
1002              0       0       0       0
1003              0       0       0       0
1004              0       0       0       0
1008              0       0       0       0
1011             99       0       0       0


## 代码4-73　在透视表中添加汇总数据

In [7]:
# margins=True asks pandas to append subtotal/grand-total entries to the table.
# NOTE(review): the totals are appended at the end of each axis, so the
# first-5-rows / first-4-columns slice printed below does not display them;
# inspect the tail of detailPivot6 to see the margin row and columns.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
detailPivot6 = pandas.pivot_table(
    detail[['order_id', 'dishes_name', 'counts', 'amounts']], index='order_id',
    columns='dishes_name', aggfunc='sum', fill_value=0, margins=True
)
print('添加margins后以order_id和dishes_name为分组键的透视表前5行4列为：', detailPivot6.iloc[:5, :4], sep='\n')

添加margins后以order_id和dishes_name为分组键的透视表前5行4列为：
            amounts                        
dishes_name  42度海之蓝  北冰洋汽水  38度剑南春  50度古井贡酒
order_id                                   
1002              0       0       0       0
1003              0       0       0       0
1004              0       0       0       0
1008              0       0       0       0
1011             99       0       0       0


## 代码4-74　使用`crosstab`函数制作交叉表

In [8]:
# Cross-tabulate orders against dishes, summing counts in each cell.
# crosstab takes the Series themselves (not column names) for index, columns
# and values.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
detailCross = pandas.crosstab(
    index=detail['order_id'], columns=detail['dishes_name'], values=detail['counts'], aggfunc='sum'
)
print('以order_id和dishes_name为分组键counts为值得透视表前5行5列为：', detailCross.iloc[:5, :5], sep='\n')

以order_id和dishes_name为分组键counts为值得透视表前5行5列为：
dishes_name   42度海之蓝   北冰洋汽水   38度剑南春   50度古井贡酒  52度泸州老窖 
order_id                                                 
1002             NaN      NaN      NaN      NaN       NaN
1003             NaN      NaN      NaN      NaN       NaN
1004             NaN      NaN      NaN      NaN       NaN
1008             NaN      NaN      NaN      NaN       NaN
1011             1.0      NaN      NaN      NaN       NaN


## 代码4-75　订单详情表单日菜品成交总额与总数透视表

In [9]:
import pandas
import numpy
from sqlalchemy import create_engine
# Connect to the SQL Server database and load the order-detail table.
engine = create_engine('mssql+pymssql://sa:123456@localhost:1433/testdb?charset=utf8')
detail = pandas.read_sql_table('meal_order_detail1', con=engine)
# Make sure the order timestamp is a datetime column, then derive the calendar
# date with the vectorised .dt accessor instead of a per-element Python loop.
detail['place_order_time'] = pandas.to_datetime(detail['place_order_time'])
detail['date'] = detail['place_order_time'].dt.date
# Daily totals of counts and amounts, with an extra grand-total margin row.
# Only the grouping key and the two numeric columns are passed in: dishes_name
# is neither a key nor a value here, and newer pandas versions would otherwise
# "sum" (concatenate) its strings into an unwanted extra column.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
PivotDetail = pandas.pivot_table(
    detail[['date', 'counts', 'amounts']], index='date', aggfunc='sum', margins=True
)
print('订单详情表单日菜品成交总额与总数透视表前5行为：', PivotDetail.head(), sep='\n')

订单详情表单日菜品成交总额与总数透视表前5行为：
            amounts  counts
date                       
2016-08-01   9366.0   233.0
2016-08-02   6125.0   151.0
2016-08-03   6890.0   192.0
2016-08-04   7549.0   169.0
2016-08-05   8671.0   224.0


## 代码4-76　订单详情表单个菜品单日成交总额交叉表

In [10]:
# Cross-tabulate dates against dishes, summing amounts in each cell;
# margins=True adds the 'All' totals row and column, which is why the last
# 5 rows / last 5 columns are the slice printed here.
# aggfunc is given as the string 'sum' rather than numpy.sum to avoid the
# FutureWarning recent pandas releases raise for NumPy callables.
CrossDetail = pandas.crosstab(
    index=detail['date'], columns=detail['dishes_name'],
    values=detail['amounts'], aggfunc='sum', margins=True
)
print('订单详情表单个菜品单日成交总额交叉表后5行5列为：', CrossDetail.iloc[-5:, -5:], sep='\n')

订单详情表单个菜品单日成交总额交叉表后5行5列为：
dishes_name  黄尾袋鼠西拉子红葡萄酒  黄油曲奇饼干  黄花菜炒木耳  黑米恋上葡萄       All
date                                                      
2016-08-07         230.0    32.0   105.0    99.0   31306.0
2016-08-08          46.0     NaN     NaN    33.0    6532.0
2016-08-09         138.0     NaN    35.0    99.0    7155.0
2016-08-10          46.0     NaN    70.0    33.0   10231.0
All                736.0    80.0   525.0   561.0  125992.0
