In [1]:
import pandas as pd

# 透视表操作示例

##### 作用：对 DataFrame 进行透视表操作，本质是进行分组聚合
**pd.pivot_table(df, values=None, index=None, columns=None, aggfunc='mean')**<br/>
或<br/>
**df.pivot_table(values=None, index=None, columns=None, aggfunc='mean')**<br/>
注意：实际的参数顺序是 values、index、columns，建议始终使用关键字参数传参，避免按位置传参时出错。<br/>
参数：<br/>
* df：DataFrame数据
* index：指定列标签，该列的值作为结果中的行标签
* columns：指定列标签，该列的值作为结果中的列标签
* values：指定列标签，依据index和columns指定的列一起分组，并对values指定的列进行聚合
* aggfunc：聚合方法，默认是mean；也可以传入字典，为不同的列分别指定聚合方法（见示例4）

## 示例1

**加载优衣库门店销售数据 uniqlo.csv**

In [2]:
# Load the Uniqlo store sales data from the CSV file into a DataFrame.
uniqlo = pd.read_csv('./data/uniqlo.csv')
# Preview the first rows (DataFrame.head() returns 5 rows by default).
uniqlo.head()

Unnamed: 0,store_id,city,channel,gender_group,age_group,wkd_ind,product,customer,revenue,order,quant,unit_cost
0,658,深圳,线下,Female,25-29,Weekday,当季新品,4,796.0,4,4,59
1,146,杭州,线下,Female,25-29,Weekday,运动,1,149.0,1,1,49
2,70,深圳,线下,Male,>=60,Weekday,T恤,2,178.0,2,2,49
3,658,深圳,线下,Female,25-29,Weekday,T恤,1,59.0,1,1,49
4,229,深圳,线下,Male,20-24,Weekend,袜子,2,65.0,2,3,9


In [3]:
# Print a summary of the frame: index range, column names, non-null counts,
# dtypes and memory usage.
uniqlo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22293 entries, 0 to 22292
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   store_id      22293 non-null  int64  
 1   city          22293 non-null  object 
 2   channel       22293 non-null  object 
 3   gender_group  22293 non-null  object 
 4   age_group     22293 non-null  object 
 5   wkd_ind       22293 non-null  object 
 6   product       22293 non-null  object 
 7   customer      22293 non-null  int64  
 8   revenue       22293 non-null  float64
 9   order         22293 non-null  int64  
 10  quant         22293 non-null  int64  
 11  unit_cost     22293 non-null  int64  
dtypes: float64(1), int64(5), object(6)
memory usage: 2.0+ MB


**示例1：统计不同种类产品在不同城市的销量**

In [15]:
# Group-and-aggregate approach: total quantity sold for every
# (product, city) pair, returned as a Series with a two-level index.
result = (
    uniqlo
    .groupby(by=['product', 'city'])['quant']
    .sum()
)
result

product  city
T恤       上海      2118
         北京       800
         南京       568
         广州      1681
         成都      1079
                 ... 
配件       杭州       777
         武汉       755
         深圳      1046
         西安       260
         重庆       339
Name: quant, Length: 90, dtype: int64

In [16]:
# Unstack: move the innermost index level (city) into the columns, so rows
# are products and columns are cities.
# The wide table gets its own name instead of overwriting `result`, so this
# cell can be re-run on its own without unstacking an already-wide frame.
quant_wide = result.unstack()
quant_wide

city,上海,北京,南京,广州,成都,杭州,武汉,深圳,西安,重庆
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
T恤,2118,800,568,1681,1079,3100,2964,3697,1145,1273
当季新品,550,188,266,459,329,840,862,1126,281,437
毛衣,131,37,44,149,92,238,202,248,105,110
牛仔裤,307,46,52,194,178,388,415,477,152,223
短裤,306,87,49,266,179,456,490,608,174,206
袜子,370,123,117,379,240,662,568,735,186,259
裙子,102,27,26,107,77,163,165,201,71,56
运动,171,31,17,174,125,351,282,364,118,161
配件,550,138,126,406,225,777,755,1046,260,339


In [17]:
# Pivot-table approach: a single call yields the same product-by-city table
# of summed quantities as the groupby + unstack steps above.
uniqlo.pivot_table(
    values='quant',
    index='product',
    columns='city',
    aggfunc='sum',
)

city,上海,北京,南京,广州,成都,杭州,武汉,深圳,西安,重庆
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
T恤,2118,800,568,1681,1079,3100,2964,3697,1145,1273
当季新品,550,188,266,459,329,840,862,1126,281,437
毛衣,131,37,44,149,92,238,202,248,105,110
牛仔裤,307,46,52,194,178,388,415,477,152,223
短裤,306,87,49,266,179,456,490,608,174,206
袜子,370,123,117,379,240,662,568,735,186,259
裙子,102,27,26,107,77,163,165,201,71,56
运动,171,31,17,174,125,351,282,364,118,161
配件,550,138,126,406,225,777,755,1046,260,339


## 示例2

In [6]:
# Display the sales DataFrame (pandas abbreviates the middle rows of a long frame).
uniqlo

Unnamed: 0,store_id,city,channel,gender_group,age_group,wkd_ind,product,customer,revenue,order,quant,unit_cost
0,658,深圳,线下,Female,25-29,Weekday,当季新品,4,796.0,4,4,59
1,146,杭州,线下,Female,25-29,Weekday,运动,1,149.0,1,1,49
2,70,深圳,线下,Male,>=60,Weekday,T恤,2,178.0,2,2,49
3,658,深圳,线下,Female,25-29,Weekday,T恤,1,59.0,1,1,49
4,229,深圳,线下,Male,20-24,Weekend,袜子,2,65.0,2,3,9
...,...,...,...,...,...,...,...,...,...,...,...,...
22288,146,杭州,线下,Female,30-34,Weekday,短裤,1,80.0,1,2,19
22289,430,成都,线下,Female,25-29,Weekend,T恤,1,79.0,1,1,49
22290,449,武汉,线下,Female,35-39,Weekday,T恤,1,158.0,1,2,49
22291,758,杭州,线下,Female,20-24,Weekday,袜子,1,26.0,1,1,9


**示例2：统计不同城市、不同渠道、不同种类产品的销售额**

In [18]:
# 分组聚合
result = uniqlo.groupby(['city', 'channel', 'product'])['revenue'].sum()
result

city  channel  product
上海    线上       T恤         48637.48
               当季新品       16136.50
               毛衣          5019.00
               牛仔裤         9264.93
               短裤          4453.04
                            ...   
重庆    线下       短裤          6149.23
               袜子          8024.23
               裙子          7897.00
               运动          9284.50
               配件         30133.90
Name: revenue, Length: 135, dtype: float64

In [19]:
# Unstack: move the innermost index level (product) into the columns, leaving
# (city, channel) pairs as the row index.
# The wide table gets its own name instead of overwriting `result`, so this
# cell can be re-run on its own without unstacking an already-wide frame.
revenue_wide = result.unstack()
revenue_wide

Unnamed: 0_level_0,product,T恤,当季新品,毛衣,牛仔裤,短裤,袜子,裙子,运动,配件
city,channel,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
上海,线上,48637.48,16136.5,5019.0,9264.93,4453.04,4379.45,5341.0,2264.0,18942.69
上海,线下,126778.56,39138.31,18766.67,22752.47,7206.38,9336.49,9632.93,9119.17,32652.66
北京,线下,70349.96,18854.86,7210.68,4309.0,3241.67,4658.24,3829.55,2214.0,15790.66
南京,线下,47702.91,36697.68,7525.1,5753.63,1836.22,4258.62,4168.0,1406.34,13802.43
广州,线上,93729.71,26574.95,14410.0,9957.9,6433.92,8791.61,5932.0,6438.67,28624.54
广州,线下,47157.01,18994.0,10461.0,6169.0,3481.07,4724.0,8013.56,3929.32,14302.23
成都,线下,89127.68,35935.63,15638.33,14922.04,7103.22,8095.41,10160.33,7678.66,19528.56
杭州,线下,253602.43,99377.3,45169.62,36259.89,17715.38,21559.65,21385.04,23875.08,70574.1
武汉,线上,120358.13,32243.46,22981.63,19537.13,9894.47,10715.32,10086.0,6106.36,49498.23
武汉,线下,132910.0,64175.42,15849.0,27968.16,8709.19,9866.31,13669.26,11760.96,23448.75


In [20]:
# Pivot-table approach: rows are (city, channel) pairs, columns are products,
# and each cell holds the summed revenue.
uniqlo.pivot_table(
    values='revenue',
    index=['city', 'channel'],
    columns='product',
    aggfunc='sum',
)

Unnamed: 0_level_0,product,T恤,当季新品,毛衣,牛仔裤,短裤,袜子,裙子,运动,配件
city,channel,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
上海,线上,48637.48,16136.5,5019.0,9264.93,4453.04,4379.45,5341.0,2264.0,18942.69
上海,线下,126778.56,39138.31,18766.67,22752.47,7206.38,9336.49,9632.93,9119.17,32652.66
北京,线下,70349.96,18854.86,7210.68,4309.0,3241.67,4658.24,3829.55,2214.0,15790.66
南京,线下,47702.91,36697.68,7525.1,5753.63,1836.22,4258.62,4168.0,1406.34,13802.43
广州,线上,93729.71,26574.95,14410.0,9957.9,6433.92,8791.61,5932.0,6438.67,28624.54
广州,线下,47157.01,18994.0,10461.0,6169.0,3481.07,4724.0,8013.56,3929.32,14302.23
成都,线下,89127.68,35935.63,15638.33,14922.04,7103.22,8095.41,10160.33,7678.66,19528.56
杭州,线下,253602.43,99377.3,45169.62,36259.89,17715.38,21559.65,21385.04,23875.08,70574.1
武汉,线上,120358.13,32243.46,22981.63,19537.13,9894.47,10715.32,10086.0,6106.36,49498.23
武汉,线下,132910.0,64175.42,15849.0,27968.16,8709.19,9866.31,13669.26,11760.96,23448.75


## 示例3

In [9]:
# Display the sales DataFrame (pandas abbreviates the middle rows of a long frame).
uniqlo

Unnamed: 0,store_id,city,channel,gender_group,age_group,wkd_ind,product,customer,revenue,order,quant,unit_cost
0,658,深圳,线下,Female,25-29,Weekday,当季新品,4,796.0,4,4,59
1,146,杭州,线下,Female,25-29,Weekday,运动,1,149.0,1,1,49
2,70,深圳,线下,Male,>=60,Weekday,T恤,2,178.0,2,2,49
3,658,深圳,线下,Female,25-29,Weekday,T恤,1,59.0,1,1,49
4,229,深圳,线下,Male,20-24,Weekend,袜子,2,65.0,2,3,9
...,...,...,...,...,...,...,...,...,...,...,...,...
22288,146,杭州,线下,Female,30-34,Weekday,短裤,1,80.0,1,2,19
22289,430,成都,线下,Female,25-29,Weekend,T恤,1,79.0,1,1,49
22290,449,武汉,线下,Female,35-39,Weekday,T恤,1,158.0,1,2,49
22291,758,杭州,线下,Female,20-24,Weekday,袜子,1,26.0,1,1,9


**示例3：统计不同种类产品的销售额**

In [22]:
# Group-and-aggregate approach: total revenue per product. Selecting with a
# list (`[['revenue']]`) keeps the result a one-column DataFrame.
(
    uniqlo
    .groupby(by='product')[['revenue']]
    .sum()
)

Unnamed: 0_level_0,revenue
product,Unnamed: 1_level_1
T恤,1538744.84
当季新品,590664.88
毛衣,245630.8
牛仔裤,246127.48
短裤,107485.88
袜子,127731.36
裙子,137302.78
运动,118059.68
配件,444685.15


In [24]:
# Pivot-table approach: with no `columns` argument the result is a
# one-column table of summed revenue indexed by product.
uniqlo.pivot_table(
    values='revenue',
    index='product',
    aggfunc='sum',
)

Unnamed: 0_level_0,revenue
product,Unnamed: 1_level_1
T恤,1538744.84
当季新品,590664.88
毛衣,245630.8
牛仔裤,246127.48
短裤,107485.88
袜子,127731.36
裙子,137302.78
运动,118059.68
配件,444685.15


In [11]:
# Pivot-table operation (this cell holds only this comment; the pivot_table
# call for example 3 is in the preceding cell).


## 示例4

In [12]:
# Display the sales DataFrame (pandas abbreviates the middle rows of a long frame).
uniqlo

Unnamed: 0,store_id,city,channel,gender_group,age_group,wkd_ind,product,customer,revenue,order,quant,unit_cost
0,658,深圳,线下,Female,25-29,Weekday,当季新品,4,796.0,4,4,59
1,146,杭州,线下,Female,25-29,Weekday,运动,1,149.0,1,1,49
2,70,深圳,线下,Male,>=60,Weekday,T恤,2,178.0,2,2,49
3,658,深圳,线下,Female,25-29,Weekday,T恤,1,59.0,1,1,49
4,229,深圳,线下,Male,20-24,Weekend,袜子,2,65.0,2,3,9
...,...,...,...,...,...,...,...,...,...,...,...,...
22288,146,杭州,线下,Female,30-34,Weekday,短裤,1,80.0,1,2,19
22289,430,成都,线下,Female,25-29,Weekend,T恤,1,79.0,1,1,49
22290,449,武汉,线下,Female,35-39,Weekday,T恤,1,158.0,1,2,49
22291,758,杭州,线下,Female,20-24,Weekday,袜子,1,26.0,1,1,9


**示例4：统计不同城市销售的产品种类数和产品的总销售额**

In [25]:
# Group-and-aggregate approach: per city, count the distinct products sold
# and sum the revenue, using a column -> aggregation mapping.
uniqlo.groupby(by='city').agg(
    {
        'product': 'nunique',
        'revenue': 'sum',
    }
)

Unnamed: 0_level_0,product,revenue
city,Unnamed: 1_level_1,Unnamed: 2_level_1
上海,9,389821.73
北京,9,130458.62
南京,9,123150.93
广州,9,318124.49
成都,9,208189.86
杭州,9,589518.49
武汉,9,589777.78
深圳,9,733123.68
西安,9,210774.62
重庆,9,263492.65


In [26]:
# Pivot-table approach: `aggfunc` is given a column -> aggregation mapping,
# so each value column gets its own aggregation.
uniqlo.pivot_table(
    values=['product', 'revenue'],
    index='city',
    aggfunc={
        'product': 'nunique',
        'revenue': 'sum',
    },
)

Unnamed: 0_level_0,product,revenue
city,Unnamed: 1_level_1,Unnamed: 2_level_1
上海,9,389821.73
北京,9,130458.62
南京,9,123150.93
广州,9,318124.49
成都,9,208189.86
杭州,9,589518.49
武汉,9,589777.78
深圳,9,733123.68
西安,9,210774.62
重庆,9,263492.65
