# 如何在大數據中找出「關鍵少數」，贏得最大獲利？
## 作者：葉庭妤(臺灣行銷研究特邀作者)、鍾皓軒(臺灣行銷研究創辦人)

In [1]:
# 引入套件
import plotly.tools as tls
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px
import pandas as pd


# 讀取資料

資料可以在此[取得](https://drive.google.com/file/d/1QVHV7IDTFdpeQO0jY41wmxhgLizRzhoJ/view?usp=sharing)。下載後，請將資料與本 ipynb 檔案放在同一個工作目錄中，再執行下方程式即可。

In [2]:
# Load the sales data from the working directory and remove the two
# 'Unnamed' columns in one call (presumably index columns left over from
# earlier CSV exports; verify against the data file).
all_data = pd.read_csv('new_salesdata.csv', encoding='utf-8-sig').drop(
    columns=['Unnamed: 0', 'Unnamed: 0.1']
)

In [3]:
# Report how many distinct series ('系列') and distinct products ('產品')
# appear in the data, one line per column.
for column, suffix in (('系列', '種不同系列'), ('產品', '種不同產品')):
    print('總共有', all_data[column].unique().size, suffix)

總共有 1051 種不同系列
總共有 8076 種不同產品


## 資料簡介
1. 顏色：產品顏色，若產品非服飾類則無顏色
2. 單價：產品售出單價
3. 成本：產品成本
4. 系列：產品所屬系列
5. 產品：產品所屬系列下的產品類別，例如：產品4-1指的是系列4中的第一個產品
6. 訂單時間：客人購買下訂單的時間
7. 會員：會員代號流水號，相同顧客會有相同的流水號
8. 性別：FEMALE為女，MALE為男
9. 年紀：會員年紀
10. 廣告代號all：顧客得知此產品的管道
11. 尺寸：產品尺寸，若產品非服飾類則無尺寸

In [4]:
# Preview the first 10 rows to see the columns and sample values
# (this shows row contents, not the column dtypes).
all_data.head(10)

Unnamed: 0,顏色,單價,成本,系列,產品,訂單時間,會員,性別,年紀,廣告代號all,尺寸
0,,643.195,394.8,系列4,產品4-1,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無
1,,391.51,225.365,系列4,產品4-2,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無
2,watermelonred,713.93,416.185,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S
3,,557.655,337.225,系列4,產品4-4,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無
4,white,628.39,366.835,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S
5,navyblue,713.93,416.185,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S
6,,1062.67,674.45,系列4,產品4-5,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無
7,gray,1044.575,806.05,系列4,產品4-6,2016-01-01T05:53:09,L_CBY_03808,FEMALE,32.0,廣告_自然流量,M
8,black,713.93,416.185,系列4,產品4-3,2016-01-01T05:53:09,L_CBY_03808,FEMALE,32.0,廣告_自然流量,M
9,gray,1044.575,806.05,系列4,產品4-6,2016-01-01T05:53:09,L_CBY_03808,FEMALE,32.0,廣告_自然流量,M


# 計算系列利潤

In [5]:
# Profit per row = unit price ('單價') minus cost ('成本'), stored in '利潤'.
all_data['利潤'] = all_data['單價'].sub(all_data['成本'])
# Bare expression so the notebook displays the frame with the new column.
all_data

Unnamed: 0,顏色,單價,成本,系列,產品,訂單時間,會員,性別,年紀,廣告代號all,尺寸,利潤
0,,643.195,394.800,系列4,產品4-1,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無,248.395
1,,391.510,225.365,系列4,產品4-2,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無,166.145
2,watermelonred,713.930,416.185,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S,297.745
3,,557.655,337.225,系列4,產品4-4,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無,220.430
4,white,628.390,366.835,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S,261.555
...,...,...,...,...,...,...,...,...,...,...,...,...
353219,,575.750,403.025,系列207,產品207-35,2019-11-12T14:44:14,B_GSJ_25617,FEMALE,18.0,廣告_自然流量,無,172.725
353220,,575.750,403.025,系列207,產品207-35,2019-11-13T09:37:06,B_GSJ_16318,FEMALE,24.0,廣告_自然流量,無,172.725
353221,,575.750,403.025,系列207,產品207-35,2019-11-14T18:34:45,B_GSJ_77300,,28.0,廣告_KBDG_MK,無,172.725
353222,,575.750,403.025,系列207,產品207-35,2019-11-14T18:34:45,B_GSJ_77300,,28.0,廣告_KBDG_MK,無,172.725


In [6]:
# Build the per-series profit table in one chain:
#   1. sum profit ('利潤') within each series ('系列'),
#   2. add each series' share of the overall profit ('利潤佔比'),
#   3. order the series from largest to smallest share.
profit_data = (
    all_data.groupby('系列', as_index=False)['利潤']
    .sum()
    .assign(**{'利潤佔比': lambda df: df['利潤'] / df['利潤'].sum()})
    .sort_values(by='利潤佔比', ascending=False)
)
# The ten series with the largest profit share.
top10_profit = profit_data.head(10)
top10_profit

Unnamed: 0,系列,利潤,利潤佔比
0,系列1,14193350.0,0.088604
163,系列2,12641900.0,0.078919
274,系列3,8772754.0,0.054765
607,系列6,5128763.0,0.032017
718,系列7,4195607.0,0.026192
385,系列4,4179078.0,0.026088
496,系列5,4087930.0,0.025519
64,系列11,4069565.0,0.025405
940,系列9,3867114.0,0.024141
1,系列10,3256977.0,0.020332


In [7]:
# Bar chart of the first 30 rows of profit_data (the 30 most profitable
# series, given that profit_data is sorted by profit share, descending).
# NOTE(review): the title says 2019, but no year filter is applied in the
# visible code — confirm the intended period.
top30 = profit_data.head(30)
profit_fig = px.bar(
    top30,
    x='系列',
    y='利潤',
    color='利潤',
    title='2019年各系列利潤排序長條圖',
)
# Save the chart as an HTML file.
plot(profit_fig, filename='2019年各系列利潤排序長條圖.html')
# Render the chart.
profit_fig.show()

# 如何找出貢獻總利潤80%的產品清單？

In [8]:
# Add the running total of profit share ('累積利潤佔比'). The running total
# follows the current row order of profit_data, so it is only meaningful
# when the rows are sorted by share in descending order.
profit_data = profit_data.assign(
    **{'累積利潤佔比': lambda df: df['利潤佔比'].cumsum()}
)
profit_data

Unnamed: 0,系列,利潤,利潤佔比,累積利潤佔比
0,系列1,1.419335e+07,8.860377e-02,0.088604
163,系列2,1.264190e+07,7.891864e-02,0.167522
274,系列3,8.772754e+06,5.476500e-02,0.222287
607,系列6,5.128763e+06,3.201694e-02,0.254304
718,系列7,4.195607e+06,2.619160e-02,0.280496
...,...,...,...,...
622,系列612,1.348900e+02,8.420675e-07,0.999998
43,系列1037,1.118600e+02,6.982999e-07,0.999999
1006,系列959,9.376500e+01,5.853396e-07,1.000000
7,系列1004,5.922000e+01,3.696882e-07,1.000000


In [9]:
# Keep the series whose cumulative profit share is at most 0.8; the series
# that pushes the running total past 0.8 is not included.
profit_data_cumsum80 = profit_data.loc[profit_data['累積利潤佔比'].le(0.8)]


In [10]:
# Fraction of all series that make up the first 80% of profit,
# rounded to two decimal places.
round(profit_data_cumsum80.shape[0] / profit_data.shape[0], 2)

0.06

# 八二法則只能用在「系列」產品上嗎？

In [11]:
# Per-product profit table built in one chain:
#   1. sum profit ('利潤') within each product ('產品'),
#   2. add each product's share of the overall profit ('利潤佔比'),
#   3. order products from largest to smallest share,
#   4. add the running total of the share ('累積利潤佔比') in that order.
SKU_data = (
    all_data.groupby('產品', as_index=False)['利潤']
    .sum()
    .assign(**{'利潤佔比': lambda df: df['利潤'] / df['利潤'].sum()})
    .sort_values(by='利潤佔比', ascending=False)
    .assign(**{'累積利潤佔比': lambda df: df['利潤佔比'].cumsum()})
)
# Products whose cumulative profit share is at most 0.8.
SKU_data_80 = SKU_data.loc[SKU_data['累積利潤佔比'].le(0.8)]
SKU_data_80

Unnamed: 0,產品,利潤,利潤佔比,累積利潤佔比
3,產品1-12,4085308.150,0.025503,0.025503
6290,產品6-12,3260097.190,0.020352,0.045855
16,產品1-3,2099893.495,0.013109,0.058963
7741,產品9-12,2008563.095,0.012539,0.071502
2007,產品2-12,1940270.920,0.012112,0.083615
...,...,...,...,...
2511,產品23-37,43487.220,0.000271,0.798699
3400,產品29-16,43367.135,0.000271,0.798970
605,產品123-3,43332.590,0.000271,0.799240
6061,產品56-18,43135.190,0.000269,0.799509


In [12]:
# First ten rows of SKU_data, i.e. the ten most profitable products
# given that SKU_data is sorted by profit share in descending order.
SKU_data_10 = SKU_data.head(10)
SKU_data_10

Unnamed: 0,產品,利潤,利潤佔比,累積利潤佔比
3,產品1-12,4085308.15,0.025503,0.025503
6290,產品6-12,3260097.19,0.020352,0.045855
16,產品1-3,2099893.495,0.013109,0.058963
7741,產品9-12,2008563.095,0.012539,0.071502
2007,產品2-12,1940270.92,0.012112,0.083615
2017,產品2-3,1630953.345,0.010181,0.093796
2023,產品2-35,1618874.11,0.010106,0.103902
515,產品12-12,1525623.995,0.009524,0.113426
2016,產品2-29,1499874.81,0.009363,0.122789
7345,產品8-35,1484087.745,0.009265,0.132054


In [13]:
# Bar chart of the first 30 rows of SKU_data (the 30 most profitable
# products, given the descending sort), coloured by profit share.
# NOTE(review): the title says 2019, but no year filter is applied in the
# visible code — confirm the intended period.
SKU_data_30 = SKU_data.head(30)
SKU_fig = px.bar(
    SKU_data_30,
    x='產品',
    y='利潤',
    color='利潤佔比',
    title='2019年各產品利潤排序長條圖',
)
# Save the chart as an HTML file.
plot(SKU_fig, filename='2019年各產品利潤排序長條圖.html')
# Bare expression so the notebook renders the figure.
SKU_fig

In [14]:
# Fraction of all products (not series) that make up the first 80% of
# profit, rounded to two decimal places.
round(SKU_data_80.shape[0] / SKU_data.shape[0], 2)

0.07

# 管理意涵
## 從這次的資料集，我們可以知道：
## 1. 當我們遇到數以萬計筆資料的資料集時，可以利用八二法則抓出重要的系列！
## 2. 然而，並非每個產業都恰好符合八二法則，像是此資料集就不是20%的產品貢獻80%的利潤（實際上約6%的系列、約7%的產品就貢獻了近80%的利潤），但我們仍然可以用此方法來判斷哪些產品貢獻了80%的利潤！
## 3. 同樣八二法則當然也可以應用在產品、顏色、尺寸等SKU上！