# 盒鬚圖 - 哪個廣告效果好？ 電商產業廣告效果分析實戰案例

#### 作者：徐子皓(臺灣行銷研究特邀作者)
#### 完整文章介紹鏈接：https://medium.com/p/f1fd6e0006e4/

## 一、基本資料引入

### 1. 引入套件包、原始資料

In [1]:
import pandas as pd
import os
from collections import Counter
import heapq
import plotly.offline as py
import plotly.graph_objects as go

In [2]:
# Load the raw sales records from the CSV next to the notebook and
# preview the first six rows.
csv_path = "sales_data.csv"
data = pd.read_csv(csv_path)
data.head(6)

Unnamed: 0.1,Unnamed: 0,顏色,單價,成本,系列,產品,訂單時間,會員,性別,年紀,廣告代號all,尺寸
0,0,,391,240,系列4,產品4-1,2016-01-01T03:19:35,10052013,FEMALE,32.0,廣告_YND_pid,無
1,1,,238,137,系列4,產品4-2,2016-01-01T03:19:35,10052013,FEMALE,32.0,廣告_YND_pid,無
2,2,watermelonred,434,253,系列4,產品4-3,2016-01-01T03:19:35,10052013,FEMALE,32.0,廣告_YND_pid,S
3,3,,339,205,系列4,產品4-4,2016-01-01T03:19:35,10052013,FEMALE,32.0,廣告_YND_pid,無
4,4,white,382,223,系列4,產品4-3,2016-01-01T03:19:35,10052013,FEMALE,32.0,廣告_YND_pid,S
5,5,navyblue,434,253,系列4,產品4-3,2016-01-01T03:19:35,10052013,FEMALE,32.0,廣告_YND_pid,S


### 2. 移除多餘欄位

In [3]:
# Drop the columns this analysis does not use. The pattern is applied as a
# regex search on each column name, so any column whose name contains one of
# these fragments is removed.
unused_pattern = 'Unnamed: 0|顏色|會員|產品|性別|年紀|尺寸'
unused_columns = list(data.filter(regex=unused_pattern))
data = data.drop(columns=unused_columns)
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all
0,391,240,系列4,2016-01-01T03:19:35,廣告_YND_pid
1,238,137,系列4,2016-01-01T03:19:35,廣告_YND_pid
2,434,253,系列4,2016-01-01T03:19:35,廣告_YND_pid
3,339,205,系列4,2016-01-01T03:19:35,廣告_YND_pid
4,382,223,系列4,2016-01-01T03:19:35,廣告_YND_pid
5,434,253,系列4,2016-01-01T03:19:35,廣告_YND_pid


## 二、基礎資料處理

### 1. 只留下「系列4」的資料

In [4]:
# Restrict the data set to the rows whose series column equals '系列4'.
is_series_4 = data['系列'].eq('系列4')
data = data[is_series_4]
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all
0,391,240,系列4,2016-01-01T03:19:35,廣告_YND_pid
1,238,137,系列4,2016-01-01T03:19:35,廣告_YND_pid
2,434,253,系列4,2016-01-01T03:19:35,廣告_YND_pid
3,339,205,系列4,2016-01-01T03:19:35,廣告_YND_pid
4,382,223,系列4,2016-01-01T03:19:35,廣告_YND_pid
5,434,253,系列4,2016-01-01T03:19:35,廣告_YND_pid


### 2. 重新整理廣告代號格式

In [5]:
# Keep only the second '_'-separated token of each ad code,
# e.g. "廣告_YND_pid" -> "YND".
#
# The whole column is rewritten in one vectorised step on an explicit copy.
# The previous row-by-row `data[col][i] = value` was a chained assignment
# (pandas may apply it to a temporary object) and it looked rows up by index
# label, which fails once the filtered frame's index is no longer 0..n-1.
# Codes that have no second token become NaN instead of raising.
data = data.copy()
data['廣告代號all'] = data['廣告代號all'].str.split('_').str[1]

In [6]:
# Preview the first six rows to check the reformatted ad codes.
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all
0,391,240,系列4,2016-01-01T03:19:35,YND
1,238,137,系列4,2016-01-01T03:19:35,YND
2,434,253,系列4,2016-01-01T03:19:35,YND
3,339,205,系列4,2016-01-01T03:19:35,YND
4,382,223,系列4,2016-01-01T03:19:35,YND
5,434,253,系列4,2016-01-01T03:19:35,YND


## 三、進階資料處理－挑選主力廣告

### 1. 抓出廣告數量

In [7]:
# Number of distinct ad codes; dropna=False keeps a missing value (if any)
# counted as one entry, as unique() does.
data['廣告代號all'].nunique(dropna=False)

36

### 2. 廣告使用頻率表

In [8]:
# Tally how many rows each ad code accounts for.
ad_codes = data['廣告代號all']
count_list = Counter(ad_codes)
count_list

Counter({'YND': 52,
         '自然流量': 3780,
         'ikip': 652,
         'B2K': 452,
         'critei': 1285,
         'qdwit': 896,
         'qpw': 5,
         'KDPOD': 937,
         'B2KPOD': 23,
         'KDMK': 320,
         'cridgewell': 6,
         'pic': 5,
         'B2KMK': 10,
         'KDP': 1793,
         'GINEP': 154,
         'edmP': 128,
         'edmM': 4,
         'B2KP': 57,
         'KDPM': 140,
         'KDM': 2,
         'GINEM': 13,
         'B2KM': 1,
         'edmD': 1,
         'edm': 2,
         'KDMP': 29,
         'edmMK': 20,
         'B2KMP': 1,
         'edmMP': 26,
         'GINEMK': 24,
         'GINEMP': 22,
         'B2KDG': 21,
         'KBP': 31,
         'KBMP': 1,
         'IGP': 20,
         'KBDG': 145,
         'IGDG': 1})

### 3. 將「count_list」轉換為DataFrame格式

In [9]:
# Turn the Counter into a two-column DataFrame: the keys become '廣告名稱'
# (ad name) and the counts become '樣本數' (sample size).
count_list = (
    pd.DataFrame.from_dict(count_list, orient='index')  # keys -> index, counts -> column 0
    .reset_index()                                      # move the keys into an 'index' column
    .rename(columns={'index': '廣告名稱', 0: '樣本數'})   # give both columns readable names
)
count_list

Unnamed: 0,廣告名稱,樣本數
0,YND,52
1,自然流量,3780
2,ikip,652
3,B2K,452
4,critei,1285
5,qdwit,896
6,qpw,5
7,KDPOD,937
8,B2KPOD,23
9,KDMK,320


### 4. 取出前三大廣告頻率

In [10]:
# The three largest sample counts, in descending order.
sorted(count_list['樣本數'], reverse=True)[:3]

[3780, 1793, 1285]

### 5. 抓出第三名的廣告使用頻率

In [11]:
# Sample count of the third most-used ad; ads with fewer samples than this
# are put on the unpopular list below.
# Only the top three values are requested (four were fetched before and the
# last one was never used). Taking [-1] returns the smallest of them, so a
# data set with fewer than three ads no longer raises IndexError.
dead_number = heapq.nlargest(3, list(count_list['樣本數']))[-1]
dead_number

1285

### 6. 製作出非熱門廣告清單

In [12]:
# Ads whose sample count is strictly below the third-place count.
below_cutoff = count_list['樣本數'].lt(dead_number)
dead_list = count_list[below_cutoff]
dead_list

Unnamed: 0,廣告名稱,樣本數
0,YND,52
2,ikip,652
3,B2K,452
5,qdwit,896
6,qpw,5
7,KDPOD,937
8,B2KPOD,23
9,KDMK,320
10,cridgewell,6
11,pic,5


### 7. 將非熱門廣告從資料集中排除

In [13]:
# Remove every row whose ad code is on the unpopular list. A single isin()
# over the whole list replaces the previous loop, which re-filtered the
# entire frame once per unpopular ad.
data = data[~data['廣告代號all'].isin(dead_list['廣告名稱'])]

In [14]:
# Preview the first six rows that remain after removing the unpopular ads.
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all
7,635,490,系列4,2016-01-01T05:53:09,自然流量
8,434,253,系列4,2016-01-01T05:53:09,自然流量
9,635,490,系列4,2016-01-01T05:53:09,自然流量
10,562,424,系列4,2016-01-01T05:53:09,自然流量
11,276,194,系列4,2016-01-01T05:53:09,自然流量
12,635,490,系列4,2016-01-01T07:21:44,自然流量


### 8. 將自然流量的交易資料刪除

In [15]:
# Drop the rows labelled '自然流量' so only the remaining ad codes are kept.
data = data[data['廣告代號all'] != '自然流量']
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all
32,434,253,系列4,2016-01-01T15:10:43,critei
33,382,223,系列4,2016-01-01T15:10:43,critei
34,434,253,系列4,2016-01-01T15:10:43,critei
48,421,331,系列4,2016-01-02T04:52:47,critei
53,562,424,系列4,2016-01-02T08:05:24,critei
54,615,471,系列4,2016-01-02T08:05:24,critei


## 四、進階資料處理－不同廣告每期帶來的淨利

### 1. 新增「月份」欄位

In [16]:
# Month of the order as an integer: the second '-'-separated piece of the
# order timestamp (e.g. "2016-01-01T15:10:43" -> 1).
order_time_parts = data['訂單時間'].str.split('-', expand=True)
data['月份'] = order_time_parts[1].astype(int)
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,月份
32,434,253,系列4,2016-01-01T15:10:43,critei,1
33,382,223,系列4,2016-01-01T15:10:43,critei,1
34,434,253,系列4,2016-01-01T15:10:43,critei,1
48,421,331,系列4,2016-01-02T04:52:47,critei,1
53,562,424,系列4,2016-01-02T08:05:24,critei,1
54,615,471,系列4,2016-01-02T08:05:24,critei,1


### 2. 新增「年月」欄位

In [17]:
# Year and month concatenated into one integer (e.g. 201601), built from the
# first two '-'-separated pieces of the order timestamp.
order_time_parts = data['訂單時間'].str.split('-', expand=True)
year_text = order_time_parts[0]
month_text = order_time_parts[1]
data['年月'] = (year_text + month_text).astype(int)
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,月份,年月
32,434,253,系列4,2016-01-01T15:10:43,critei,1,201601
33,382,223,系列4,2016-01-01T15:10:43,critei,1,201601
34,434,253,系列4,2016-01-01T15:10:43,critei,1,201601
48,421,331,系列4,2016-01-02T04:52:47,critei,1,201601
53,562,424,系列4,2016-01-02T08:05:24,critei,1,201601
54,615,471,系列4,2016-01-02T08:05:24,critei,1,201601


### 3. 新增「淨利」欄位

In [18]:
# Net profit per row: unit price minus cost.
data['淨利'] = data['單價'].sub(data['成本'])
data.head(6)

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,月份,年月,淨利
32,434,253,系列4,2016-01-01T15:10:43,critei,1,201601,181
33,382,223,系列4,2016-01-01T15:10:43,critei,1,201601,159
34,434,253,系列4,2016-01-01T15:10:43,critei,1,201601,181
48,421,331,系列4,2016-01-02T04:52:47,critei,1,201601,90
53,562,424,系列4,2016-01-02T08:05:24,critei,1,201601,138
54,615,471,系列4,2016-01-02T08:05:24,critei,1,201601,144


### 4. 根據「廣告代號all」、「月份」及「年月」欄位交叉組合出不同的淨利總和

In [19]:
# Total net profit for every (ad code, month, year-month) combination.
# Note that `data` is rebound to the resulting Series here.
group_keys = ['廣告代號all', '月份', '年月']
data = data.groupby(group_keys)['淨利'].sum()
data

廣告代號all  月份  年月    
KDP      1   201801    28155
             201901      680
         2   201702     7359
             201802     1719
             201902      522
                       ...  
critei   11  201711     1410
             201811     1323
         12  201612     7427
             201712     1787
             201812     1372
Name: 淨利, Length: 68, dtype: int64

### 5. 將「data」重新組合為DataFrame形式

In [20]:
# Flatten the grouped Series back into a DataFrame: the index levels become
# ordinary columns and the values keep the Series name as their column name.
data = data.reset_index()
data.head(6)

Unnamed: 0,廣告代號all,月份,年月,淨利
0,KDP,1,201801,28155
1,KDP,1,201901,680
2,KDP,2,201702,7359
3,KDP,2,201802,1719
4,KDP,2,201902,522
5,KDP,3,201703,40474


### 6. 移除多餘欄位，並新增「count」欄位

In [21]:
# The year-month column is no longer needed once the rows are aggregated.
data = data.drop(columns='年月')
# Constant 1 per row, so summing 'count' gives the number of rows in a group.
data['count'] = 1
data.head(6)

Unnamed: 0,廣告代號all,月份,淨利,count
0,KDP,1,28155,1
1,KDP,1,680,1
2,KDP,2,7359,1
3,KDP,2,1719,1
4,KDP,2,522,1
5,KDP,3,40474,1


## 五、繪圖

### 1. 繪製盒鬚圖

In [22]:
fig = go.Figure()  # start from an empty figure
colors_box = ['#3366CC', '#DC3912']  # box colours, reused cyclically
# Draw one box trace per ad: net profit on the y axis, month on the x axis.
# groupby(sort=False) yields the ads in order of first appearance (the same
# order unique() gives) and hands over each ad's rows directly, instead of
# re-filtering the frame three times per trace.
for i, (ad_name, ad_rows) in enumerate(data.groupby('廣告代號all', sort=False)):
    fig.add_trace(go.Box(
        y=ad_rows['淨利'],
        x=ad_rows['月份'],
        name=str(ad_name),
        # Wrap around the palette so that more ads than colours (e.g. a tie
        # at the popularity cut-off) does not raise IndexError.
        marker_color=colors_box[i % len(colors_box)],
    ))


### 2. 繪製平均折線圖

In [23]:
adlist = list(data['廣告代號all'].unique())  # ad names, in order of first appearance
color_line = ["royalblue", "firebrick"]  # line colours, reused cyclically
axislist = list(range(1, 13))  # x axis: months 1..12
# Overlay one line per ad through its mean net profit for each month.
for i, ad in enumerate(adlist):
    # Wrap around the palette so more ads than colours cannot raise IndexError.
    colour = color_line[i % len(color_line)]
    ad_rows = data[data['廣告代號all'] == ad]
    # The 'count' column is a constant 1 per row, so sum(profit) / sum(count)
    # is the plain mean of the profit column. Computing it with
    # groupby().mean() avoids the chained boolean mask (which makes pandas
    # warn about reindexing the key) and the 0/0 division for empty months.
    # reindex() leaves NaN for months with no rows, as the division did.
    meanlist = ad_rows.groupby('月份')['淨利'].mean().reindex(axislist).tolist()

    fig.add_trace(go.Scatter(
        x=axislist,
        y=meanlist,
        mode="lines+markers",
        # Name the trace so the legend shows the ad instead of "trace N".
        name=f"{ad} (mean)",
        textfont=dict(
            family="sans serif",
            size=16,
            color="royalblue"),
        line=dict(color=colour, width=2),
    ))


### 3. 設定佈景主題

In [24]:
# Figure-wide styling: centred bold title, axis titles, canvas size,
# grouped box mode and the base font.
title_style = dict(
    text="<b>BoxPlot－系列四 廣告效益分析</b>",
    y=0.95,
    x=0.5,
    xanchor='center',
)
xaxis_style = dict(title='Month', tickmode='linear')
base_font = dict(
    family="Courier New, monospace",
    size=20,
    color="lightslategrey",
)
fig.update_layout(
    title=title_style,
    yaxis_title='Profit',
    xaxis=xaxis_style,
    width=1800,
    height=960,
    boxmode='group',
    font=base_font,
)


### 4. 將產出另存

In [None]:
# Save the figure as an interactive HTML file and open it in the browser.
# The ".html" suffix is spelled out: plotly appends it when it is missing,
# but emits a warning while doing so.
py.plot(fig, filename='BoxPlot－系列四 廣告效益分析.html', auto_open=True)

# Save the figure as a static PNG image.
# NOTE(review): static image export presumably needs an extra rendering
# package (e.g. kaleido) installed — confirm in the target environment.
fig.write_image("BoxPlot－系列四 廣告效益分析.png")