# `盒鬚圖 - 哪個廣告效果好？ 電商產業廣告效果分析實戰案例`
### 作者：徐子皓
***

## 引入套件包與原始資料

In [1]:
# 引入套件包
import pandas as pd
import os
from collections import Counter
import heapq
import plotly.offline as py
import plotly.graph_objects as go
# 引入電商資料
data = pd.read_csv("電商交易資料.csv", encoding='utf-8-sig')
data.head()

Unnamed: 0,顏色,單價,成本,系列,產品,訂單時間,會員,性別,年紀,廣告代號all,尺寸
0,,643.195,394.8,系列4,產品4-1,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無
1,,391.51,225.365,系列4,產品4-2,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無
2,watermelonred,713.93,416.185,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S
3,,557.655,337.225,系列4,產品4-4,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無
4,white,628.39,366.835,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S


## 資料處理

### 篩選資料欄位、挑選產品系列

In [2]:
data = data[['單價', '成本', '系列', '訂單時間', '廣告代號all']]
data = data[data['系列'] == '系列4']
data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all
0,643.195,394.8,系列4,2016-01-01T03:19:35,廣告_YND_pid
1,391.51,225.365,系列4,2016-01-01T03:19:35,廣告_YND_pid
2,713.93,416.185,系列4,2016-01-01T03:19:35,廣告_YND_pid
3,557.655,337.225,系列4,2016-01-01T03:19:35,廣告_YND_pid
4,628.39,366.835,系列4,2016-01-01T03:19:35,廣告_YND_pid


### 以「_」區分廣告代號

In [3]:
ad_split = data['廣告代號all'].str.split('_', expand=True)
ad_split

Unnamed: 0,0,1,2
0,廣告,YND,pid
1,廣告,YND,pid
2,廣告,YND,pid
3,廣告,YND,pid
4,廣告,YND,pid
...,...,...,...
11054,廣告,KBDG,MP
11055,廣告,KBDG,MP
11056,廣告,KBDG,MP
11057,廣告,KBDG,MP


### 挑選出廣告系列名稱

In [4]:
ad_series = data['廣告代號all'].str.split('_', expand=True)[1].tolist()
print(ad_series[:10])

['YND', 'YND', 'YND', 'YND', 'YND', 'YND', 'YND', '自然流量', '自然流量', '自然流量']


In [5]:
data['廣告代號'] = ad_series
data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,廣告代號
0,643.195,394.8,系列4,2016-01-01T03:19:35,廣告_YND_pid,YND
1,391.51,225.365,系列4,2016-01-01T03:19:35,廣告_YND_pid,YND
2,713.93,416.185,系列4,2016-01-01T03:19:35,廣告_YND_pid,YND
3,557.655,337.225,系列4,2016-01-01T03:19:35,廣告_YND_pid,YND
4,628.39,366.835,系列4,2016-01-01T03:19:35,廣告_YND_pid,YND


## 挑選主力廣告

### 廣告總類數量

In [6]:
data['廣告代號'].nunique()

36

### 計算每個廣告的出現頻率

In [7]:
count_list = Counter(data['廣告代號'])
count_list

Counter({'YND': 52,
         '自然流量': 3780,
         'ikip': 652,
         'B2K': 452,
         'critei': 1285,
         'qdwit': 896,
         'qpw': 5,
         'KDPOD': 937,
         'B2KPOD': 23,
         'KDMK': 320,
         'cridgewell': 6,
         'pic': 5,
         'B2KMK': 10,
         'KDP': 1793,
         'GINEP': 154,
         'edmP': 128,
         'edmM': 4,
         'B2KP': 57,
         'KDPM': 140,
         'KDM': 2,
         'GINEM': 13,
         'B2KM': 1,
         'edmD': 1,
         'edm': 2,
         'KDMP': 29,
         'edmMK': 20,
         'B2KMP': 1,
         'edmMP': 26,
         'GINEMK': 24,
         'GINEMP': 22,
         'B2KDG': 21,
         'KBP': 31,
         'KBMP': 1,
         'IGP': 20,
         'KBDG': 145,
         'IGDG': 1})

### 將廣告出現頻率製作成資料集

In [8]:
count_frame = pd.DataFrame.from_dict(count_list,orient='index').reset_index() # 轉換為DataFrame形式
count_frame.columns = ['廣告名稱', '樣本數'] # 重新命名欄位名稱
count_frame.head()

Unnamed: 0,廣告名稱,樣本數
0,YND,52
1,自然流量,3780
2,ikip,652
3,B2K,452
4,critei,1285


### 將非「自然流量」的廣告根據樣本數排序

In [9]:
count_frame = count_frame[count_frame['廣告名稱']!='自然流量']
count_frame = count_frame.sort_values(by='樣本數', ascending=False)
count_frame.head()

Unnamed: 0,廣告名稱,樣本數
13,KDP,1793
4,critei,1285
7,KDPOD,937
5,qdwit,896
2,ikip,652


### 挑選前2名廣告

In [10]:
chosen_ad = count_frame['廣告名稱'].tolist()[:2]
chosen_ad

['KDP', 'critei']

### 篩選出受主力廣告推播的交易資料

In [11]:
# 判斷是否符合條件
data['廣告代號'].isin(chosen_ad)

0        False
1        False
2        False
3        False
4        False
         ...  
11054    False
11055    False
11056    False
11057    False
11058    False
Name: 廣告代號, Length: 11059, dtype: bool

In [12]:
# 篩選
data = data[data['廣告代號'].isin(chosen_ad)]
data

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,廣告代號
32,713.930,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei
33,628.390,366.835,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei
34,713.930,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei
48,692.545,544.495,系列4,2016-01-02T04:52:47,廣告_critei_critei,critei
53,924.490,697.480,系列4,2016-01-02T08:05:24,廣告_critei_critei,critei
...,...,...,...,...,...,...
10475,985.355,450.730,系列4,2019-04-02T04:58:26,廣告_KDP_D,KDP
10527,970.550,679.385,系列4,2019-05-14T11:51:03,廣告_KDP_D,KDP
10749,1061.025,383.285,系列4,2019-07-25T06:03:50,廣告_KDP_D,KDP
10750,883.365,383.285,系列4,2019-07-25T06:03:50,廣告_KDP_D,KDP


## 計算不同廣告每期帶來的淨利

### 新增「月份」資料

In [13]:
data['月份'] = pd.DatetimeIndex(data['訂單時間']).month
data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,廣告代號,月份
32,713.93,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1
33,628.39,366.835,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1
34,713.93,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1
48,692.545,544.495,系列4,2016-01-02T04:52:47,廣告_critei_critei,critei,1
53,924.49,697.48,系列4,2016-01-02T08:05:24,廣告_critei_critei,critei,1


### 新增「年月」資料

In [14]:
data['年月'] = pd.to_datetime(data['訂單時間']).dt.to_period('M')
data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,廣告代號,月份,年月
32,713.93,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1,2016-01
33,628.39,366.835,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1,2016-01
34,713.93,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1,2016-01
48,692.545,544.495,系列4,2016-01-02T04:52:47,廣告_critei_critei,critei,1,2016-01
53,924.49,697.48,系列4,2016-01-02T08:05:24,廣告_critei_critei,critei,1,2016-01


### 新增「淨利」資料

In [15]:
data['淨利'] = data['單價'] - data['成本']
data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,廣告代號,月份,年月,淨利
32,713.93,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1,2016-01,297.745
33,628.39,366.835,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1,2016-01,261.555
34,713.93,416.185,系列4,2016-01-01T15:10:43,廣告_critei_critei,critei,1,2016-01,297.745
48,692.545,544.495,系列4,2016-01-02T04:52:47,廣告_critei_critei,critei,1,2016-01,148.05
53,924.49,697.48,系列4,2016-01-02T08:05:24,廣告_critei_critei,critei,1,2016-01,227.01


### 欄位篩選

In [16]:
data = data[['廣告代號', '月份', '年月', '淨利']]
data.head()

Unnamed: 0,廣告代號,月份,年月,淨利
32,critei,1,2016-01,297.745
33,critei,1,2016-01,261.555
34,critei,1,2016-01,297.745
48,critei,1,2016-01,148.05
53,critei,1,2016-01,227.01


### 根據「年月」欄位，進行資料群組

In [17]:
data = data.groupby(['廣告代號', '月份', '年月'])['淨利'].sum()
data = data.to_frame().reset_index()
data.head()

Unnamed: 0,廣告代號,月份,年月,淨利
0,KDP,1,2018-01,46314.975
1,KDP,1,2019-01,1118.6
2,KDP,2,2017-02,12105.555
3,KDP,2,2018-02,2827.755
4,KDP,2,2019-02,858.69


### 刪除「年月」欄位，新增資料計數用欄位「count」

In [18]:
data = data.drop(['年月'], axis=1)
data['count'] = 1
data.head()

Unnamed: 0,廣告代號,月份,淨利,count
0,KDP,1,46314.975,1
1,KDP,1,1118.6,1
2,KDP,2,12105.555,1
3,KDP,2,2827.755,1
4,KDP,2,858.69,1


## 繪圖

### 盒狀圖繪製

In [19]:
fig = go.Figure() # 設定空白畫布
colors_box = ['#3366CC', '#DC3912'] # 設定盒子顏色
color_num = 0
# 繪製盒鬚圖
for i in data['廣告代號'].unique():
    fig.add_trace(go.Box(
    y = data[data['廣告代號'] == i]['淨利'],
    x = data[data['廣告代號'] == i]['月份'],
    name = i,
    marker_color= colors_box[color_num]))
    color_num = color_num + 1

### 平均值折線圖繪製

In [20]:
adlist = list(data['廣告代號'].unique()) # 廣告名稱清單
color_line = ["royalblue" , "firebrick"] # 設定線條顏色
colour = 0 # 取用的顏色順序
# 畫平均線
for i in adlist:
    ad = i # 選用廣告
    meanlist = [] # 平均
    axislist = [] # 月分
    
    for ii in range(1,13):
        axislist.append(ii)
        total = data[data['廣告代號'] == ad][data['月份'] == ii]['淨利'].sum() # 總利潤
        counts = data[data['廣告代號'] == ad][data['月份'] == ii]['count'].sum() # 資料筆數
        aver = total/counts # 平均利潤 = 總利潤/資料筆數
        meanlist.append(aver)
        # 如果1~12月都已經計算完畢，就繪圖
        if ii == 12:   
            # 繪圖
            fig.add_trace(go.Scatter(
            x= axislist,
            y= meanlist,
            mode="lines+markers",
            textfont=dict(
            family="sans serif",
            size=16,
            color="royalblue"),    
            line=dict(color=color_line[colour], width=2),
            ))
            # 即將更換廣告，挑選另一種顏色
            colour = colour + 1 


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will 

### 佈景主題設定

In [21]:
# 設定佈景主題(字體、大小、背景等)
fig.update_layout(
    title={
        'text': "<b>BoxPlot－廣告效益分析</b>",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',},
    yaxis_title='Profit',
    xaxis={
        'title': 'Month',
        'tickmode': 'linear'
        },
    width=1800,
    height=960,
    boxmode='group',
    font=dict(
        family="Courier New, monospace",
        size=20,
        color="lightslategrey"
    )
    )
fig.show()

### 將繪圖成果另存新檔

In [22]:
# 另存html檔
py.plot(fig, filename='CH3-8產出成果_廣告效益分析', auto_open=True)
    
# 另存圖檔
fig.write_image("CH3-8產出成果_廣告效益分析.png")


Your filename `CH3-8產出成果_廣告效益分析` didn't end with .html. Adding .html to the end of your file.

