# `折線圖 - 行銷活動成長分析`
### 作者：徐子皓、陳俊凱
***

## 引入套件包

In [1]:
import pandas as pd 
import numpy as np
import plotly.graph_objects as go
import plotly.offline as py
import plotly.io as pio

## 讀取資料

In [2]:
# Load the raw e-commerce transactions; 'utf-8-sig' drops a leading BOM if the file has one.
sales_data = pd.read_csv(
    '電商交易資料.csv',
    encoding='utf-8-sig',
)
# Show the first rows as the cell output.
sales_data.head()

Unnamed: 0,顏色,單價,成本,系列,產品,訂單時間,會員,性別,年紀,廣告代號all,尺寸,訂單編號
0,,643.195,394.8,系列4,產品4-1,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無,201714
1,,391.51,225.365,系列4,產品4-2,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無,201714
2,watermelonred,713.93,416.185,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S,201714
3,,557.655,337.225,系列4,產品4-4,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,無,201714
4,white,628.39,366.835,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,廣告_YND_pid,S,201714


### 只保留廣告代號的系列，去掉細項

In [3]:
# Keep only the campaign family: the second '_'-separated token of the ad code
# (e.g. '廣告_YND_pid' -> 'YND').
# NOTE: the column is overwritten in place, so re-running this cell on already
# shortened codes yields NaN; re-run from the data-loading cell instead.
ad_code_parts = sales_data['廣告代號all'].str.split('_')
sales_data['廣告代號all'] = ad_code_parts.str[1]
sales_data.head()

Unnamed: 0,顏色,單價,成本,系列,產品,訂單時間,會員,性別,年紀,廣告代號all,尺寸,訂單編號
0,,643.195,394.8,系列4,產品4-1,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,YND,無,201714
1,,391.51,225.365,系列4,產品4-2,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,YND,無,201714
2,watermelonred,713.93,416.185,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,YND,S,201714
3,,557.655,337.225,系列4,產品4-4,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,YND,無,201714
4,white,628.39,366.835,系列4,產品4-3,2016-01-01T03:19:35,B_GSJ_06674,FEMALE,32.0,YND,S,201714


## 資料處理

### 只保留所需欄位

In [4]:
# Keep only the columns used by the profit analysis.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not raise SettingWithCopyWarning.
sales_data = sales_data[['單價','成本','系列','訂單時間','廣告代號all']].copy()
sales_data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all
0,643.195,394.8,系列4,2016-01-01T03:19:35,YND
1,391.51,225.365,系列4,2016-01-01T03:19:35,YND
2,713.93,416.185,系列4,2016-01-01T03:19:35,YND
3,557.655,337.225,系列4,2016-01-01T03:19:35,YND
4,628.39,366.835,系列4,2016-01-01T03:19:35,YND


### 時間資料處理，計算利潤

In [5]:
# Parse the order timestamp once and derive every new column from it.
order_time = pd.to_datetime(sales_data['訂單時間'])

# .assign returns a new frame instead of writing into a (possibly sliced) one,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
# Column order matches the insertion order of the mapping below.
sales_data = sales_data.assign(**{
    '訂單時間': order_time,
    '訂單時間(年)': order_time.dt.year,
    '訂單時間(月)': order_time.dt.month,
    # Profit per order line = unit price - cost.
    '利潤': sales_data['單價'] - sales_data['成本'],
    # Constant 1 per row so that summing it gives the number of order lines.
    'count': 1,
})
sales_data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,訂單時間(年),訂單時間(月),利潤,count
0,643.195,394.8,系列4,2016-01-01 03:19:35,YND,2016,1,248.395,1
1,391.51,225.365,系列4,2016-01-01 03:19:35,YND,2016,1,166.145,1
2,713.93,416.185,系列4,2016-01-01 03:19:35,YND,2016,1,297.745,1
3,557.655,337.225,系列4,2016-01-01 03:19:35,YND,2016,1,220.43,1
4,628.39,366.835,系列4,2016-01-01 03:19:35,YND,2016,1,261.555,1


## 資料篩選

### 篩選系列、年份

In [6]:
# Select the rows of product series '系列4' ...
is_target_series = sales_data['系列'] == '系列4'
# ... that were ordered in 2017.
is_target_year = sales_data['訂單時間(年)'] == 2017
series_data = sales_data[is_target_series & is_target_year]

series_data.head()

Unnamed: 0,單價,成本,系列,訂單時間,廣告代號all,訂單時間(年),訂單時間(月),利潤,count
5315,985.355,488.565,系列4,2017-01-01 03:56:43,critei,2017,1,496.79,1
5316,985.355,498.435,系列4,2017-01-01 03:56:43,critei,2017,1,486.92,1
5317,1464.05,794.535,系列4,2017-01-01 13:02:57,KDPOD,2017,1,669.515,1
5318,1135.05,685.965,系列4,2017-01-01 13:02:57,KDPOD,2017,1,449.085,1
5319,409.605,199.045,系列4,2017-01-01 14:54:39,自然流量,2017,1,210.56,1


### 計算不同月份下的利潤

In [7]:
# Per month: total profit, number of order lines, and their ratio (average profit).
ad_avg_profit = (
    series_data
    .groupby('訂單時間(月)', as_index=False)[['利潤', 'count']]
    .sum()
    .assign(**{'平均利潤': lambda totals: totals['利潤'] / totals['count']})
)
ad_avg_profit.head()

Unnamed: 0,訂單時間(月),利潤,count,平均利潤
0,1,159841.36,375,426.243627
1,2,59678.955,158,377.714905
2,3,135516.745,353,383.900127
3,4,207288.095,521,397.865825
4,5,351893.465,923,381.249691


## 資料視覺化

### 透過for迴圈，製作月份標籤

In [8]:
# Month labels '1月份' ... '12月份' used on the x axis of the charts.
hover_month = [f'{month_no}月份' for month_no in range(1, 13)]

print(hover_month)

['1月份', '2月份', '3月份', '4月份', '5月份', '6月份', '7月份', '8月份', '9月份', '10月份', '11月份', '12月份']


### 繪圖

In [9]:
# Chart content.
# Month labels are derived from the aggregated data itself, so every label is
# paired with its own value even when a month has no orders (a fixed
# 12-label list would shift all later points onto the wrong month).
month_labels = ad_avg_profit['訂單時間(月)'].astype(str) + '月份'
trace = go.Scatter(x=month_labels,
    y=ad_avg_profit['平均利潤'],
    marker={'color': '#669966'},
    mode='lines+markers', line={'width': 2},
    name='每月平均廣告利潤', showlegend=True)
# Theme / layout settings.
layout = go.Layout(title={'text': "2017年廣告利潤平均", 'y': 0.95, 'x': 0.5},
    yaxis_title='平均利潤',
    xaxis={'title': 'Month'},
    width=1800,
    height=900,
    font=dict(size=20,
        color="Black"))
# Build and display the figure.
ad_figure = go.Figure(data=trace, layout=layout)
ad_figure.show()

## 另存為網頁檔以及圖檔

In [10]:
# Static image export.
# NOTE(review): write_image relies on an image-export engine (e.g. kaleido)
# being installed in the kernel environment - confirm before running.
ad_figure.write_image('CH3-2產出成果_2017年－系列4_總廣告平均利潤圖.png')
# Standalone HTML page. The explicit '.html' suffix is the name plotly would
# produce anyway; spelling it out avoids the "didn't end with .html" warning.
py.plot(ad_figure, filename='CH3-2產出成果_2017年－系列4_總廣告平均利潤圖.html', auto_open=True)


Your filename `CH3-2產出成果_2017年－系列4_總廣告平均利潤圖` didn't end with .html. Adding .html to the end of your file.



'CH3-2產出成果_2017年－系列4_總廣告平均利潤圖.html'

In [11]:
# Image export through the HTML page: with image="png" the generated page asks
# the browser to download the figure as a PNG when it is opened.
# The filename carries an explicit '.html' suffix (the name plotly would use
# anyway) so no "didn't end with .html" warning is emitted.
py.plot(
    ad_figure,
    filename='CH3-2產出成果_2017年－系列4_總廣告平均利潤圖.html',
    image="png",
    image_filename='CH3-2產出成果_2017年－系列4_總廣告平均利潤圖',
)

'CH3-2產出成果_2017年－系列4_總廣告平均利潤圖.html'

***
## `每月廣告效益分析折線圖`

### 所有廣告名稱

In [12]:
# Distinct ad codes present in the filtered data, in order of first appearance.
ad_types = series_data['廣告代號all'].unique()
# One-column frame holding those ad codes.
ad_profit_df = pd.DataFrame({"廣告代號": ad_types})
ad_types

array(['critei', 'KDPOD', '自然流量', 'B2KMK', 'ikip', 'B2KPOD', 'KDP',
       'GINEP', 'edmP', 'edmM', 'B2KP', 'KDPM', 'KDM', 'B2K', 'GINEM',
       'qdwit', 'B2KM', 'pic'], dtype=object)

### 透過迴圈，計算每月平均廣告利潤

In [13]:
 # Average profit per order for every (month, ad) pair, computed in one pass.
# This replaces a nested month x ad loop that merged one column per month and
# read the result with positional `series[0]` on a label index (deprecated).
ad_month_totals = series_data.groupby(['訂單時間(月)', '廣告代號all'])[['利潤', 'count']].sum()

# Lay the averages out as months (rows) x ads (columns).
# Reindexing guarantees all 12 months and every ad in `ad_types` are present,
# in that order; combinations without any order are NaN.
ad_profit_df = (
    (ad_month_totals['利潤'] / ad_month_totals['count'])
    .unstack('廣告代號all')
    .reindex(index=range(1, 13), columns=ad_types)
)

# Formatting: label rows as 'N月份' and name the column axis '廣告代號'.
ad_profit_df.index = [str(month) + '月份' for month in ad_profit_df.index]
ad_profit_df.columns.name = '廣告代號'
ad_profit_df

廣告代號,critei,KDPOD,自然流量,B2KMK,ikip,B2KPOD,KDP,GINEP,edmP,edmM,B2KP,KDPM,KDM,B2K,GINEM,qdwit,B2KM,pic
1月份,407.916711,429.984352,418.371646,461.916,531.335,455.665,,,,,,,,,,,,
2月份,381.494853,452.2105,373.389297,,340.844,428.5225,366.835,347.50625,388.22,210.56,,,,,,,,
3月份,361.9,556.01,411.436546,,318.033333,,367.843812,448.865667,387.763056,,289.52,,,,,,,
4月份,421.995909,,401.152078,,440.6955,,388.46346,391.0165,349.014167,,338.0475,,,,,,,
5月份,414.251404,,382.832029,,274.715,,376.803372,364.3675,532.322,387.671667,687.61,336.1064,200.69,,,,,
6月份,448.246981,,379.185556,,410.592,,352.890756,382.005556,336.05,,414.54,387.454505,,608.65,157.92,,,
7月份,421.12,,400.233028,,323.305769,,416.439859,258.813333,291.165,,448.810833,300.048,,,,679.385,,
8月份,346.6344,,383.936146,,394.8,,367.298974,414.54,356.965,,526.4,290.3425,,,,,,
9月份,414.927059,,386.898607,,484.726667,,358.460455,484.726667,602.07,,,,,,,,185.885,
10月份,381.875,,413.73859,,,,467.070333,396.445,272.521667,,618.52,,,,,,,396.445


## 篩選高於每年平均的廣告

### 計算廣告平均利潤

In [14]:
# Yearly benchmark: the unweighted mean of the monthly average profits.
monthly_avg_profit = ad_avg_profit.loc[:, '平均利潤']
ad_year_avg_profit = monthly_avg_profit.mean()
ad_year_avg_profit

399.21267809253

### 進行篩選

In [15]:
# Keep only the ads whose mean monthly profit beats the yearly benchmark.
# Each mean is computed once, and the columns to remove are collected first and
# dropped in a single call. An ad whose mean is NaN (no usable data) is dropped
# as well, instead of being silently kept without being reported.
low_value_ads = []
for ad_name in ad_profit_df.columns:
    ad_mean_profit = ad_profit_df[ad_name].mean()  # NaN months are skipped
    if ad_mean_profit > ad_year_avg_profit:
        print(ad_name + '_具有分析價值')
    else:
        low_value_ads.append(ad_name)

ad_profit_df = ad_profit_df.drop(columns=low_value_ads)

critei_具有分析價值
KDPOD_具有分析價值
自然流量_具有分析價值
B2KMK_具有分析價值
B2KPOD_具有分析價值
B2KP_具有分析價值
B2K_具有分析價值
qdwit_具有分析價值


## 資料視覺化

In [16]:
# Reshape so that each row is one ad and each column is one month.
# NOTE: this transposes in place of the previous frame, so the cell must not be
# re-run on its own without re-running the cells that build ad_profit_df.
ad_profit_df = ad_profit_df.T

# Colour palette for the line traces.
colors = ['#990066','#FFCC00','#000033','#663300','#FF6600','#663333','#CC6666','#FF6666','#339900']

# Build one line trace per ad from its monthly average profit.
traces = []
for i, (ad_name, monthly_profit) in enumerate(ad_profit_df.iterrows()):
    trace = go.Scatter(x=hover_month, y=monthly_profit.tolist(),
                       # Cycle through the palette so having more ads than
                       # colours cannot raise IndexError.
                       marker={'color': colors[i % len(colors)]},
                       mode='lines+markers', line={'width': 2},
                       name=ad_name + '廣告')
    traces.append(trace)

### 標示每月最適廣告

In [17]:
recommend_ad = []         # name of the most profitable ad, one entry per month
recommend_ad_profit = []  # average profit of that ad, one entry per month
recommend_ad_list = []    # up to five top-ranked ads, one list per month

for month in ad_profit_df.columns:
    # Rank the ads by this month's profit, highest first (NaN values sort last).
    ad_profit_df = ad_profit_df.sort_values(month, ascending=False)
    ranked = ad_profit_df[month]
    recommend_ad.append(ranked.index[0])
    # Slicing tolerates fewer than five ads, unlike indexing with [0,1,2,3,4].
    recommend_ad_list.append(ranked.index[:5].tolist())
    # Explicit positional access; `ranked[0]` on a label index is deprecated.
    recommend_ad_profit.append(ranked.iloc[0])

In [18]:
# Marker-only trace highlighting each month's top ad; the ad name is the hover text.
best_marker_style = {
    'size': 20,
    'color': 'rgba(255,235,205,0.5)',
    'line': {'color': '#666666', 'width': 2},
}
max_point = go.Scatter(
    x=hover_month,
    y=recommend_ad_profit,
    mode='markers',
    marker=best_marker_style,
    hovertext=recommend_ad,
    name="每月最適廣告",
)

# Add the highlight trace to the list of line traces.
traces.append(max_point)

### 繪製各廣告每月平均利潤

In [19]:
# Theme / layout settings for the per-ad monthly chart.
layout = go.Layout(
    title=dict(text="2017年每月廣告效益分析", y=0.95, x=0.5),
    xaxis=dict(title='月份'),
    yaxis_title='利潤',
    width=1800,
    height=900,
    boxmode='group',
    font={'size': 20, 'color': "lightslategrey"},
)
# Build the figure from all collected traces and display it.
ad_figure = go.Figure(data=traces, layout=layout)
ad_figure.show()

## 另存為網頁檔以及圖檔

In [20]:
# Static image export.
# NOTE(review): write_image relies on an image-export engine (e.g. kaleido)
# being installed in the kernel environment - confirm before running.
ad_figure.write_image('CH3-2產出成果_2017年－系列4_每月廣告效益分析圖.png')
# Standalone HTML page. The explicit '.html' suffix is the name plotly would
# produce anyway; spelling it out avoids the "didn't end with .html" warning.
py.plot(ad_figure, filename='CH3-2產出成果_2017年－系列4_每月廣告效益分析圖.html', auto_open=True)


Your filename `CH3-2產出成果_2017年－系列4_每月廣告效益分析圖` didn't end with .html. Adding .html to the end of your file.



'CH3-2產出成果_2017年－系列4_每月廣告效益分析圖.html'

## 另存推薦清單

In [21]:
# Tabulate the recommendations: one row per month label, one column per rank position.
recommend_ad_list = pd.DataFrame(data=recommend_ad_list, index=hover_month)
# Save as CSV; 'utf-8-sig' writes a BOM at the start of the file.
recommend_ad_list.to_csv(
    'CH3-2產出成果_2017年－系列4_各月份推薦廣告名單.csv',
    encoding='utf-8-sig',
)
recommend_ad_list

Unnamed: 0,0,1,2,3,4
1月份,B2KMK,B2KPOD,KDPOD,自然流量,critei
2月份,KDPOD,B2KPOD,critei,自然流量,B2KMK
3月份,KDPOD,自然流量,critei,B2KP,B2KPOD
4月份,critei,自然流量,B2KP,KDPOD,B2KPOD
5月份,B2KP,critei,自然流量,KDPOD,B2KPOD
6月份,B2K,critei,B2KP,自然流量,KDPOD
7月份,qdwit,B2KP,critei,自然流量,B2K
8月份,B2KP,自然流量,critei,qdwit,B2K
9月份,critei,自然流量,B2KP,qdwit,B2K
10月份,B2KP,自然流量,critei,qdwit,B2K
