# `單因子變異數分析 - Python實戰：商務資料結構整理`
### 作者：徐子皓
***

### 導入原始資料

In [1]:
import pandas as pd
data = pd.read_csv('CH4-21_廣告交易資料.csv', encoding = 'big5')
data.head()

Unnamed: 0,廣告,消費金額
0,廣告1,20.83
1,廣告1,21.45
2,廣告1,27.09
3,廣告1,14.09
4,廣告1,31.23


### 區分不同廣告系列的消費金額

In [2]:
alist = data[data ['廣告'] == '廣告1']['消費金額'].tolist()
blist = data[data ['廣告'] == '廣告2']['消費金額'].tolist()
clist = data[data ['廣告'] == '廣告3']['消費金額'].tolist()
print('廣告1的前五筆消費金額為:', alist[:5])
print('廣告2的前五筆消費金額為:', blist[:5])
print('廣告3的前五筆消費金額為:', clist[:5])

廣告1的前五筆消費金額為: [20.83, 21.45, 27.09, 14.09, 31.23]
廣告2的前五筆消費金額為: [20.23, 18.1, 12.34, 24.08, 22.39]
廣告3的前五筆消費金額為: [8.81, 20.26, 17.46, 25.48, 22.64]


***

# `單因子變異數分析 - Python實戰：如何決定多廣告的優化策略？`
### 作者：徐子皓
***

## 事前檢定

### 常態性檢定

In [3]:
import scipy.stats as st
print('廣告1的常態性檢定結果：')
print(st.shapiro(alist), '\n')
print('廣告2的常態性檢定結果：')
print(st.shapiro(blist), '\n')
print('廣告3的常態性檢定結果：')
print(st.shapiro(clist))

廣告1的常態性檢定結果：
ShapiroResult(statistic=0.9915865659713745, pvalue=0.6819638013839722) 

廣告2的常態性檢定結果：
ShapiroResult(statistic=0.9929247498512268, pvalue=0.805463969707489) 

廣告3的常態性檢定結果：
ShapiroResult(statistic=0.9802003502845764, pvalue=0.07433854043483734)


### 同質性檢定

In [4]:
st.levene(alist, blist, clist, center='mean')

LeveneResult(statistic=0.6891243668404422, pvalue=0.5026819827621525)

## 單因子變異數分析

In [5]:
f_value, p_value = st.f_oneway(alist, blist, clist)
p_value

0.0955577857122512

## 事後檢定

In [6]:
from pingouin import pairwise_tukey
m_comp = pairwise_tukey(data=data, dv='消費金額', between='廣告')
m_comp

  **kwargs
  **kwargs


Unnamed: 0,A,B,mean(A),mean(B),diff,se,T,p-tukey,hedges
0,廣告1,廣告2,17.135167,15.592417,1.54275,0.781281,1.974642,0.118914,0.254121
1,廣告1,廣告3,17.135167,15.748167,1.387,0.781281,1.77529,0.178294,0.228466
2,廣告2,廣告3,15.592417,15.748167,-0.15575,0.781281,-0.199352,0.9,-0.025655


## 進階資料處理

### 挑選有關的資料欄位

In [7]:
table = m_comp.drop(columns = ['mean(A)', 'mean(B)', 'T', 'p-tukey', 'hedges'])
table

Unnamed: 0,A,B,diff,se
0,廣告1,廣告2,1.54275,0.781281
1,廣告1,廣告3,1.387,0.781281
2,廣告2,廣告3,-0.15575,0.781281


### 製作相反情況的資料

In [8]:
# 「A」欄反轉資料
add_A = table['B'].tolist()
# 「B」欄反轉資料
add_B = table['A'].tolist()
# 「diff」欄反轉資料
diff =  (table['diff'] * -1).tolist()
# 「se」欄反轉資料
se = table['se'].tolist()
# 將反轉資料合併
table2 = pd.DataFrame(zip(add_A, add_B, diff, se), columns = ['A', 'B', 'diff', 'se'])
table2

Unnamed: 0,A,B,diff,se
0,廣告2,廣告1,-1.54275,0.781281
1,廣告3,廣告1,-1.387,0.781281
2,廣告3,廣告2,0.15575,0.781281


### 將原資料與反轉資料進行合併

In [9]:
new_table = pd.concat([table, table2], ignore_index=True)
new_table

Unnamed: 0,A,B,diff,se
0,廣告1,廣告2,1.54275,0.781281
1,廣告1,廣告3,1.387,0.781281
2,廣告2,廣告3,-0.15575,0.781281
3,廣告2,廣告1,-1.54275,0.781281
4,廣告3,廣告1,-1.387,0.781281
5,廣告3,廣告2,0.15575,0.781281


### 制定上下界

In [10]:
new_table['上界'] = new_table['diff'] + new_table['se']*1.96
new_table['下界'] = new_table['diff'] - new_table['se']*1.96
new_table

Unnamed: 0,A,B,diff,se,上界,下界
0,廣告1,廣告2,1.54275,0.781281,3.074061,0.011439
1,廣告1,廣告3,1.387,0.781281,2.918311,-0.144311
2,廣告2,廣告3,-0.15575,0.781281,1.375561,-1.687061
3,廣告2,廣告1,-1.54275,0.781281,-0.011439,-3.074061
4,廣告3,廣告1,-1.387,0.781281,0.144311,-2.918311
5,廣告3,廣告2,0.15575,0.781281,1.687061,-1.375561


### 判斷比較結果是否顯著

In [11]:
import numpy as np
justice = []
# 判斷是否顯著
for i in range(0,new_table.shape[0]):
    upper = np.sign(new_table.iloc[i,4]) # 上界正負數判斷
    lower = np.sign(new_table.iloc[i,5]) # 下界正負數判斷
    # 如果上下界同時為正數或負數，則代表顯著
    if upper == lower:
        justice.append('Yes')
    # 否則代表不顯著
    else:
        justice.append('No')
new_table['是否顯著'] = justice
new_table

Unnamed: 0,A,B,diff,se,上界,下界,是否顯著
0,廣告1,廣告2,1.54275,0.781281,3.074061,0.011439,Yes
1,廣告1,廣告3,1.387,0.781281,2.918311,-0.144311,No
2,廣告2,廣告3,-0.15575,0.781281,1.375561,-1.687061,No
3,廣告2,廣告1,-1.54275,0.781281,-0.011439,-3.074061,Yes
4,廣告3,廣告1,-1.387,0.781281,0.144311,-2.918311,No
5,廣告3,廣告2,0.15575,0.781281,1.687061,-1.375561,No


## 視覺化呈現

### 繪圖

In [12]:
import plotly.offline as py
import plotly.graph_objects as go
fig = go.Figure()
# 透過迴圈，逐一繪製每筆資料
for i in range(0, new_table.shape[0]):
    if new_table.iloc[i,6] == 'Yes':
        color = 'firebrick'
        name = '顯著'
    else:
        color = 'green'
        name = '不顯著'
    fig.add_trace(go.Scatter(
        x = [new_table.iloc[i,5], new_table.iloc[i,2], new_table.iloc[i,4]],
        y = [new_table.iloc[i,0] + '-' + new_table.iloc[i,1], new_table.iloc[i,0] + '-' + new_table.iloc[i,1], new_table.iloc[i,0] + '-' + new_table.iloc[i,1]],
        mode = "lines+markers",
        textfont=dict(
            family="sans serif",
            size=16,
            color=color),    
            line=dict(color=color, width=2),
        name = name,
        legendgroup = name,
        ))

### 佈景主題設定

In [13]:
fig.update_layout(
    title={
        'text': "<b>One-Way ANOVA 廣告效益分析</b>",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',},
    width=1800,
    height=960,
    boxmode='group',
    font=dict(
        family="Courier New, monospace",
        size=20,
        color="lightslategrey"
    )
    )   

### 將產出另存新檔

In [14]:
# 另存互動式網頁
py.plot(fig, filename='CH4-22產出成果_廣告效益分析圖', auto_open=True)
# 另存.png圖檔
fig.write_image("CH4-22產出成果_廣告效益分析圖.png")


Your filename `CH4-22產出成果_廣告效益分析圖` didn't end with .html. Adding .html to the end of your file.

