Statistical Analysis untuk Test1_Menu

In [14]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

In [17]:
# 1. Fungsi Manual Shapiro-Francia (W')
def shapiro_francia(x):
    n = len(x)
    x_sorted = np.sort(x)
    
    # Menghitung m_i: ekspektasi statistik urutan normal (Blom's Approximation)
    i = np.arange(1, n + 1)
    m = stats.norm.ppf((i - 3/8) / (n + 1/4))
    
    # W' adalah korelasi kuadrat antara data asli dan ekspektasi normal
    correlation_matrix = np.corrcoef(x_sorted, m)
    w_prime = correlation_matrix[0, 1]**2
    
    # Aproksimasi P-Value (Royston, 1993)
    v = np.log(n)
    mu = -1.2725 + 1.0521 * v
    sigma = np.exp(1.0308 - 0.26758 * v)
    
    z = (np.log(1 - w_prime) - mu) / sigma
    p_val = stats.norm.sf(z) 
    
    return w_prime, p_val

# 2. Load Data (Gunakan separator caret '^' sesuai file Anda)
path = r"D:\@02 Personal\@03 Mini Data Project\@07 DEC Portfolio\data\raw\test1_menu.csv"
df1 = pd.read_csv(path, sep='^')
df1.columns = df1.columns.str.strip()

# 3. Uji Normalitas pada Revenue
rev_a = df1[df1['variant'] == 'A_horizontal_menu']['revenue']
rev_b = df1[df1['variant'] == 'B_dropdown_menu']['revenue']

w_a, p_a = shapiro_francia(rev_a)
w_b, p_b = shapiro_francia(rev_b)

print(f"--- Shapiro-Francia Normality Test ---")
print(f"Variant A (Horizontal): W' = {w_a:.4f}, p-value = {p_a:.10f}")
print(f"Variant B (Dropdown)  : W' = {w_b:.4f}, p-value = {p_b:.10f}")

# 4. Kesimpulan Otomatis
alpha = 0.05
w_threshold = 0.98  # Standar W' untuk data yang dianggap mendekati normal

# Kita gunakan logika 'OR' - jika p-value kecil ATAU W' rendah, maka data TIDAK normal
if (p_a < alpha or p_b < alpha) or (w_a < w_threshold or w_b < w_threshold):
    print("\n[ANALYSIS] Result: Data is NOT normally distributed.")
    print(f"Observation: Variant A W'({w_a:.4f}) and Variant B W'({w_b:.4f}) are below {w_threshold}.")
    print("Recommendation: Trust the Mann-Whitney U test for a more robust conclusion.")
else:
    print("\n[ANALYSIS] Result: Data follows a normal distribution.")
    print("Recommendation: Proceed with Welch's T-test.")

--- Shapiro-Francia Normality Test ---
Variant A (Horizontal): W' = 0.8811, p-value = 1.0000000000
Variant B (Dropdown)  : W' = 0.8856, p-value = 1.0000000000

[ANALYSIS] Result: Data is NOT normally distributed.
Observation: Variant A W'(0.8811) and Variant B W'(0.8856) are below 0.98.
Recommendation: Trust the Mann-Whitney U test for a more robust conclusion.


Uji Normalitas tidaklah normal, maka dilanjutkan dengan uji statistik Mann-Whitney U Test

In [19]:
# Menggunakan Mann-Whitney U karena data tidak normal (W' < 0.98)

u_stat, p_val_mw = stats.mannwhitneyu(treatment['revenue'], control['revenue'], alternative='two-sided')

# Menghitung Median (karena untuk data tidak normal, Median lebih jujur daripada Mean)
median_a = control['revenue'].median()
median_b = treatment['revenue'].median()

print(f"--- Mann-Whitney U Test Results (Revenue) ---")
print(f"Median Variant A: {median_a:.4f}")
print(f"Median Variant B: {median_b:.4f}")
print(f"P-Value         : {p_val_mw:.10f}")

# Penentuan Signifikansi
alpha = 0.05
if p_val_mw < alpha:
    print("\n[RESULT] Kesimpulan: STATISTICALLY SIGNIFICANT.")
    print("Terdapat perbedaan Revenue antara menu Vertical dan Horizontal.")
else:
    print("\n[RESULT] Kesimpulan: NOT SIGNIFICANT.")
    print("Setiap perbedaan revenue belum terbukti secara kuat.")

--- Mann-Whitney U Test Results (Revenue) ---
Median Variant A: 2.8624
Median Variant B: 2.6021
P-Value         : 0.0000000238

[RESULT] Kesimpulan: STATISTICALLY SIGNIFICANT.
Terdapat perbedaan Revenue antara menu Vertical dan Horizontal.
