In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline
from statsmodels.stats.proportion import proportions_ztest

In [2]:
df_web_exp = pd.read_csv('cleaned_data/data_web_exp.csv')
df_web_exp['process_step'].value_counts()

process_step
start      103403
step_1      67932
step_2      56425
step_3      48284
confirm     42828
Name: count, dtype: int64

In [27]:
# Group by 'visitor_id' and count visits for both groups
visitors_grouped = df_web_exp.groupby('visitor_id')

# Define the required process steps
required_steps = ['start', 'step_1', 'step_2', 'step_3', 'confirm']

# Filter groups (visitors) where all required steps are present
df_success_visitors = visitors_grouped.filter(lambda x: all(step in x['process_step'].values for step in required_steps))

#Filter groups in Test and Control
df_success_test = df_success_visitors[df_success_visitors['Variation'] == 'Test']
df_success_control = df_success_visitors[df_success_visitors['Variation'] == 'Control']

# Count the number of successful visits for Test and Control
n_success_visits_test = df_success_test['visitor_id'].nunique() #18080

n_success_visits_control = df_success_control['visitor_id'].nunique() #14942
n_success_visits_test,n_success_visits_control

(18080, 14942)

In [29]:
#get total visitor_ids of each variation:
total_visitors_control = df_web_exp[df_web_exp['Variation'] == 'Control']['visitor_id'].nunique() #26075
total_visitors_test = df_web_exp[df_web_exp['Variation'] == 'Test']['visitor_id'].nunique() #29713
total_visitors_control,total_visitors_test

(26075, 29713)

In [31]:
# Calculate the completion rate
completion_rate_test = n_success_visits_test / total_visitors_test
completion_rate_control = n_success_visits_control / total_visitors_control
# Output the completion rate
completion_rate_test,completion_rate_control
#60.85%, 57.30%

(0.6084878672634874, 0.573039309683605)

In [33]:
#H0: New design does not make a difference
#H1: There is a difference
#perform z-test with total visitor_ids in each testing group and the two corresponding completion rates:
stat, p_value = proportions_ztest(np.array([completion_rate_test*total_visitors_test,completion_rate_control*total_visitors_control]), np.array([total_visitors_test,total_visitors_control]))
stat, p_value
#p-value: 0,000000000000000019 -> very likely to see this data under H0

(8.499812039682517, 1.8989792745105827e-17)

In [19]:
#H0: new design is not better by a 5% threshold
#H1:new design is better by a 5% threshold
proportions_ztest(np.array([0.6107155987833818*29919,0.575522319899532*26277]), np.array([29919,26277]), value=0.05, alternative='larger')
#p-value = 0,9998 --> very likely to see such data under H0: doesnt look like we can reach the 5% threshold with this test

(-3.566607797228306, 0.9998191839548345)

In [35]:
#same procedure for visit_ids now:
#Group by 'visit_id' and count visits for both groups
visits_grouped = df_web_exp.groupby('visit_id')

# Define the required process steps
required_steps = ['start', 'step_1', 'step_2', 'step_3', 'confirm']

# Filter groups (visits) where all required steps are present
df_success_visits = visits_grouped.filter(lambda x: all(step in x['process_step'].values for step in required_steps))

#Filter groups in Test and Control
df_success_test = df_success_visits[df_success_visits['Variation'] == 'Test']
df_success_control = df_success_visits[df_success_visits['Variation'] == 'Control']

# Count the number of successful visits for Test and Control
n_success_visits_test = df_success_test['visit_id'].nunique() #17684

n_success_visits_control = df_success_control['visit_id'].nunique() #14641

#get total visit_ids of each variation:
total_visits_test = df_web_exp[df_web_exp['Variation'] == 'Test']['visit_id'].nunique() #36897
total_visits_control = df_web_exp[df_web_exp['Variation'] == 'Control']['visit_id'].nunique() #31952
n_success_visits_test,n_success_visits_control,total_visits_test,total_visits_control

(17684, 14641, 36897, 31952)

In [37]:
# Calculate the completion rate
completion_rate_test = n_success_visits_test / total_visits_test
completion_rate_control = n_success_visits_control / total_visits_control
# Output the completion rate
completion_rate_test,completion_rate_control
#48.09%, 46.02%
#H0: New design does not make a difference
#H1: There is a difference
stat, p_value = proportions_ztest(np.array([completion_rate_test*37136,completion_rate_control*32189]), np.array([37136,32189]))
stat, p_value #5.5173, 0,00000003 --> very likely to see this data under H0

(5.541620539073849, 2.996851721997984e-08)

#### Why z-test and yes it is possible without the sigma of the population
Short Answer:
For the Z-test for proportions (which you used to compare the confirmation rates between group A and group B),
you do NOT need the population standard deviation. Instead, the Z-test for proportions uses the sample proportions to
estimate the standard error.

Detailed Explanation:
When you're comparing two proportions (such as confirmation rates for A/B groups), the Z-test formula doesn’t require
the population standard deviation. Instead, it calculates the standard error of the difference in proportions based on the
data you have from your samples.'''

Z-Test for Proportions:
The Z-test is specifically used when you want to compare two proportions (such as confirmation rates) from different groups.
In your case, where you are comparing the confirmation rates of group A (new design) and group B (old design), the Z-test is
commonly used.

When to Use the Z-Test:
Comparing two groups: The Z-test is appropriate when you have two independent groups and you are comparing their proportions
(such as group A and group B). Large sample size: The Z-test relies on the assumption that the sample size is large enough
for the normal approximation to be valid. Generally, each group should have at least 10 successes and 10 failures to ensure
accuracy. Hypothesis testing: It is specifically useful when you want to determine whether the difference between the
two proportions is statistically significant.'''

In [52]:
#### Longer and more detailed code of Z-test for better comprehending

'''import numpy as np
from statsmodels.stats.proportion import proportions_ztest

Data for group A (new design)
confirm_A = 610  # Number of confirmations in group A
total_A = 1000   # Total participants in group A

Data for group B (old design)
confirm_B = 570  # Number of confirmations in group B
total_B = 1000   # Total participants in group B

Combine the data
counts = np.array([confirm_A, confirm_B])  # Number of successes (confirmations) in each group
nobs = np.array([total_A, total_B])        # Number of total participants (sample size) in each group

Perform the Z-test for proportions
stat, p_value = proportions_ztest(counts, nobs)

Output the Z-test statistic and the p-value
print(f"Z-statistic: {stat:.4f}")
print(f"P-value: {p_value:.4f}")

Interpret the result based on a significance level of 0.05
if p_value < 0.05:
    print("Reject the null hypothesis. The difference is statistically significant.")
else:
    print("Fail to reject the null hypothesis. The difference is not statistically significant.")'''

'import numpy as np\nfrom statsmodels.stats.proportion import proportions_ztest\n\nData for group A (new design)\nconfirm_A = 610  # Number of confirmations in group A\ntotal_A = 1000   # Total participants in group A\n\nData for group B (old design)\nconfirm_B = 570  # Number of confirmations in group B\ntotal_B = 1000   # Total participants in group B\n\nCombine the data\ncounts = np.array([confirm_A, confirm_B])  # Number of successes (confirmations) in each group\nnobs = np.array([total_A, total_B])        # Number of total participants (sample size) in each group\n\nPerform the Z-test for proportions\nstat, p_value = proportions_ztest(counts, nobs)\n\nOutput the Z-test statistic and the p-value\nprint(f"Z-statistic: {stat:.4f}")\nprint(f"P-value: {p_value:.4f}")\n\nInterpret the result based on a significance level of 0.05\nif p_value < 0.05:\n    print("Reject the null hypothesis. The difference is statistically significant.")\nelse:\n    print("Fail to reject the null hypothesis