# Hypothesis Testing

In [2]:
#Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

In [6]:
#Read the cleaned dataset and discard the 'Unnamed: 0' column
#(presumably an index column saved with the CSV - TODO confirm against the export step)
columns_to_drop = ['Unnamed: 0']
df = pd.read_csv("../data/clean/tableau.csv").drop(columns=columns_to_drop)
#Preview the first rows
df.head()

Unnamed: 0,client_id,tenure_yr,tenure_mnth,age,gender,num_acc,balance,calls,log_ons,variation,...,visit_id,process_step,date_time,date,time,completion_rate,average_step_duration_seconds,avg_duration,error_rate_by_step,error_rate
0,555,3.0,46.0,29.5,u,2.0,25454.66,2.0,6.0,test,...,637149525_38041617439_716659,start,2017-04-15 12:57:56,2017-04-15,12:57:56,0.692927,31.286976,47.831799,0.0,0.103673
1,555,3.0,46.0,29.5,u,2.0,25454.66,2.0,6.0,test,...,637149525_38041617439_716659,step_1,2017-04-15 12:58:03,2017-04-15,12:58:03,0.692927,39.568196,47.831799,0.143494,0.103673
2,555,3.0,46.0,29.5,u,2.0,25454.66,2.0,6.0,test,...,637149525_38041617439_716659,step_2,2017-04-15 12:58:35,2017-04-15,12:58:35,0.692927,67.548211,47.831799,0.172344,0.103673
3,555,3.0,46.0,29.5,u,2.0,25454.66,2.0,6.0,test,...,637149525_38041617439_716659,step_3,2017-04-15 13:00:14,2017-04-15,13:00:14,0.692927,65.540621,47.831799,0.170609,0.103673
4,647,12.0,151.0,57.5,m,2.0,30525.8,0.0,4.0,test,...,40369564_40101682850_311847,start,2017-04-12 15:41:28,2017-04-12,15:41:28,0.692927,31.286976,47.831799,0.0,0.103673


In [7]:
#Split the rows by experiment arm, as recorded in the 'variation' column
#Rows belonging to the 'control' arm
control_group = df.loc[df['variation'].eq('control')]
#Rows belonging to the 'test' arm
test_group = df.loc[df['variation'].eq('test')]

$$H_0: \text{Test group completion rate} = \text{Control group completion rate}$$
$$H_1: \text{Test group completion rate} \neq \text{Control group completion rate}$$

$$H_0: \text{Test}_{\text{ratio}} - \text{Control}_{\text{ratio}} = 0$$
$$H_1: \text{Test}_{\text{ratio}} - \text{Control}_{\text{ratio}} \neq 0$$

In [8]:
import statsmodels.api as sm

#Test group: unique clients who reached the 'confirm' step, and all unique clients
completion_test = test_group.loc[test_group['process_step'].eq('confirm'), 'client_id'].nunique()
total_test = test_group['client_id'].nunique()

#Control group: the same two counts
completion_control = control_group.loc[control_group['process_step'].eq('confirm'), 'client_id'].nunique()
total_control = control_group['client_id'].nunique()

#Successes and sample sizes, both ordered [test, control]
counts = [completion_test, completion_control]
nobs = [total_test, total_control]

#Two-proportion z-test with the default two-sided alternative
#(H0: the two completion rates are equal)
z_stat, p_value = sm.stats.proportions_ztest(counts, nobs)

print(f"Z-statistics: {z_stat:.3f}")
print(f"p-value: {p_value:.3f}")

#Decide at the 5% significance level
alpha = 0.05
print(
    "Reject the null hypothesis. The completion rates are significantly different."
    if p_value < alpha
    else "Fail to reject the null hypothesis. The completion rates are not significantly different."
)

Z-statistics: 13.253
p-value: 0.000
Reject the null hypothesis. The completion rates are significantly different.


With a Z-statistic of 13.253 and a p-value far below 0.05, there is strong evidence to reject the null hypothesis. This result indicates a statistically significant difference in completion rates between the two groups, suggesting that the intervention or change applied to the test group had a significant impact on completion rates.

In [None]:
$$H_0: \text{Test group completion rate} \le \text{Control group completion rate} + 5\%$$
$$H_1: \text{Test group completion rate} > \text{Control group completion rate} + 5\%$$

In [9]:
#Completion rate of each group: completing clients divided by all clients in the group
completion_rate_test = completion_test / total_test
completion_rate_control = completion_control / total_control

#Observed uplift of the test group over the control group (absolute difference in rates)
observed_increase = completion_rate_test - completion_rate_control
#Minimum uplift the new design has to deliver (5 percentage points)
threshold = 0.05

#Compare against the named threshold instead of repeating the literal,
#so changing the threshold in one place cannot leave the check out of sync
if observed_increase > threshold:
    print("The observed increase in completion rate meets or exceeds the 5% threshold.")
else:
    print("The observed increase in completion rate does not meet the 5% threshold.")

The observed increase in completion rate does not meet the 5% threshold.


In [12]:
from statsmodels.stats.proportion import proportions_ztest

#Successes and sample sizes, both ordered [test, control]
counts = [completion_test, completion_control]
nobs = [total_test, total_control]

#Minimum uplift the new design has to deliver (5 percentage points)
threshold = 0.05

#One-sided two-proportion z-test of
#H0: rate_test - rate_control <= threshold  vs  H1: rate_test - rate_control > threshold
z_statistic, p_val = proportions_ztest(counts, nobs, value=threshold, alternative='larger')

print(f"Z-statistics: {z_statistic:.3f}")
print(f"p-value: {p_val:.3f}")

#Decide at the 5% significance level
alpha = 0.05
#The conclusion is about exceeding the threshold, not about the rates merely differing:
#the earlier two-sided test already showed the rates differ, so wording this result as
#"not significantly different" would contradict it
if p_val < alpha:
    print(f"Reject the null hypothesis. The test group's completion rate exceeds the control group's by more than {threshold:.0%}.")
else:
    print(f"Fail to reject the null hypothesis. There is no evidence that the test group's completion rate exceeds the control group's by more than {threshold:.0%}.")

Z-statistics: -7.218
p-value: 1.000
Fail to reject the null hypothesis. The completion rates are not significantly different.


The observed increase in completion rate does not meet the 5% threshold. Therefore, despite any statistical significance, the practical significance criterion set by Vanguard is not met. Hence, the new UI design may not be justified from a cost perspective.