In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats

In [2]:
# Load the raw A/B-test export; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv(filepath_or_buffer='./ab_test.csv')

In [3]:
# Relabel the columns positionally (the frame must have exactly five columns,
# otherwise pandas raises a length-mismatch error).
df.columns = [
    'user_id',
    'timestamp',
    'group',
    'landing_page',
    'converted',
]

In [4]:
# Keep only the rows where the assigned group and the served page agree:
# treatment users on the new page, control users on the old page.
# Naming each mask makes the AND-before-OR grouping explicit.
treatment_on_new_page = (df["group"] == "treatment") & (df["landing_page"] == "new_page")
control_on_old_page = (df["group"] == "control") & (df["landing_page"] == "old_page")

fdf = df[treatment_on_new_page | control_on_old_page]

In [6]:
# Prior hyper-parameters for each arm. Beta(1, 1) is the uniform distribution
# on [0, 1], i.e. no prior preference for any conversion rate.
alpha_control_prior, beta_control_prior = 1, 1
alpha_treatment_prior, beta_treatment_prior = 1, 1

# Frozen scipy Beta distributions representing those priors.
prior_control = stats.beta(
    alpha_control_prior,
    beta_control_prior,
)
prior_treatment = stats.beta(
    alpha_treatment_prior,
    beta_treatment_prior,
)

In [7]:
# Split the filtered frame into its two experiment arms.
control = fdf[fdf['group'] == 'control']
treatment = fdf[fdf['group'] == 'treatment']

# Control arm: conversions, observations (count() tallies non-null entries),
# and the remainder that did not convert.
# NOTE(review): summing assumes `converted` is a 0/1 indicator -- confirm.
conversions_control = control['converted'].sum()
n_control = control['converted'].count()
non_conversions_control = n_control - conversions_control

# Treatment arm: same three quantities.
conversions_treatment = treatment['converted'].sum()
n_treatment = treatment['converted'].count()
non_conversions_treatment = n_treatment - conversions_treatment

In [8]:
# Conjugate Beta update: observed conversions are added to alpha and observed
# non-conversions are added to beta, separately for each arm.

# Control arm
alpha_control_posterior = alpha_control_prior + conversions_control
beta_control_posterior = beta_control_prior + non_conversions_control
posterior_control = stats.beta(
    alpha_control_posterior,
    beta_control_posterior,
)

# Treatment arm
alpha_treatment_posterior = alpha_treatment_prior + conversions_treatment
beta_treatment_posterior = beta_treatment_prior + non_conversions_treatment
posterior_treatment = stats.beta(
    alpha_treatment_posterior,
    beta_treatment_posterior,
)

In [9]:
samples = 100000

# Seed a single generator and share it between both draws so that the result
# is reproducible on a fresh kernel. One shared generator (rather than passing
# the same integer seed to each call) keeps the two sample vectors independent.
rng = np.random.default_rng(42)

# Draw samples from the posterior distributions
control_samples = posterior_control.rvs(samples, random_state=rng)
treatment_samples = posterior_treatment.rvs(samples, random_state=rng)

# Monte Carlo estimate of P(treatment rate > control rate): the fraction of
# paired draws in which the treatment sample exceeds the control sample.
prob_treatment_better = (treatment_samples > control_samples).mean()

print(f"Probability that treatment group's conversion rate is higher: {prob_treatment_better}")

Probability that treatment group's conversion rate is higher: 0.09635


This probability can be read directly as a statement about the A/B test. A value of 0.09635 (approximately 9.6%) means that, given the observed data and the uniform Beta(1, 1) priors chosen above, there is about a 9.6% chance that the treatment group's true conversion rate is higher than the control group's. Because the figure is a Monte Carlo estimate based on 100,000 posterior draws, it will vary slightly from run to run unless the random seed is fixed.

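In this case, the probability that the treatment group's conversion rate is higher is quite low (less than 10%). Equivalently, there is roughly a 90% posterior probability that the control group converts at least as well as the treatment group. This suggests that the treatment group is unlikely to perform better than the control group, based on the available data; note that this probability says nothing about how large the difference between the two rates is.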