In [1]:
# Show the value of every bare expression in a cell, not only the last one;
# later cells list several expressions in a row and rely on all being displayed.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
# Switch NumPy to its 1.25-style printing - presumably so that NumPy scalars
# inside the displayed lists appear as plain numbers rather than with their
# type wrapper (TODO confirm against the installed NumPy version).
np.set_printoptions(legacy="1.25")
import pandas as pd
from bayesian_testing.metrics import eval_bernoulli_agg, eval_delta_lognormal_agg

In [2]:
# Load the session-level dataset; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="data/session_data.csv")

In [3]:
# example session data - each row represents one session
df.shape[0]   # number of rows in the dataset
df.head(n=5)  # first few sessions

94500

Unnamed: 0,conversion,date,revenue,source,variant
0,0,2021-08-07,0.0,desktop,B
1,1,2021-08-05,7.241015,desktop,C
2,0,2021-08-06,0.0,desktop,A
3,0,2021-08-05,0.0,desktop,C
4,0,2021-08-03,0.0,desktop,A


In [4]:
# Summary statistics per variant: session count, conversions and revenue totals,
# followed by the rates derived from them.
summary = (
    df.groupby('variant')
    .agg(
        # every row is one session, so the group size is the session count
        sessions=('conversion', 'size'),
        conversions=('conversion', 'sum'),
        revenue=('revenue', 'sum'),
    )
    .assign(
        conversion_rate=lambda t: t['conversions'] / t['sessions'],
        revenue_per_session=lambda t: t['revenue'] / t['sessions'],
        revenue_per_converted_sessions=lambda t: t['revenue'] / t['conversions'],
    )
)

summary

Unnamed: 0_level_0,sessions,conversions,revenue,conversion_rate,revenue_per_session,revenue_per_converted_sessions
variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,31500,1580,30830.025613,0.050159,0.978731,19.512674
B,32000,1700,35203.216888,0.053125,1.100101,20.707775
C,31000,1550,37259.563364,0.05,1.201921,24.038428


In [5]:
# Per-variant revenue arrays, one value per session.
variant_A = df.loc[df['variant'] == 'A', 'revenue'].to_numpy()
variant_B = df.loc[df['variant'] == 'B', 'revenue'].to_numpy()
variant_C = df.loc[df['variant'] == 'C', 'revenue'].to_numpy()


def _delta_lognormal_stats(revenue):
    """Return the aggregated statistics of one variant's revenue array.

    Parameters
    ----------
    revenue : numpy.ndarray
        Revenue per session; sessions with revenue > 0 count as conversions.

    Returns
    -------
    tuple
        (number of sessions, number of sessions with positive revenue,
        sum of log(revenue) over positive revenues,
        sum of squared log(revenue) over positive revenues)
    """
    is_converted = revenue > 0
    # Take the log once and reuse it for both the plain and the squared sum.
    log_revenue = np.log(revenue[is_converted])
    return (
        revenue.size,
        # Vectorised count; the built-in sum() would loop over the array in Python.
        int(np.count_nonzero(is_converted)),
        log_revenue.sum(),
        np.square(log_revenue).sum(),
    )


# One list per statistic, each ordered as [A, B, C].
sessions, conversions, sum_log_revenue, sum_log_2_revenue = (
    list(per_variant)
    for per_variant in zip(
        *(_delta_lognormal_stats(rev) for rev in (variant_A, variant_B, variant_C))
    )
)

sessions
conversions
sum_log_revenue
sum_log_2_revenue

[31500, 32000, 31000]

[1580, 1700, 1550]

[3831.806394737816, 4211.72986767986, 4055.965234848171]

[11029.923165846496, 12259.51868396913, 12357.911862914]

## Results

In [6]:
# Conversion rate: probability of being best, expected loss and credible
# interval for each variant (values are ordered A, B, C like the input lists).
# The results vary between unseeded runs, so a fixed seed is passed to keep the
# printed numbers reproducible on re-run.
pbbs, loss, intervals = eval_bernoulli_agg(sessions, conversions, seed=42)
print(f"Probabilities of being best: {pbbs}")
print(f"Expected loss: {loss}")
print(f"95% credible intervals: {intervals}")

Probabilities of being best: [0.043, 0.92335, 0.03365]
Expected loss: [0.0030022, 5.89e-05, 0.0031487]
95% credible intervals: [[0.0477987, 0.0525911], [0.0506903, 0.0556017], [0.0476257, 0.0524881]]


In [7]:
# Revenue per session: probability of being best, expected loss and credible
# interval for each variant (values are ordered A, B, C like the input lists).
# The results vary between unseeded runs, so a fixed seed is passed to keep the
# printed numbers reproducible on re-run.
pbbs, loss, intervals = eval_delta_lognormal_agg(
    sessions, conversions, sum_log_revenue, sum_log_2_revenue, seed=42
)
print(f"Probabilities of being best: {pbbs}")
print(f"Expected loss: {loss}")
print(f"95% credible intervals: {intervals}")

Probabilities of being best: [0.0002, 0.03395, 0.96585]
Expected loss: [0.2212336, 0.1210695, 0.0008982]
95% credible intervals: [[0.9086416, 1.0649507], [1.0043019, 1.170394], [1.1094296, 1.3069562]]


### Results for "being best" = "being minimum"

In [8]:
# Conversion rate with min_is_best=True: "best" now means the LOWEST rate.
# Probability of being best, expected loss and credible interval for each
# variant (values are ordered A, B, C like the input lists).
# The results vary between unseeded runs, so a fixed seed is passed to keep the
# printed numbers reproducible on re-run.
pbbs, loss, intervals = eval_bernoulli_agg(sessions, conversions, seed=42, min_is_best=True)
print(f"Probabilities of being best: {pbbs}")
print(f"Expected loss: {loss}")
print(f"95% credible intervals: {intervals}")

Probabilities of being best: [0.4572, 0.00945, 0.53335]
Expected loss: [0.0007868, 0.00374, 0.00062]
95% credible intervals: [[0.0478316, 0.0526332], [0.050685, 0.0556378], [0.0476584, 0.0524571]]


In [9]:
# Revenue per session with min_is_best=True: "best" now means the LOWEST value.
# Probability of being best, expected loss and credible interval for each
# variant (values are ordered A, B, C like the input lists).
# The results vary between unseeded runs, so a fixed seed is passed to keep the
# printed numbers reproducible on re-run.
pbbs, loss, intervals = eval_delta_lognormal_agg(
    sessions, conversions, sum_log_revenue, sum_log_2_revenue, seed=42, min_is_best=True
)
print(f"Probabilities of being best: {pbbs}")
print(f"Expected loss: {loss}")
print(f"95% credible intervals: {intervals}")

Probabilities of being best: [0.95695, 0.04285, 0.0002]
Expected loss: [0.0010886, 0.1012619, 0.2202282]
95% credible intervals: [[0.9073725, 1.0666041], [1.0044587, 1.1692741], [1.1082288, 1.305592]]
