In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
# Later cells rely on this to show several results (e.g. a length and a preview) at once.
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
import pandas as pd
from bayes_ab_test.experiments import ConversionTest

In [2]:
# Load the session-level example data; the path is relative to the notebook's working directory.
df = pd.read_csv("data/session_data.csv")

In [3]:
# Example session data - each row represents one session.
# Show the number of sessions, then the first few rows.
len(df)
df.head()

94500

Unnamed: 0,conversion,date,revenue,source,variant
0,0,2021-08-07,0.0,desktop,B
1,1,2021-08-05,7.241015,desktop,C
2,0,2021-08-06,0.0,desktop,A
3,0,2021-08-05,0.0,desktop,C
4,0,2021-08-03,0.0,desktop,A


In [4]:
# Summary statistics per variant: traffic, conversions, revenue and the ratios derived from them.

by_variant = df.groupby('variant')

# One row per variant; the number of rows in each group is its number of sessions.
summary = pd.DataFrame({
    'sessions': by_variant.size(),
    'conversions': by_variant['conversion'].sum(),
    'revenue': by_variant['revenue'].sum(),
})

# Derived ratios, all computed from the three aggregated columns above.
summary = summary.assign(
    conversion_rate=summary['conversions'] / summary['sessions'],
    revenue_per_session=summary['revenue'] / summary['sessions'],
    revenue_per_converted_sessions=summary['revenue'] / summary['conversions'],
)

summary

Unnamed: 0_level_0,sessions,conversions,revenue,conversion_rate,revenue_per_session,revenue_per_converted_sessions
variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,31500,1580,30830.025613,0.050159,0.978731,19.512674
B,32000,1700,35203.216888,0.053125,1.100101,20.707775
C,31000,1550,37259.563364,0.05,1.201921,24.038428


In [5]:
def _revenue_stats(revenue):
    """Summarise one variant's per-session revenue array.

    A session counts as converted when its revenue is strictly positive.

    Parameters
    ----------
    revenue : numpy.ndarray
        One revenue value per session.

    Returns
    -------
    tuple
        ``(sessions, conversions, sum_log, sum_log_sq)``: the number of
        sessions, the number of sessions with revenue > 0, the sum of
        log(revenue) over those converted sessions, and the sum of
        log(revenue)**2 over those converted sessions.
    """
    # Filter once and take the logarithm once; both are reused below.
    converted = revenue[revenue > 0]
    log_revenue = np.log(converted)
    # `.size` is a plain Python int, unlike built-in sum() over a boolean array,
    # which loops in Python and returns a NumPy scalar.
    return revenue.size, converted.size, log_revenue.sum(), np.square(log_revenue).sum()


# Per-session revenue of each variant as a NumPy array.
variant_A = df.loc[df['variant'] == 'A', 'revenue'].to_numpy()
variant_B = df.loc[df['variant'] == 'B', 'revenue'].to_numpy()
variant_C = df.loc[df['variant'] == 'C', 'revenue'].to_numpy()

_stats = [_revenue_stats(revenue) for revenue in (variant_A, variant_B, variant_C)]

# Transpose the per-variant tuples into one list per statistic, ordered A, B, C.
sessions, conversions, sum_log_revenue, sum_log_2_revenue = map(list, zip(*_stats))

sessions
conversions
sum_log_revenue
sum_log_2_revenue

[31500, 32000, 31000]

[1580, 1700, 1550]

[3831.806394737816, 4211.72986767986, 4055.965234848171]

[11029.923165846496, 12259.51868396913, 12357.911862914]

In [6]:
# Set up the conversion test from the aggregated per-variant counts computed above.
# The three lists are positional: entry i of `sessions` and `conversions` belongs to variant_names[i].
# NOTE(review): ConversionTest comes from bayes_ab_test; any prior settings are whatever its defaults are — confirm.
conv_test = ConversionTest(
    variant_names=["A", "B", "C"],
    totals=sessions,
    successes=conversions
)

In [7]:
# Probability of each variant being the best; the recorded output has one value per variant
# (presumably in the order the variants were passed: A, B, C — confirm).
conv_test.probabs_of_being_best

[0.04215, 0.92105, 0.0368]

In [8]:
# Per-variant report: totals, successes, conversion rate and probability of being best.
# NOTE(review): the probabilities recorded here differ slightly from those of the previous cell,
# which suggests they are re-estimated by random simulation on each call — confirm, and fix a seed
# if reproducible numbers are needed.
conv_test.evaluate()

[{'variant': 'A',
  'totals': 31500,
  'successes': 1580,
  'conv. rate': 0.05016,
  'prob. being best': 0.0412},
 {'variant': 'B',
  'totals': 32000,
  'successes': 1700,
  'conv. rate': 0.05312,
  'prob. being best': 0.92485},
 {'variant': 'C',
  'totals': 31000,
  'successes': 1550,
  'conv. rate': 0.05,
  'prob. being best': 0.03395}]

In [9]:
# Variant D is added from raw per-session outcomes rather than aggregated counts:
# 15 sessions, of which only the last one converted.
variant_d_outcomes = [0] * 14 + [1]
conv_test.add_variant("D", variant_d_outcomes)

In [10]:
# Re-check the probabilities after adding variant D; the recorded output now has four values.
conv_test.probabs_of_being_best

[0.0135, 0.3177, 0.0118, 0.657]

In [11]:
# Full per-variant report including the newly added variant D.
# NOTE(review): in the recorded output D gets the highest probability of being best from only
# 15 sessions and 1 conversion — presumably a small-sample effect; interpret with care.
conv_test.evaluate()

[{'variant': 'A',
  'totals': 31500,
  'successes': 1580,
  'conv. rate': 0.05016,
  'prob. being best': 0.012},
 {'variant': 'B',
  'totals': 32000,
  'successes': 1700,
  'conv. rate': 0.05312,
  'prob. being best': 0.32055},
 {'variant': 'C',
  'totals': 31000,
  'successes': 1550,
  'conv. rate': 0.05,
  'prob. being best': 0.0118},
 {'variant': 'D',
  'totals': 15,
  'successes': 1,
  'conv. rate': 0.06667,
  'prob. being best': 0.65565}]