# Imports

In [156]:
import pandas as pd
from numpy.random import beta as rbeta, gamma as rgamma

# Functions

In [157]:
def probability_B_beats_A(alpha_a, beta_a, k_a, theta_a, alpha_b, beta_b, k_b, theta_b, m_samples):
    """Estimate by Monte Carlo how often variant B beats variant A.

    For each variant, ``m_samples`` values of ``lambda`` are drawn from
    Beta(alpha, beta) and ``m_samples`` values of ``omega`` from
    Gamma(shape=k, scale=theta). The draws are then compared pairwise.

    Parameters
    ----------
    alpha_a, beta_a, alpha_b, beta_b : float
        Beta distribution parameters for ``lambda`` of variants A and B.
    k_a, theta_a, k_b, theta_b : float
        Gamma shape (``k``) and scale (``theta``) for ``omega`` of variants
        A and B.
    m_samples : int
        Number of random draws per distribution.

    Returns
    -------
    tuple of float
        ``(conv, rev, arpu)`` where each entry is the share of draws in which

        * ``lambdaB > lambdaA``                   (``conv``),
        * ``1/omegaB > 1/omegaA``                 (``rev``),
        * ``lambdaB/omegaB > lambdaA/omegaA``     (``arpu``).

        If any distribution parameter is not strictly positive the
        distributions are undefined and ``(0.0, 0.0, 0.0)`` is returned.
    """
    distribution_params = (
        alpha_a, beta_a, k_a, theta_a,
        alpha_b, beta_b, k_b, theta_b,
    )
    # Beta and Gamma need strictly positive parameters. The previous version
    # fell through to `return` with the result names undefined in this case.
    if any(param <= 0 for param in distribution_params):
        return 0.0, 0.0, 0.0

    lambdaA = rbeta(alpha_a, beta_a, size=m_samples)
    lambdaB = rbeta(alpha_b, beta_b, size=m_samples)
    omegaA = rgamma(size=m_samples, shape=k_a, scale=theta_a)
    omegaB = rgamma(size=m_samples, shape=k_b, scale=theta_b)

    # Count wins with the vectorized array method rather than the builtin
    # sum(), which iterates element by element in Python.
    convProbBbeatsA = int((lambdaB > lambdaA).sum()) / m_samples
    revProbBbeatsA = int((1 / omegaB > 1 / omegaA).sum()) / m_samples
    arpuProbBbeatsA = int((lambdaB / omegaB > lambdaA / omegaA).sum()) / m_samples

    return convProbBbeatsA, revProbBbeatsA, arpuProbBbeatsA

# Data Preparation

In [158]:
# Load the raw experiment export (semicolon-separated). The revenue column
# is handled as text with a decimal comma, so swap the comma for a dot
# before converting it to a numeric dtype.
result = (
    pd.read_csv("dataset_for_experiment_result.csv", sep=";")
    .assign(revenue=lambda raw: pd.to_numeric(raw["revenue"].str.replace(",", ".")))
)

In [159]:
# Collapse to one row per (id, group_name, os): average the retention flag
# and total the revenue over that key's rows.
result = (
    result
    .groupby(["id", "group_name", "os"], as_index=False)
    .agg({"1_month_retention": "mean", "revenue": "sum"})
)

In [160]:
# Preview the first five rows of the aggregated table.
result.head(n=5)

Unnamed: 0,id,group_name,os,1_month_retention,revenue
0,00003044-d7c4-4734-9d2f-9e50d5ce6a7d,a,Android,1,15.0
1,0000effd-4d00-46dc-8f0c-22c9c5e381df,a,Android,1,0.5
2,00039bcd-d6ee-46fb-bf91-11d10bdf8de0,a,Android,1,20.0
3,0004c819-3348-4cf1-8a3d-7447e35e1d72,a,Android,0,3.258179
4,000728AB-086A-48CB-91ED-E0BBFAB15598,a,iOS,1,0.0


In [161]:
# One frame per (group, OS) segment, with the segment columns removed.
tmp_a_A = (
    result.loc[result["group_name"].eq("a") & result["os"].eq("Android")]
    .drop(columns=["group_name", "os"])
)
tmp_a_i = (
    result.loc[result["group_name"].eq("a") & result["os"].eq("iOS")]
    .drop(columns=["group_name", "os"])
)
tmp_c_A = (
    result.loc[result["group_name"].eq("control") & result["os"].eq("Android")]
    .drop(columns=["group_name", "os"])
)
tmp_c_i = (
    result.loc[result["group_name"].eq("control") & result["os"].eq("iOS")]
    .drop(columns=["group_name", "os"])
)

# Results

In [162]:
# Retention per (group, OS): number of rows, number retained, retention rate.
new_res_ret = (
    result
    .groupby(["group_name", "os"], as_index=False)
    .agg({"1_month_retention": ["count", "sum", "mean"]})
)
new_res_ret

Unnamed: 0_level_0,group_name,os,1_month_retention,1_month_retention,1_month_retention
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,sum,mean
0,a,Android,11635,5838,0.501762
1,a,iOS,4890,3871,0.791616
2,control,Android,11793,5892,0.499618
3,control,iOS,4772,3776,0.791282


In [163]:
# Revenue per (group, OS), restricted to rows with non-zero revenue:
# number of such rows, their mean revenue and their total revenue.
new_res_rev = (
    result.loc[result["revenue"].ne(0)]
    .groupby(["group_name", "os"], as_index=False)
    .agg({"revenue": ["count", "mean", "sum"]})
)
new_res_rev

Unnamed: 0_level_0,group_name,os,revenue,revenue,revenue
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,sum
0,a,Android,5690,8.983502,51116.127454
1,a,iOS,2688,7.59362,20411.651903
2,control,Android,5774,8.768163,50627.372443
3,control,iOS,2689,7.129589,19171.465474


# Android

In [168]:
# Android segment. The "_a" parameters describe the control group and the
# "_b" parameters describe test group "a"; the numbers are copied from the
# retention and revenue tables above.
m_samples = 100_000

# Retention: 1 + retained rows, 1 + non-retained rows.
alpha_a, beta_a = 1 + 5892, 1 + (11793 - 5892)   # control: 5892 of 11793
alpha_b, beta_b = 1 + 5838, 1 + (11635 - 5838)   # group "a": 5838 of 11635

# Revenue: shape = number of non-zero-revenue rows,
# scale = 1 / (1 + revenue sum, truncated to an integer).
k_a, theta_a = 5774, 1 / (1 + 50627)             # control
k_b, theta_b = 5690, 1 / (1 + 51116)             # group "a"

probability_B_beats_A(
    alpha_a, beta_a, k_a, theta_a,
    alpha_b, beta_b, k_b, theta_b,
    m_samples,
)

(0.62776, 0.90342, 0.89552)

Среди Android-пользователей вероятность того, что группа "a" превосходит группу "control" по показателю 1 month retention, составляет 62,8%, по показателю revenue — 90,3%, а по третьему показателю из вывода функции (ARPU) — 89,6%.

# iOS

In [167]:
# iOS segment. The "_a" parameters describe the control group and the
# "_b" parameters describe test group "a"; the numbers are copied from the
# retention and revenue tables above.
m_samples = 100_000

# Retention: 1 + retained rows, 1 + non-retained rows.
alpha_a, beta_a = 1 + 3776, 1 + (4772 - 3776)    # control: 3776 of 4772
alpha_b, beta_b = 1 + 3871, 1 + (4890 - 3871)    # group "a": 3871 of 4890

# Revenue: shape = number of non-zero-revenue rows,
# scale = 1 / (1 + revenue sum, truncated to an integer).
k_a, theta_a = 2689, 1 / (1 + 19171)             # control
k_b, theta_b = 2688, 1 / (1 + 20411)             # group "a"

probability_B_beats_A(
    alpha_a, beta_a, k_a, theta_a,
    alpha_b, beta_b, k_b, theta_b,
    m_samples,
)

(0.51625, 0.98923, 0.98488)

Среди iOS-пользователей вероятность того, что группа "a" превосходит группу "control" по показателю 1 month retention, составляет 51,6%, по показателю revenue — 98,9%, а по третьему показателю из вывода функции (ARPU) — 98,5%.

Reference 1: <https://www.evanmiller.org/bayesian-ab-testing.html>

Reference 2: <https://github.com/Vidogreg/bayes-ab-testing/blob/master/bayes-arpu-test/utilities.R>