In [2]:
import pandas as pd
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Для параллельного контроля при эксперименте может использоваться A/A/B-тест:

In [27]:
# Load the tab-separated event log, drop exact duplicate rows, and turn the
# numeric experiment ids into readable group labels in a single chain.
group_labels = {246: 'control_1', 247: 'control_2', 248: 'test'}
logs_data = (
    pd.read_csv("../data/logs_exp_us.csv", sep='\t')
    .drop_duplicates()
    # Relabel before renaming so the mapping still targets the raw column name.
    .assign(ExpId=lambda frame: frame['ExpId'].replace(group_labels))
    .rename(columns={'ExpId': 'TestGroup'})
)
logs_data

Unnamed: 0,EventName,DeviceIDHash,EventTimestamp,TestGroup
0,MainScreenAppear,4575588528974610257,1564029816,control_1
1,MainScreenAppear,7416695313311560658,1564053102,control_1
2,PaymentScreenSuccessful,3518123091307005509,1564054127,test
3,CartScreenAppear,3518123091307005509,1564054127,test
4,PaymentScreenSuccessful,6217807653094995999,1564055322,test
...,...,...,...,...
244121,MainScreenAppear,4599628364049201812,1565212345,control_2
244122,MainScreenAppear,5849806612437486590,1565212439,control_1
244123,MainScreenAppear,5746969938801999050,1565212483,control_1
244124,MainScreenAppear,5746969938801999050,1565212498,control_1


In [29]:
# Number of MainScreenAppear events recorded in each experiment group.
is_main_screen = logs_data['EventName'] == 'MainScreenAppear'
logs_data.loc[is_main_screen, 'TestGroup'].value_counts()

TestGroup
test         41175
control_2    39677
control_1    38249
Name: count, dtype: int64

Будем использовать z-тест пропорций: числом успехов считаем количество событий `PaymentScreenSuccessful`, а числом испытаний (наблюдений) — количество событий `MainScreenAppear`.

Для начала проверьте нулевую гипотезу о равенстве долей для двух контрольных групп (A/A-тест).

In [23]:
from statsmodels.stats.proportion import proportions_ztest

# Significance level reused by every test in this notebook.
alpha = 0.05

# A/A check: compare the two control groups with each other.
control_1 = logs_data[logs_data['TestGroup'].eq('control_1')]
control_2 = logs_data[logs_data['TestGroup'].eq('control_2')]

# Successes are PaymentScreenSuccessful events; trials are MainScreenAppear
# events. Both are counted per event row, not per unique device.
count_1 = int(control_1['EventName'].eq('PaymentScreenSuccessful').sum())
count_2 = int(control_2['EventName'].eq('PaymentScreenSuccessful').sum())
nobs_1 = int(control_1['EventName'].eq('MainScreenAppear').sum())
nobs_2 = int(control_2['EventName'].eq('MainScreenAppear').sum())

control_successes = np.array([count_1, count_2])
control_trials = np.array([nobs_1, nobs_2])
stat, pval_controls = proportions_ztest(count=control_successes, nobs=control_trials)

# NOTE(review): in the saved run this p-value is ~2e-73, i.e. the two control
# groups differ significantly on this metric; interpret that before pooling them.
pval_controls, pval_controls < alpha

(2.1148551469454695e-73, True)

Теперь объедините две контрольные группы в одну и проверьте гипотезу для обычного A/B-теста.

In [24]:
### ╰( ͡° ͜ʖ ͡° )つ──☆*:・ﾟ
# Pooled A/B comparison: both control groups merged into one, against the test group.
test = logs_data[logs_data['TestGroup'].eq('test')]

# Test-group successes (payments) and trials (main-screen views), per event row.
test_events = test['EventName']
test_count = int(test_events.eq('PaymentScreenSuccessful').sum())
test_nobs = int(test_events.eq('MainScreenAppear').sum())

# The pooled control totals are the sums of the per-control counts computed earlier.
control_count = count_1 + count_2
control_nobs = nobs_1 + nobs_2

stat, pval = proportions_ztest(
    count=np.array([control_count, test_count]),
    nobs=np.array([control_nobs, test_nobs]),
)
pval, pval < alpha

(5.397051376310148e-07, True)

И, наконец, используйте подход A/B/C-тестов с поправкой Бонферрони

In [25]:
### ╰( ͡° ͜ʖ ͡° )つ──☆*:・ﾟ
# Pairwise comparisons: each control group separately against the test group.
c1_vs_test = dict(
    count=np.array([count_1, test_count]),
    nobs=np.array([nobs_1, test_nobs]),
)
c2_vs_test = dict(
    count=np.array([count_2, test_count]),
    nobs=np.array([nobs_2, test_nobs]),
)

# Only the p-values are used afterwards; `stat` is overwritten by each call.
stat, pval_c1_test = proportions_ztest(**c1_vs_test)
stat, pval_c2_test = proportions_ztest(**c2_vs_test)

pval_c1_test, pval_c2_test

(1.0451245019488738e-06, 1.0713756684652275e-41)

In [26]:
# Bonferroni correction: with m pairwise comparisons, each one is tested at
# alpha / m so that the family-wise error rate stays at most alpha.
pairwise_pvals = (pval_controls, pval_c1_test, pval_c2_test)

# m is derived from the tuple instead of being hard-coded, so the threshold
# stays correct if a comparison is added or removed.
bonferroni_alpha = alpha / len(pairwise_pvals)

# One reject / do-not-reject flag per comparison, in the order listed above.
tuple(p_value < bonferroni_alpha for p_value in pairwise_pvals)

(True, True, True)