In [None]:
import numpy as np
import pandas as pd
import tqdm
from statsmodels.stats.proportion import proportions_ztest
from scipy.stats import mannwhitneyu, ttest_ind 

Функция для генерации выборки:

In [None]:
def generate_data(size, group_size, p_base, p_delta):
    """Simulate binary events for users with heterogeneous conversion rates.

    Every user produces exactly ``group_size`` events. Each user gets an
    individual conversion probability drawn uniformly from
    ``[p_base - p_delta, p_base + p_delta)``, and each of that user's events
    is an independent Bernoulli draw with that probability.

    Parameters
    ----------
    size : int
        Total number of events to generate; must be a multiple of
        ``group_size``.
    group_size : int
        Number of events per user.
    p_base : float
        Centre of the per-user conversion probability.
    p_delta : float
        Half-width of the uniform spread around ``p_base``.

    Returns
    -------
    pandas.DataFrame
        ``size`` rows with a boolean ``events`` column and an integer
        ``users`` column (user ids ``0 .. size // group_size - 1``).

    Raises
    ------
    ValueError
        If ``size`` is not a multiple of ``group_size``.
    """
    # Integer division: a float count (what `/` yields on Python 3) is
    # rejected by np.random.random / np.arange-based repeats.
    group_count, remainder = divmod(size, group_size)
    if remainder:
        raise ValueError(
            'size ({}) must be a multiple of group_size ({})'.format(
                size, group_size))

    # One probability per user, then expanded to one probability per event.
    p_group = p_base + p_delta * (np.random.random(group_count) * 2 - 1)
    p_repeated = np.repeat(p_group, group_size)

    events = np.random.rand(size) < p_repeated
    users = np.repeat(np.arange(group_count), group_size)

    return pd.DataFrame.from_dict({'events': events, 'users': users})

Локальная линеаризация:

In [None]:
def linearization(control_df, test_df):
    """Build the linearized per-user metric for both experiment arms.

    For every user ``u`` the metric is ``f_u - a * g_u`` where ``f_u`` is the
    sum of the user's ``events``, ``g_u`` is the number of the user's rows,
    and ``a`` is the overall ratio ``sum(f_u) / sum(g_u)`` computed on the
    control frame only. The same ``a`` is applied to the test frame.

    Parameters
    ----------
    control_df, test_df : pandas.DataFrame
        Frames with a ``users`` column (grouping key) and an ``events``
        column (values to aggregate).

    Returns
    -------
    tuple of numpy.ndarray
        ``(control_new_metric, test_new_metric)``, one value per user.
    """
    control_events = control_df.groupby('users').events
    f_u = control_events.sum()
    g_u = control_events.count()
    # float() forces true division: with integer totals, `/` floor-divides
    # under Python 2 semantics and would silently turn the coefficient into 0.
    a_coef = float(f_u.sum()) / g_u.sum()
    control_new_metric = np.array(f_u - a_coef * g_u)

    test_events = test_df.groupby('users').events
    f_u = test_events.sum()
    g_u = test_events.count()
    test_new_metric = np.array(f_u - a_coef * g_u)

    return control_new_metric, test_new_metric

Функция для проведения одного эксперимента и подсчёта статзначимости в нём:

In [None]:
def calc_one_time(
    control_size, test_size, group_size,
    control_cr, test_cr, delta_cr):
    """Simulate one experiment and return the p-values of four tests.

    Generates a control and a test sample with ``generate_data``, linearizes
    them with ``linearization`` and runs each significance test once.

    Parameters
    ----------
    control_size, test_size : int
        Number of events in the control and test samples.
    group_size : int
        Number of events per user in both samples.
    control_cr, test_cr : float
        Base conversion rate of the control and test samples.
    delta_cr : float
        Half-width of the per-user conversion-rate spread, shared by both
        samples.

    Returns
    -------
    tuple of float
        p-values in this order: z-test for proportions on raw events,
        Mann-Whitney U on raw events, t-test on the linearized metric,
        Mann-Whitney U on the linearized metric.
    """
    data_control = generate_data(
        size=control_size,
        group_size=group_size,
        p_base=control_cr,
        p_delta=delta_cr)

    data_test = generate_data(
        size=test_size,
        group_size=group_size,
        p_base=test_cr,
        p_delta=delta_cr)

    new_control, new_test = linearization(data_control, data_test)

    # Vectorised Series.sum() instead of the builtin sum(): same counts,
    # without iterating every event in Python on each simulation.
    success_counts = [
        int(data_control.events.sum()),
        int(data_test.events.sum())]

    return (
        proportions_ztest(
            count=success_counts,
            nobs=[control_size, test_size])[1],
        mannwhitneyu(
            data_control.events, data_test.events, alternative='two-sided').pvalue,
        ttest_ind(new_control, new_test).pvalue,
        mannwhitneyu(new_control, new_test, alternative='two-sided').pvalue)

Функция для оценки на большом количестве экспериментов:

In [None]:
def calc_pvalue(control_size, test_size, group_size,
                control_cr, test_cr, delta_cr, num_random_steps):
    """Repeat the simulated experiment and collect p-values of every test.

    Calls ``calc_one_time`` ``num_random_steps`` times with the given
    parameters and shows a notebook progress bar while doing so.

    Parameters
    ----------
    control_size, test_size, group_size, control_cr, test_cr, delta_cr
        Forwarded unchanged to ``calc_one_time``.
    num_random_steps : int
        Number of independent simulated experiments.

    Returns
    -------
    dict
        ``{'base': {'ztest': [...], 'mhu': [...]},
        'linearization': {'ttest': [...], 'mhu': [...]}}`` where every list
        holds one p-value per simulated experiment.
    """
    p_values = {
        'base': {
            'ztest': [],
            'mhu': []},
        'linearization': {
            'ttest': [],
            'mhu': []}}

    # range() exists on both Python 2 and 3; xrange() is Python 2 only.
    for _ in tqdm.tqdm_notebook(range(num_random_steps)):

        base_ztest, base_mhu, lin_ttest, lin_mhu = calc_one_time(
            control_size, test_size, group_size,
            control_cr, test_cr, delta_cr)

        p_values['base']['ztest'].append(base_ztest)
        p_values['base']['mhu'].append(base_mhu)

        p_values['linearization']['ttest'].append(lin_ttest)
        p_values['linearization']['mhu'].append(lin_mhu)

    return p_values

## Ложные прокраски

In [None]:
# Configuration of the false-positive check.

# Total number of simulated events and the share that goes to the test arm.
base_size = 1000000
test_group = 0.2

# Both arms share the same base conversion rate; cr_delta is the half-width
# of the per-user spread around it.
cr_control = 0.8
cr_test = 0.8
cr_delta = 0.2

# Number of simulated experiments per configuration.
num_random_steps = 10000

# Event counts per arm derived from the split above.
test_size = int(base_size * test_group)
control_size = int(base_size * (1 - test_group))

In [None]:
# False-positive check with one event per user (group_size=1).
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=1, control_cr=cr_control, test_cr=cr_test,
                       delta_cr=cr_delta, num_random_steps=num_random_steps)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))

In [None]:
# False-positive check with five events per user (group_size=5).
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=5, control_cr=cr_control, test_cr=cr_test,
                       delta_cr=cr_delta, num_random_steps=num_random_steps)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))

In [None]:
# False-positive check with ten events per user (group_size=10).
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=10, control_cr=cr_control, test_cr=cr_test,
                       delta_cr=cr_delta, num_random_steps=num_random_steps)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))

## Оценка мощности

In [None]:
# Accumulates the p-value dictionaries of the power runs, one per effect size.
result = list()

In [None]:
# Configuration of the power estimation runs.

# Total number of simulated events and the share that goes to the test arm.
base_size = 1000000
test_group = 0.2

# Events per user, used by every power run.
group_size = 5

# Control base conversion rate and half-width of the per-user spread;
# the test-arm rate is set separately before each run.
cr_control = 0.8
cr_delta = 0.15

# Number of simulated experiments per effect size.
num_random_steps = 10000

# Event counts per arm derived from the split above.
test_size = int(base_size * test_group)
control_size = int(base_size * (1 - test_group))

In [None]:
# Power run: the test arm's base conversion rate is 0.001 above control.
cr_test = cr_control + 0.001
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=group_size, control_cr=cr_control,
                       test_cr=cr_test, delta_cr=cr_delta,
                       num_random_steps=num_random_steps)
result.append(p_values)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))

In [None]:
# Power run: the test arm's base conversion rate is 0.002 above control.
cr_test = cr_control + 0.002
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=group_size, control_cr=cr_control,
                       test_cr=cr_test, delta_cr=cr_delta,
                       num_random_steps=num_random_steps)
result.append(p_values)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))

In [None]:
# Power run: the test arm's base conversion rate is 0.003 above control.
cr_test = cr_control + 0.003
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=group_size, control_cr=cr_control,
                       test_cr=cr_test, delta_cr=cr_delta,
                       num_random_steps=num_random_steps)
result.append(p_values)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))

In [None]:
# Power run: the test arm's base conversion rate is 0.004 above control.
cr_test = cr_control + 0.004
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=group_size, control_cr=cr_control,
                       test_cr=cr_test, delta_cr=cr_delta,
                       num_random_steps=num_random_steps)
result.append(p_values)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))

In [None]:
# Power run: the test arm's base conversion rate is 0.005 above control.
cr_test = cr_control + 0.005
p_values = calc_pvalue(control_size=control_size, test_size=test_size,
                       group_size=group_size, control_cr=cr_control,
                       test_cr=cr_test, delta_cr=cr_delta,
                       num_random_steps=num_random_steps)
result.append(p_values)

# Share of simulated experiments with p-value below 0.05, one line per test.
# print(...) with a single argument behaves the same on Python 2 and 3.
for family, method in [('base', 'ztest'), ('base', 'mhu'),
                       ('linearization', 'ttest'), ('linearization', 'mhu')]:
    print(np.mean(np.array(p_values[family][method]) < 0.05))