In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import logging
import sys
import re
from citymobil_python_mysql_wrapper import MysqlWrapper
import pyexasol
from random import randrange
from itertools import combinations

import statsmodels.api as sm
from statsmodels.stats.power import tt_ind_solve_power, GofChisquarePower
from statsmodels.stats.gof import chisquare_effectsize
from statsmodels.sandbox.stats.multicomp import multipletests
from scipy.stats import norm, chi2_contingency

In [2]:
import query as q

In [3]:
# Raise both display limits in one call so wide/long result tables are not truncated.
pd.set_option("display.max_columns", 999, "display.max_rows", 999)

In [4]:
def get_bootstrap(
    data_column_1, # numeric values of the first sample (pandas Series)
    data_column_2, # numeric values of the second sample (pandas Series)
    boot_it = 1000, # number of bootstrap resamples
    statistic = np.mean, # statistic compared between the two samples
    bootstrap_conf_level = 0.99 # confidence level of the percentile interval
):

    '''Bootstrap the difference of a statistic between two samples.

    On every iteration each sample is resampled with replacement to ITS OWN
    size and ``statistic(sample_1) - statistic(sample_2)`` is recorded.

    Returns a dict with:
      * "boot_data" - list of the ``boot_it`` bootstrapped differences;
      * "quants"    - DataFrame holding the lower/upper percentile bounds of
                      the differences for ``bootstrap_conf_level``;
      * "p_value"   - two-sided p-value from a normal approximation of the
                      bootstrap distribution (NaN when the differences have
                      zero spread).
    '''

    boot_data = []
    for _ in range(boot_it):
        samples_1 = data_column_1.sample(
            len(data_column_1),
            replace = True
        ).values

        # Each group keeps its own size; sizing the second resample by the
        # first group's length mis-states its variance for unequal groups.
        samples_2 = data_column_2.sample(
            len(data_column_2),
            replace = True
        ).values

        boot_data.append(statistic(samples_1)-statistic(samples_2))

    pd_boot_data = pd.DataFrame(boot_data)

    left_quant = (1 - bootstrap_conf_level)/2
    right_quant = 1 - (1 - bootstrap_conf_level) / 2
    quants = pd_boot_data.quantile([left_quant, right_quant])

    # Normal approximation: probability mass on either side of zero.
    boot_mean = np.mean(boot_data)
    boot_std = np.std(boot_data)
    p_1 = norm.cdf(x = 0, loc = boot_mean, scale = boot_std)
    p_2 = norm.cdf(x = 0, loc = -boot_mean, scale = boot_std)
    p_value = min(p_1, p_2) * 2

    return {"boot_data": boot_data,
            "quants": quants,
            "p_value": p_value}

def bootstrap_ratio(
        data: pd.DataFrame,
        x: str,
        y: str,
        x_f,
        y_f,
        split='EXP_GROUP',
        user_level_col='DRIVER_RK',
        boot_it=1000,
        conf_level=0.99):

    '''Bootstrap test for a ratio metric ``x_f(x) / y_f(y)`` between two groups.

    ``data`` is split by ``split``; the first two groups (in sorted key order)
    are compared. On every iteration rows are selected per group via a draw of
    ``user_level_col`` values, and the difference of the group ratios
    (second group minus first) is recorded.

    Returns ``(pvalue, mark)`` where ``pvalue`` comes from a normal
    approximation of the bootstrapped differences and ``mark`` is
    ``pvalue < 1 - conf_level``. Returns ``(None, False)`` as soon as any
    aggregated numerator or denominator equals zero.

    NOTE(review): the draw is made with replacement but applied through
    ``isin``, so a user drawn several times is still included only once -
    confirm this subsampling behaviour is intended.
    '''

    ordered = data.sort_values(by=[split], ascending=[True])
    groups = [frame for _, frame in ordered.groupby(split)]

    def _resample(group):
        # Draw as many ids as there are distinct users, keep the matching rows.
        users = group[user_level_col]
        drawn = users.sample(users.nunique(), replace=True)
        return group[users.isin(drawn)]

    boot_data = []
    for _ in range(boot_it):
        s0 = _resample(groups[0])
        s1 = _resample(groups[1])

        y0, y1 = y_f(s0[y]), y_f(s1[y])
        x0, x1 = x_f(s0[x]), x_f(s1[x])

        # A zero numerator or denominator aborts the whole test.
        if y0 == 0 or y1 == 0 or x0 == 0 or x1 == 0:
            return None, False

        ratio_0 = x0 / y0
        ratio_1 = x1 / y1
        boot_data.append(ratio_1 - ratio_0)

    center = np.mean(boot_data)
    spread = np.std(boot_data)
    lower_tail = norm.cdf(x=0, loc=center, scale=spread)
    upper_tail = norm.cdf(x=0, loc=-center, scale=spread)
    pvalue = min(lower_tail, upper_tail) * 2

    return pvalue, (pvalue < 1 - conf_level)

# Запрашиваем из БД все необходимые данные

In [5]:
# Credentials are read from a local JSON file: first row, first two columns.
# NOTE(review): the absolute user-specific path makes the notebook non-portable -
# consider a configurable location.
cred = pd.read_json(r'/Users/skostuchik/crd_exa.json')
user, password = cred.iloc[0, 0], cred.iloc[0, 1]

# Shared connection used by every query below.
C = pyexasol.connect(
    dsn='ex1..3.city-srv.ru:8563',
    user=user,
    password=password,
    fetch_dict=True,
)

In [6]:
# Parameters shared by the power calculations below.
alpha = 0.01   # significance level
power = 0.9    # target statistical power
n_bins = 2     # number of bins passed to the chi-square power solver

In [7]:
# Bonus campaigns under analysis: campaign id and the date window queried for it.
campaigns = [
    dict(id=46471, date_from='2021-12-08', date_to='2021-12-14'),
    dict(id=49899, date_from='2022-01-24', date_to='2022-01-28'),
]

In [8]:
# One independent, empty accumulator frame per dataset.
(ar_suggest_data, of2r_data, mph_data,
 copt_data, dist_data, churn_data) = (pd.DataFrame() for _ in range(6))

In [9]:
# Query template for every dataset, keyed by the name of the frame it fills.
query_templates = {
    'ar_suggest_data': q.ar_query,
    'of2r_data': q.of2r_query,
    'mph_data': q.mph_query,
    'copt_data': q.copt_query,
    'dist_data': q.dist_query,
    'churn_data': q.churn_query,
}
fetched_frames = {name: [] for name in query_templates}

for c in tqdm(campaigns):
    query_params = {'id': c.get('id'), 'date_from': c.get('date_from'), 'date_to': c.get('date_to')}

    for name, template in query_templates.items():
        statement = C.execute(template.format(**query_params))
        fetched_frames[name].append(pd.DataFrame(statement.fetchall()))


def _stack_frames(frames):
    '''Concatenate per-campaign frames once; an empty list yields an empty frame.'''
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()


# Build each dataset from scratch with a single concat: DataFrame.append is
# deprecated and re-copies the accumulated frame on every call. Rebuilding
# also means re-running this cell does not duplicate rows.
ar_suggest_data = _stack_frames(fetched_frames['ar_suggest_data'])
of2r_data = _stack_frames(fetched_frames['of2r_data'])
mph_data = _stack_frames(fetched_frames['mph_data'])
copt_data = _stack_frames(fetched_frames['copt_data'])
dist_data = _stack_frames(fetched_frames['dist_data'])
churn_data = _stack_frames(fetched_frames['churn_data'])

100%|██████████| 2/2 [00:13<00:00,  6.55s/it]


In [10]:
# Give every dataset a clean 0..n-1 index. The reset is done in place because the
# loop variable is only an alias; drop=True discards the old index directly
# instead of materialising it as an 'index' column and deleting it afterwards
# (which fails when a column named 'index' already exists).
for dataframe in [ar_suggest_data, of2r_data, mph_data, copt_data, dist_data, churn_data]:
    dataframe.reset_index(drop=True, inplace=True)

In [11]:
# Render the id list explicitly: formatting a Python tuple produces "(46471,)"
# for a single campaign, which is not valid SQL.
campaign_ids_sql = ', '.join(str(int(c.get('id'))) for c in campaigns)

titles_sql = f'''select id, TITLE
from REPLICA.DRIVER_BONUS db
    where id in ({campaign_ids_sql})'''

titles = pd.DataFrame(C.execute(titles_sql).fetchall())

# Lookup bonus id -> title; an empty result set yields an empty mapping.
titles_dict = dict(zip(titles['ID'], titles['TITLE'])) if not titles.empty else {}

In [12]:
split_pairs_df = ar_suggest_data.groupby('BONUS_ID')['EXP_GROUP'].unique().reset_index()

# For every bonus, all unordered pairs of its experiment groups:
# {bonus_id: {'<g1>_<g2>': [g1, g2], ...}} with groups taken in sorted order.
split_pairs = {
    bonus_id: {'_'.join(pair): list(pair) for pair in combinations(sorted(groups), 2)}
    for bonus_id, groups in zip(split_pairs_df['BONUS_ID'], split_pairs_df['EXP_GROUP'])
}

In [13]:
# Known issue with how the supply-hours (SH) table is populated in the DWH:
# rows without SUPPLY_HOURS are discarded.
mph_data = mph_data.dropna(subset=['SUPPLY_HOURS'])

In [14]:
def _offer_breakdown():
    '''Fresh breakdown map shared by the offer-level metrics (AR, RR, FR).'''
    return {
        'Total': 'Total',
        'ChainOffer': 'SPECIFICATION_NAME',
        **dict.fromkeys(['0 - 3 km', '3 - 6 km', '6 - 10 km', '+10 km'], 'DISTANCE_CATEGORY'),
    }


def _metric_entry(df, m_type, options, add_breakdown=None):
    '''Assemble one metric entry; without an explicit breakdown only 'Total' is used.'''
    return {
        'df': df,
        'add_breakdown': {'Total': 'Total'} if add_breakdown is None else add_breakdown,
        'options': options,
        'type': m_type,
    }


def _binomial(df, x_col, n_col, add_breakdown=None):
    '''Binomial metric: flag column `x_col`, unit column `n_col` counted by nunique.'''
    options = {'x': [x_col], 'n': [n_col, pd.Series.nunique]}
    return _metric_entry(df, 'binomial', options, add_breakdown)


def _ratio(df, x_col, y_col, y_func=np.sum, add_breakdown=None):
    '''Ratio metric: sum of `x_col` over `y_func(y_col)`, units are unique drivers.'''
    options = {
        'x': [x_col, np.sum],
        'y': [y_col, y_func],
        'n': ['DRIVER_RK', pd.Series.nunique],
    }
    return _metric_entry(df, 'ratio', options, add_breakdown)


def _continuous(df, x_col, x_func):
    '''Continuous metric: statistic `x_func` of `x_col`, units are unique orders.'''
    options = {'x': [x_col, x_func], 'n': ['ORDER_RK', pd.Series.nunique]}
    return _metric_entry(df, 'continious', options)


# Metric registry. Per metric: source frame ('df'), breakdown slices
# ('add_breakdown': slice value -> filter column, or 'Total' for no filter),
# column/aggregation pairs ('options') and the test family ('type').
config = {
    'metrics': {
        'AR': _binomial(ar_suggest_data, 'ACCEPT', 'FSS_ID', _offer_breakdown()),
        'RR': _binomial(ar_suggest_data, 'REJECT', 'FSS_ID', _offer_breakdown()),
        'FR': _binomial(ar_suggest_data, 'FRAUD', 'FSS_ID', _offer_breakdown()),
        'CHURN_7DAY': _binomial(churn_data, 'CHURN_7DAY', 'DRIVER_RK'),
        'CHURN_14DAY': _binomial(churn_data, 'CHURN_14DAY', 'DRIVER_RK'),
        'CHURN_21DAY': _binomial(churn_data, 'CHURN_21DAY', 'DRIVER_RK'),
        'MPH_GROSS': _ratio(mph_data, 'MONEY_GROSS', 'SUPPLY_HOURS'),
        'MPH_NET': _ratio(mph_data, 'MONEY_NET', 'SUPPLY_HOURS'),
        'ORGANIC_MPH_GROSS': _ratio(mph_data, 'ORGANIC_MONEY_GROSS', 'SUPPLY_HOURS'),
        'ORGANIC_MPH_NET': _ratio(mph_data, 'ORGANIC_MONEY_NET', 'SUPPLY_HOURS'),
        'SHpD': _ratio(mph_data, 'SUPPLY_HOURS', 'DRIVER_RK', y_func=pd.Series.nunique),
        'DE': _ratio(mph_data, 'ON_TRIP', 'SUPPLY_HOURS', add_breakdown={
            'Total': 'Total', 'Not_park': 'PARK', 'Park': 'PARK', 'not_brand': 'BRAND', 'brand': 'BRAND',
            'TOP': 'SEGMENT', 'HIGH': 'SEGMENT',
        }),
        'TPD': _ratio(mph_data, 'TRIPS', 'DRIVER_RK', y_func=pd.Series.nunique),
        'OF2R': _ratio(of2r_data, 'RIDES', 'OFFERS'),
        'GMVPT': _ratio(copt_data, 'GMV', 'RIDES'),
        'COPT': _ratio(copt_data, 'CONTRIBUTION', 'RIDES'),
        'DIPT': _ratio(copt_data, 'DI', 'RIDES'),
        'COMPT': _ratio(copt_data, 'COMMISSION', 'RIDES'),
        'DIST_MEAN': _continuous(dist_data, 'EXP_DIST_KM', np.mean),
        'DIST_MEDIAN': _continuous(dist_data, 'EXP_DIST_KM', np.median),
    }
}

In [15]:
def stat_res_calculation(metric: str, m_type: str, df: pd.DataFrame, splits: list, cfg=config):
    '''Compute the test statistics of one metric for a pair of experiment groups.

    Parameters
    ----------
    metric : key of the metric inside ``cfg['metrics']``.
    m_type : 'binomial', 'ratio' or 'continious' (spelling as used in the config).
    df     : rows of the metric's source frame, with an 'EXP_GROUP' column.
    splits : the two group labels ``[split_1, split_2]``; split_1 is the baseline.
    cfg    : metric registry to read column/aggregation options from.

    Returns
    -------
    dict with the p-value, the required sample size and its "enough data"
    marker (1 or NaN), both group values, the lift, group sizes and driver
    counts with their relative differences. For an unknown ``m_type`` the
    string 'Unknown metric type' is returned instead.

    The globals ``alpha``, ``power`` and ``n_bins`` parameterise the power
    calculations.
    '''

    split_1, split_2 = splits

    if m_type not in ('binomial', 'ratio', 'continious'):
        return('Unknown metric type')

    # Options come from the registry passed in (not from the global one).
    options = cfg['metrics'][metric]['options']
    group_a = df[df['EXP_GROUP'] == split_1]
    group_b = df[df['EXP_GROUP'] == split_2]

    if m_type == 'binomial':
        x = options['x'][0]

        # Successes are rows with a positive flag, failures rows with zero.
        # Plain ints keep a zero group size a ZeroDivisionError below.
        a_s, a_f = int((group_a[x] > 0).sum()), int((group_a[x] == 0).sum())
        b_s, b_f = int((group_b[x] > 0).sum()), int((group_b[x] == 0).sum())
        act_nobs_a, act_nobs_b = a_s + a_f, b_s + b_f

        effect_size = chisquare_effectsize(np.array([a_s, a_f]), np.array([b_s, b_f]),
                                           correction=None, cohen=True, axis=0)

        p_val = chi2_contingency(np.array([[a_f, b_f], [a_s, b_s]]))[1]
        mde = int(GofChisquarePower().solve_power(effect_size=effect_size, alpha=alpha,
                                                  power=power, n_bins=n_bins))

        a = a_s / act_nobs_a
        b = b_s / act_nobs_b
        a_drivers = group_a['DRIVER_RK'].nunique()
        b_drivers = group_b['DRIVER_RK'].nunique()
        lift = b - a  # absolute difference of the two shares

    elif m_type == 'ratio':
        x, x_f = options['x'][0], options['x'][1]
        y, y_f = options['y'][0], options['y'][1]
        n_col, n_f = options['n'][0], options['n'][1]

        act_nobs_a = n_f(group_a[n_col])
        act_nobs_b = n_f(group_b[n_col])

        # Average only the numerator column and address groups by label rather
        # than by position in the grouped result.
        group_means = df.groupby('EXP_GROUP')[x].mean()
        rel_shift = group_means.loc[split_2] / group_means.loc[split_1] - 1
        effect_size = group_a[x].mean() / group_a[x].std() * rel_shift

        p_val = bootstrap_ratio(data=df, x=x, y=y, x_f=x_f, y_f=y_f)[0]
        mde = int(tt_ind_solve_power(effect_size=effect_size, alpha=alpha, power=power, nobs1=None, ratio=1))

        a = x_f(group_a[x]) / y_f(group_a[y])
        b = x_f(group_b[x]) / y_f(group_b[y])
        a_drivers, b_drivers = act_nobs_a, act_nobs_b
        lift = (b - a) / abs(a)  # relative difference of the two ratios

    else:  # 'continious'
        x, f = options['x'][0], options['x'][1]
        n_col, f_n = options['n'][0], options['n'][1]

        a = f(group_a[x])
        b = f(group_b[x])

        act_nobs_a = f_n(group_a[n_col])
        act_nobs_b = f_n(group_b[n_col])

        sd = group_a[x].std()
        lift = b / a - 1
        effect_size = a / sd * lift

        booted_data_ab = get_bootstrap(data_column_1=group_a[x],
                                       data_column_2=group_b[x],
                                       statistic=f)
        p_val = booted_data_ab["p_value"]
        q_low = float(booted_data_ab["quants"].iloc[0, 0])
        q_high = float(booted_data_ab["quants"].iloc[1, 0])

        # An interval that straddles zero overrides the p-value with 1.
        if q_low < 0 and q_high > 0:
            p_val = 1

        try:
            mde = int(tt_ind_solve_power(effect_size=effect_size, alpha=alpha, power=power, nobs1=None, ratio=1))
        except Exception:
            # Solver failed: fall back to a huge sentinel so mde_ok stays NaN.
            mde = 990999999

        a_drivers = group_a['DRIVER_RK'].nunique()
        b_drivers = group_b['DRIVER_RK'].nunique()

    # Marker is 1 only when both groups reach the required sample size.
    mde_marker_value = 1 if (act_nobs_a >= mde) and (act_nobs_b >= mde) else np.nan

    nobs_diff_value = (act_nobs_b-act_nobs_a)/abs(act_nobs_a)
    dn_diff_value = (b_drivers-a_drivers)/abs(a_drivers)

    return({'p_val':p_val, 'nobs_needed':mde, 'mde_ok':mde_marker_value,
            'split_a':a, 'split_b':b, 'lift':lift, 'split_a_n_obs':act_nobs_a, 'split_b_n_obs':act_nobs_b,
            'n_obs_difference':nobs_diff_value, 'split_a_drivers':a_drivers, 'split_b_drivers':b_drivers,
            'unique_drivers_difference':dn_diff_value})

In [16]:
# Results keyed by (bonus id, split pair, metric name).
result_dict = {}

for b in tqdm(ar_suggest_data.BONUS_ID.unique()):
    for metric, v in config.get('metrics').items():
        df = v.get('df')
        breakdowns = v.get('add_breakdown')
        for split_pair, splits in split_pairs.get(b).items():
            metric_data = df[(df['BONUS_ID'] == int(b)) & (df.EXP_GROUP.isin(splits))]
            for value, fltr in breakdowns.items():
                if fltr != 'Total':
                    add_breakdown_data = metric_data[metric_data[fltr] == value]
                else:
                    add_breakdown_data = metric_data

                # The breakdown value is appended only when the metric has several slices.
                metric_name = metric + '_' + value if len(breakdowns) > 1 else metric

                # An empty slice has no row to read the bonus id from; skip it
                # explicitly instead of failing on .iloc[0].
                if add_breakdown_data.empty:
                    logging.warning('No data for bonus %s, %s, %s - skipped', b, split_pair, metric_name)
                    continue

                BONUS_ID = add_breakdown_data['BONUS_ID'].iloc[0]

                try:
                    src = stat_res_calculation(metric=metric, m_type=v.get('type'), df=add_breakdown_data,
                                               splits=splits)
                except Exception as err:
                    # Keep going, but leave a trace of what was dropped and why.
                    logging.warning('Skipped bonus %s, %s, %s: %r', BONUS_ID, split_pair, metric_name, err)
                    continue

                if not isinstance(src, dict):
                    # Non-dict result (e.g. an unknown metric type message) cannot be tabulated.
                    logging.warning('Skipped bonus %s, %s, %s: %s', BONUS_ID, split_pair, metric_name, src)
                    continue

                result_dict[(BONUS_ID, split_pair, metric_name)] = src

  lift = mde_ttl[x][1] / mde_ttl[x][0]-1
  lift = mde_ttl[x][1] / mde_ttl[x][0]-1
  lift = mde_ttl[x][1] / mde_ttl[x][0]-1
100%|██████████| 2/2 [01:43<00:00, 51.54s/it]


In [17]:
# One row per (bonus id, split pair, metric) key, one column per computed statistic.
experiment_summary = pd.DataFrame(result_dict).transpose()

# Делаем поправку БХ на множественные сравнения

In [18]:
# Missing p-values are replaced by 1 before the correction.
p_res = experiment_summary[['p_val']].fillna(1)

In [19]:
# Multiple-comparison correction (method 'fdr_bh') applied separately to each
# bonus id: {row key: (raw p-value, adjusted p-value)}.
p_adj_list = {}

for _, bonus_p in p_res['p_val'].groupby(level=0, sort=False):
    bonus_p_adj = multipletests(bonus_p, alpha=0.01, method='fdr_bh')[1]
    for row_key, raw_p, adj_p in zip(bonus_p.index, bonus_p, bonus_p_adj):
        p_adj_list[row_key] = (raw_p, adj_p)

In [20]:
# Rows are the result keys; columns hold the raw and the adjusted p-value.
p_adj = pd.DataFrame(p_adj_list, index=['p_val','p_adj']).transpose()

# Итоги теста:

In [21]:
# Attach adjusted p-values and turn the three index levels into named columns.
experiment_summary = (
    experiment_summary
    .join(p_adj['p_adj'])
    .reset_index()
    .rename(columns={"level_0": "bonus_id", "level_1": "split_pair", "level_2": "metric"})
)

# Campaign title looked up by bonus id (None when the id has no title).
experiment_summary['title'] = experiment_summary['bonus_id'].map(titles_dict.get)

# Final column order of the report.
experiment_summary = experiment_summary.loc[:, [
    'bonus_id', 'title', 'split_pair', 'metric', 'split_a', 'split_b', 'lift', 'split_a_n_obs', 'split_b_n_obs',
    'n_obs_difference', 'split_a_drivers', 'split_b_drivers', 'unique_drivers_difference', 'p_val', 'p_adj',
    'nobs_needed', 'mde_ok']]

In [22]:
# Sort-order lookup: the position of a metric in this list is its rank in the report.
metric_sequence = [
    'AR_Total', 'RR_Total', 'FR_Total', 'AR_ChainOffer', 'RR_ChainOffer', 'FR_ChainOffer',
    'AR_0 - 3 km', 'AR_3 - 6 km', 'AR_6 - 10 km', 'AR_+10 km',
    'RR_0 - 3 km', 'RR_3 - 6 km', 'RR_6 - 10 km', 'RR_+10 km',
    'FR_0 - 3 km', 'FR_3 - 6 km', 'FR_6 - 10 km', 'FR_+10 km',
    'OF2R', 'CHURN_7DAY', 'CHURN_14DAY', 'CHURN_21DAY',
    'MPH_GROSS', 'MPH_NET', 'ORGANIC_MPH_GROSS', 'ORGANIC_MPH_NET', 'SHpD', 'TPD',
    'GMVPT', 'COPT', 'DIPT', 'COMPT', 'DIST_MEAN', 'DIST_MEDIAN',
    'DE_Total', 'DE_Park', 'DE_Not_park', 'DE_not_brand', 'DE_brand', 'DE_HIGH', 'DE_TOP',
]
m_order = {name: rank for rank, name in enumerate(metric_sequence)}

# Bonus ids and split pairs are ranked by their natural sorted order in the same lookup.
m_order.update({bonus: rank for rank, bonus in enumerate(sorted(experiment_summary.bonus_id.unique()))})
m_order.update({pair: rank for rank, pair in enumerate(sorted(experiment_summary.split_pair.unique()))})

In [23]:
# Order rows by bonus, split pair and metric using the ranks stored in m_order.
sort_keys = ['bonus_id', 'split_pair', 'metric']
experiment_summary = experiment_summary.sort_values(by=sort_keys, key=lambda col: col.map(m_order))

# Highlighted results: adjusted p-value at or below 0.01 and enough observations.
is_significant = experiment_summary['p_adj'] <= 0.01
has_enough_data = experiment_summary['mde_ok'] == 1
prokras = experiment_summary[is_significant & has_enough_data]

In [24]:
# Full report, shown without the driver-count difference column.
experiment_summary.drop(columns=['unique_drivers_difference'])

Unnamed: 0,bonus_id,title,split_pair,metric,split_a,split_b,lift,split_a_n_obs,split_b_n_obs,n_obs_difference,split_a_drivers,split_b_drivers,p_val,p_adj,nobs_needed,mde_ok
0,46471,Y как %,A_B,AR_Total,0.655395,0.658685,0.00329,3105.0,7726.0,1.488245,23.0,54.0,0.7611376,0.951422,310386.0,
6,46471,Y как %,A_B,RR_Total,0.273752,0.31271,0.038958,3105.0,7726.0,1.488245,23.0,54.0,7.128011e-05,0.0004073149,1949.0,1.0
12,46471,Y как %,A_B,FR_Total,0.070853,0.028605,-0.042249,3105.0,7726.0,1.488245,23.0,54.0,1.420409e-23,2.840818e-22,548.0,1.0
1,46471,Y как %,A_B,AR_ChainOffer,0.868735,0.848454,-0.020281,419.0,970.0,1.315036,21.0,47.0,0.3674857,0.579875,4124.0,
7,46471,Y как %,A_B,RR_ChainOffer,0.114558,0.14433,0.029771,419.0,970.0,1.315036,21.0,47.0,0.1605547,0.2792256,1702.0,
13,46471,Y как %,A_B,FR_ChainOffer,0.016706,0.007216,-0.00949,419.0,970.0,1.315036,21.0,47.0,0.1826897,0.3044828,2714.0,
2,46471,Y как %,A_B,AR_0 - 3 km,0.823038,0.849352,0.026314,599.0,1467.0,1.449082,22.0,52.0,0.1554931,0.2792256,3129.0,
3,46471,Y как %,A_B,AR_3 - 6 km,0.823009,0.82062,-0.002389,678.0,1678.0,1.474926,22.0,52.0,0.9381101,1.0,379739.0,
4,46471,Y как %,A_B,AR_6 - 10 km,0.7733,0.744778,-0.028521,794.0,1963.0,1.472292,23.0,51.0,0.127591,0.2551821,3206.0,
5,46471,Y как %,A_B,AR_+10 km,0.357834,0.383499,0.025665,1034.0,2618.0,1.531915,22.0,53.0,0.1601737,0.2792256,5190.0,


In [25]:
# Highlighted results only, shown without the driver-count difference column.
prokras.drop(columns=['unique_drivers_difference'])

Unnamed: 0,bonus_id,title,split_pair,metric,split_a,split_b,lift,split_a_n_obs,split_b_n_obs,n_obs_difference,split_a_drivers,split_b_drivers,p_val,p_adj,nobs_needed,mde_ok
6,46471,Y как %,A_B,RR_Total,0.273752,0.31271,0.038958,3105.0,7726.0,1.488245,23.0,54.0,7.128011e-05,0.0004073149,1949.0,1.0
12,46471,Y как %,A_B,FR_Total,0.070853,0.028605,-0.042249,3105.0,7726.0,1.488245,23.0,54.0,1.420409e-23,2.840818e-22,548.0,1.0
10,46471,Y как %,A_B,RR_6 - 10 km,0.164987,0.220071,0.055084,794.0,1963.0,1.472292,23.0,51.0,0.001389757,0.006176698,675.0,1.0
17,46471,Y как %,A_B,FR_+10 km,0.113153,0.038197,-0.074956,1034.0,2618.0,1.531915,22.0,53.0,1.1835440000000001e-17,1.183544e-16,265.0,1.0
36,46471,Y как %,A_B,DIPT,5.85408,26.812835,3.580196,23.0,51.0,1.217391,23.0,51.0,8.112852999999999e-38,3.245141e-36,3.0,1.0
44,49899,СПБ - Бренд Парк - 4 дня - 24.01.2022,A_B,AR_6 - 10 km,0.58387,0.624397,0.040527,3261.0,9116.0,1.795462,142.0,415.0,4.944363e-05,0.0004096758,2201.0,1.0
46,49899,СПБ - Бренд Парк - 4 дня - 24.01.2022,A_C,AR_Total,0.592207,0.617002,0.024795,17298.0,49277.0,1.848711,146.0,419.0,9.21842e-09,2.349491e-07,5844.0,1.0
64,49899,СПБ - Бренд Парк - 4 дня - 24.01.2022,A_C,RR_Total,0.36854,0.341904,-0.026636,17298.0,49277.0,1.848711,146.0,419.0,2.710043e-10,3.143649e-08,4880.0,1.0
49,49899,СПБ - Бренд Парк - 4 дня - 24.01.2022,A_C,AR_3 - 6 km,0.695663,0.726466,0.030803,3920.0,11154.0,1.845408,142.0,409.0,0.0002445264,0.001668533,3320.0,1.0
50,49899,СПБ - Бренд Парк - 4 дня - 24.01.2022,A_C,AR_6 - 10 km,0.58387,0.627395,0.043525,3261.0,9133.0,1.800675,142.0,411.0,1.266354e-05,0.0001224142,1908.0,1.0


In [None]:
# Export to Excel: the full report and the highlighted subset go to separate workbooks.
experiment_summary.to_excel(
    'res_ttl.xlsx',
    sheet_name='experiment_summary',
    index=False,
)
prokras.to_excel(
    'res_prokras.xlsx',
    sheet_name='prokras',
    index=False,
)