In [0]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import time
import matplotlib.pyplot as plt
from statsmodels.stats.proportion import confint_proportions_2indep

# functions needed for further calculations:

def relative_diff(num,denum):
  """Return the relative difference of ``num`` with respect to ``denum``.

  The result is ``num / denum - 1``. For a negative ``denum`` the sign is
  flipped, and for a zero ``denum`` NaN is returned instead of dividing.

  ``np.nan`` is used rather than the ``np.NaN`` alias, which was removed in
  NumPy 2.0.
  """
  if denum == 0:
    return np.nan
  ratio_minus_one = num / denum - 1
  # Flip the sign when the baseline itself is negative.
  return -ratio_minus_one if denum < 0 else ratio_minus_one

def array_diff(tr,ctr):
  """Return the element-wise difference between ``tr`` and ``ctr``.

  When ``ctr`` has no negative element the result is simply ``tr - ctr``.
  Otherwise positions where BOTH values are negative yield ``ctr - tr`` and
  every other position yields ``tr - ctr``; that result is float64.

  Both inputs are expected to be one-dimensional and of equal length. The
  computation is vectorised instead of growing an array with ``np.append``
  inside a loop, which copied the whole array on every iteration.
  """
  tr = np.asarray(tr)
  ctr = np.asarray(ctr)
  if not (ctr < 0).any():
    return tr - ctr
  both_negative = (ctr < 0) & (tr < 0)
  return np.where(both_negative, ctr - tr, tr - ctr).astype(float)

# removes outliers with given quantile
def remove_outliers(df_customer,metric,quantile_for_outliers):
  """Drop the rows whose ``metric`` value reaches the given quantile.

  The threshold is the ``quantile_for_outliers`` quantile of the metric cast
  to float. Rows with a value greater than OR EQUAL to the threshold are
  dropped by index label.

  Returns a tuple ``(frame_without_outliers, threshold)``.
  """
  metric_values = df_customer[metric]
  outliers_threshold = metric_values.astype('float').quantile(quantile_for_outliers)
  is_outlier = metric_values >= outliers_threshold
  outlier_labels = df_customer[is_outlier].index
  return df_customer.drop(outlier_labels), outliers_threshold

# return samples based on column and metrics names
def get_control_and_treatment_samples(df_customer,metric,variant_col,control_gr_name,treatment_gr_name):
  """Extract the metric values of the control and treatment groups.

  Rows are selected by comparing ``variant_col`` with the group name; NaN
  metric values are removed from each sample.

  Returns a tuple ``(control, treatment)`` of NumPy arrays.
  """
  def _values_for(group_name):
    in_group = df_customer[variant_col] == group_name
    values = df_customer[metric][in_group].values
    return values[~np.isnan(values)]

  control = _values_for(control_gr_name)
  treatment = _values_for(treatment_gr_name)
  return control, treatment

# return z-test CI  
def get_z_test_CI(control,treatment):
  """Return the half-width of the z-interval for the difference of means.

  Computed as 1.96 times the square root of the summed per-group
  ``std**2 / n`` terms, where ``std`` is ``np.std`` with its default
  arguments.
  """
  def _variance_of_mean(sample):
    return np.square(np.std(sample)) / len(sample)

  standard_error = np.sqrt(_variance_of_mean(treatment) + _variance_of_mean(control))
  return 1.96 * standard_error

# bootstrap
def get_bootstrap_results(control,treatment,iterations=800,bootstrap_median=False):
  """Bootstrap the difference of means (or medians) between two samples.

  On every iteration both samples are resampled with replacement and the
  chosen statistic is computed for each. Each sample is resampled at ITS OWN
  size; previously the treatment was drawn at the control's size, which
  misstates the treatment's bootstrap spread when the groups differ in size.

  Args:
    control: control observations.
    treatment: treatment observations.
    iterations: number of bootstrap iterations.
    bootstrap_median: use the median instead of the mean as the statistic.

  Returns:
    Tuple ``(pval, ctr_mean, tr_mean, mean_diff, ci_prc_low, ci_prc_up)``:
    a two-sided p-value from the share of negative bootstrap differences,
    the averages of the bootstrap statistics, their relative difference,
    and the 2.5/97.5 percentile bounds of the bootstrap differences divided
    by the control average (bounds are swapped and negated for a negative
    control average, and set to 0.0 when it is zero). All values are
    rounded.
  """
  statistic = np.median if bootstrap_median else np.mean
  control_size = len(control)
  treatment_size = len(treatment)
  boot_stats_ctr = []
  boot_stats_tr = []
  boot_diffs = []
  negative_diffs = 0
  for _ in range(iterations):
    stat_ctr = statistic(np.random.choice(control, size=control_size, replace=True))
    stat_tr = statistic(np.random.choice(treatment, size=treatment_size, replace=True))
    boot_stats_ctr.append(stat_ctr)
    boot_stats_tr.append(stat_tr)
    diff = stat_tr - stat_ctr
    if diff < 0:
      negative_diffs += 1
    boot_diffs.append(diff)

  ci_low, ci_up = np.percentile(boot_diffs, [2.5, 97.5])
  pval = 2 * np.minimum(negative_diffs, iterations - negative_diffs) / iterations
  ctr_mean = np.mean(boot_stats_ctr)
  tr_mean = np.mean(boot_stats_tr)
  mean_diff = relative_diff(tr_mean, ctr_mean)

  # Express the absolute CI bounds relative to the control average.
  if ctr_mean > 0:
    ci_prc_low, ci_prc_up = ci_low / ctr_mean, ci_up / ctr_mean
  elif ctr_mean < 0:
    ci_prc_low, ci_prc_up = -ci_up / ctr_mean, -ci_low / ctr_mean
  else:
    ci_prc_low, ci_prc_up = 0.0, 0.0
  return np.round(pval,4),np.round(ctr_mean,4),np.round(tr_mean,4),np.round(mean_diff,5),np.round(ci_prc_low,5),np.round(ci_prc_up,5)


# get buckets and apply t_test
def get_buckets_results(control,treatment,buckets=150,confidence_level = 0.95,min_bucket_size=1000):
  """Split both samples into random buckets and t-test the bucket means.

  Bucket members are picked with ``np.random.randint`` (so an observation
  can appear more than once within a bucket) and the picked positions are
  then removed from the pool of remaining observations. A bucket is only
  formed while both pools still hold at least ``sample_size`` observations.

  Fixes over the previous version:
    * the sign check for the relative CI used an undefined name
      (``mean_ctr``) instead of the control bucket mean;
    * fewer than ``min_bucket_size`` control observations produced zero
      buckets and a ZeroDivisionError; at least one bucket is now used.

  Args:
    control: NumPy array of control observations.
    treatment: NumPy array of treatment observations.
    buckets: requested number of buckets.
    confidence_level: confidence level of the t-interval.
    min_bucket_size: if the requested bucket count would give smaller
      buckets than this, the count is reduced accordingly.

  Returns:
    Tuple ``(pval, mean_ctr, mean_tr, relative_diff, ci_prc_low, ci_prc_up)``,
    all rounded. CI bounds are relative to the mean of control bucket means.
  """
  buckets_means_ctr = []
  buckets_means_tr = []
  means_diff = []
  control_pool = control
  treatment_pool = treatment

  sample_size = len(control) // buckets
  if sample_size < min_bucket_size:
    # Too few observations for the requested count: use fewer, larger buckets.
    buckets = max(1, len(control) // min_bucket_size)
    sample_size = len(control) // buckets

  for _ in range(buckets):
    if sample_size <= len(control_pool) and sample_size <= len(treatment_pool):
      control_idx = np.random.randint(0, len(control_pool), sample_size)
      test_idx = np.random.randint(0, len(treatment_pool), sample_size)
      tr_mean = np.mean(treatment_pool[test_idx])
      ctr_mean = np.mean(control_pool[control_idx])
      means_diff.append(tr_mean - ctr_mean)
      buckets_means_ctr.append(ctr_mean)
      buckets_means_tr.append(tr_mean)
      control_pool = np.delete(control_pool, control_idx)
      treatment_pool = np.delete(treatment_pool, test_idx)

  pval_mean = stats.ttest_ind(buckets_means_ctr, buckets_means_tr).pvalue
  mean_buckets_tr = np.mean(buckets_means_tr)
  mean_buckets_ctr = np.mean(buckets_means_ctr)

  degrees_freedom = len(means_diff) - 1
  sample_mean = np.mean(means_diff)
  sample_standard_error = stats.sem(means_diff)
  ci_low, ci_up = stats.t.interval(confidence_level, degrees_freedom, sample_mean, sample_standard_error)

  # Express the absolute CI bounds relative to the control bucket mean.
  if mean_buckets_ctr > 0:
    ci_prc_low, ci_prc_up = ci_low / mean_buckets_ctr, ci_up / mean_buckets_ctr
  elif mean_buckets_ctr < 0:
    ci_prc_low, ci_prc_up = -ci_up / mean_buckets_ctr, -ci_low / mean_buckets_ctr
  else:
    ci_prc_low, ci_prc_up = 0.0, 0.0

  return np.round(pval_mean,4),np.round(mean_buckets_ctr,4),np.round(mean_buckets_tr,4),np.round(relative_diff(mean_buckets_tr,mean_buckets_ctr), 5),np.round(ci_prc_low,5),np.round(ci_prc_up,5)


# return t-test pval,CIs, mean diffs
def get_t_test_results(ctr,tr,confidence_level = 0.95):
  """Run an independent two-sample t-test and derive a relative CI.

  The p-value and the group means come from the full samples. For the
  confidence interval the larger sample is randomly down-sampled (without
  replacement) to the size of the smaller one, element-wise differences are
  taken with ``array_diff`` and a t-interval is built around their mean.

  Returns:
    Tuple ``(pval, mean_tr, mean_ctr, relative_diff, ci_prc_low, ci_prc_up)``,
    all rounded. Note the treatment mean comes BEFORE the control mean.
  """
  control_values = ctr.astype('float')
  treatment_values = tr.astype('float')

  pval = stats.ttest_ind(control_values, treatment_values).pvalue
  mean_tr = np.mean(treatment_values)
  mean_ctr = np.mean(control_values)

  # Equalise the sample sizes so that the differences can be taken pairwise.
  if len(treatment_values) > len(control_values):
    treatment_values = np.random.choice(treatment_values, size=len(control_values), replace=False)
  else:
    control_values = np.random.choice(control_values, size=len(treatment_values), replace=False)

  pairwise_diff = array_diff(treatment_values, control_values)
  ci_low, ci_up = stats.t.interval(
    confidence_level,
    len(pairwise_diff) - 1,
    np.mean(pairwise_diff),
    stats.sem(pairwise_diff),
  )

  # Express the absolute CI bounds relative to the control mean.
  if mean_ctr > 0:
    ci_prc_low, ci_prc_up = ci_low / mean_ctr, ci_up / mean_ctr
  elif mean_ctr < 0:
    ci_prc_low, ci_prc_up = -ci_up / mean_ctr, -ci_low / mean_ctr
  else:
    ci_prc_low, ci_prc_up = 0.0, 0.0

  rel_diff = relative_diff(mean_tr, mean_ctr)
  return np.round(pval,4),np.round(mean_tr,4),np.round(mean_ctr,4),np.round(rel_diff,5),np.round(ci_prc_low,5),np.round(ci_prc_up,5)

# get results of the proportion test
def get_chi2_test_results(control_num,control_denum,treatment_num,treatment_denum):
  """Compare two proportions with a chi-square test and a CI of the difference.

  Numerators and denominators are summed per group. The p-value comes from
  ``stats.chi2_contingency`` (no continuity correction) on the 2x2 table of
  successes and failures; the confidence interval of the difference comes
  from statsmodels' ``confint_proportions_2indep`` with ``alpha=0.05`` and
  ``compare='diff'``.

  The relative CI bounds are divided by the UNROUNDED control proportion.
  Previously the proportion was rounded to 4 decimals first, which distorted
  the relative bounds for small proportions.

  Returns:
    Tuple ``(conv_pval, ci_low, ci_up, ci_prc_low, ci_prc_up, p_control,
    p_treatment, p_diff, p_prc_diff)``. ``ci_low``, ``ci_up`` and ``p_diff``
    are not rounded; the other values are.
  """
  sum_control_num = np.sum(control_num)
  sum_control_denum = np.sum(control_denum)
  sum_treatment_num = np.sum(treatment_num)
  sum_treatment_denum = np.sum(treatment_denum)

  p_control_exact = sum_control_num / sum_control_denum
  p_treatment_exact = sum_treatment_num / sum_treatment_denum
  p_prc_diff = np.round((p_treatment_exact / p_control_exact) - 1, 4)
  p_diff = p_treatment_exact - p_control_exact

  # 2x2 contingency table: [successes, failures] for control and treatment.
  T = np.array([[sum_control_num, sum_control_denum - sum_control_num], [sum_treatment_num, sum_treatment_denum - sum_treatment_num]])
  conv_pval = np.round(stats.chi2_contingency(T,correction=False)[1],5)

  ci_low,ci_up = confint_proportions_2indep(sum_treatment_num,sum_treatment_denum,sum_control_num,sum_control_denum,alpha=0.05,compare='diff')
  ci_prc_low = np.round(ci_low / p_control_exact, 4)
  ci_prc_up = np.round(ci_up / p_control_exact, 4)

  # Round the reported proportions only after every derived value is computed.
  p_control = np.round(p_control_exact, 4)
  p_treatment = np.round(p_treatment_exact, 4)

  return conv_pval, ci_low, ci_up, ci_prc_low, ci_prc_up, p_control, p_treatment, p_diff, p_prc_diff

def get_bootstrap_results_for_conversions(control_num_arr, control_denum_arr, tr_num_arr, tr_denum_arr, bootstrap_iterations=800):
  """Bootstrap the difference of a ratio metric between two groups.

  On every iteration units are resampled with replacement (numerator and
  denominator of a unit stay together) and the ratio
  ``sum(numerators) / sum(denominators)`` is computed for each group. Each
  group is resampled at ITS OWN size; previously the treatment was drawn at
  the control's size, which misstates the treatment's bootstrap spread when
  the groups differ in size.

  Returns:
    Tuple ``(pval, ctr_mean, tr_mean, mean_diff, ci_prc_low, ci_prc_up)``:
    a two-sided p-value from the share of negative bootstrap differences,
    the averages of the bootstrap ratios, their relative difference, and
    the 2.5/97.5 percentile bounds of the bootstrap differences divided by
    the control average. All values are rounded.
  """
  control_num_arr = np.asarray(control_num_arr)
  control_denum_arr = np.asarray(control_denum_arr)
  tr_num_arr = np.asarray(tr_num_arr)
  tr_denum_arr = np.asarray(tr_denum_arr)

  control_size = len(control_denum_arr)
  treatment_size = len(tr_denum_arr)
  boot_ratios_ctr = []
  boot_ratios_tr = []
  boot_diffs = []
  negative_diffs = 0
  for _ in range(bootstrap_iterations):
    idx_control = np.random.choice(np.arange(control_size), size=control_size, replace=True)
    ratio_ctr = np.sum(control_num_arr[idx_control]) / np.sum(control_denum_arr[idx_control])
    idx_tr = np.random.choice(np.arange(treatment_size), size=treatment_size, replace=True)
    ratio_tr = np.sum(tr_num_arr[idx_tr]) / np.sum(tr_denum_arr[idx_tr])
    boot_ratios_ctr.append(ratio_ctr)
    boot_ratios_tr.append(ratio_tr)
    diff = ratio_tr - ratio_ctr
    if diff < 0:
      negative_diffs += 1
    boot_diffs.append(diff)

  ci_low, ci_up = np.percentile(boot_diffs, [2.5, 97.5])
  pval = 2 * np.minimum(negative_diffs, bootstrap_iterations - negative_diffs) / bootstrap_iterations
  ctr_mean = np.mean(boot_ratios_ctr)
  tr_mean = np.mean(boot_ratios_tr)
  mean_diff = relative_diff(tr_mean, ctr_mean)
  ci_prc_low = ci_low / ctr_mean
  ci_prc_up = ci_up / ctr_mean
  return np.round(pval,4),np.round(ctr_mean,4),np.round(tr_mean,4),np.round(mean_diff,5),np.round(ci_prc_low,5),np.round(ci_prc_up,5)
  
def get_buckets_results_for_conversions(control_num_arr, control_denum_arr, tr_num_arr, tr_denum_arr,buckets=150,confidence_level = 0.95,min_bucket_size=4000):
  """Split both groups into random buckets and t-test the bucket ratios.

  Bucket members are picked with ``np.random.randint`` (so a unit can appear
  more than once within a bucket) and the picked positions are then removed
  from the pool of remaining units. A bucket is only formed while both pools
  still hold at least ``sample_size`` units.

  Fixes over the previous version:
    * the bucket statistic is ``sum(numerators) / sum(denominators)``, the
      same ratio definition the bootstrap for conversions uses. It used to
      be the mean of per-unit ratios, which turns into NaN/inf as soon as
      one unit has a zero denominator;
    * fewer than ``min_bucket_size`` control units produced zero buckets
      and a ZeroDivisionError; at least one bucket is now used.

  Args:
    control_num_arr, control_denum_arr: NumPy arrays of per-unit numerators
      and denominators of the control group (same length, same order).
    tr_num_arr, tr_denum_arr: the same for the treatment group.
    buckets: requested number of buckets.
    confidence_level: confidence level of the t-interval.
    min_bucket_size: if the requested bucket count would give smaller
      buckets than this, the count is reduced accordingly.

  Returns:
    Tuple ``(pval, mean_ctr, mean_tr, relative_diff, ci_prc_low, ci_prc_up)``,
    all rounded. CI bounds are relative to the mean of control bucket ratios.
  """
  buckets_means_ctr = []
  buckets_means_tr = []
  means_diff = []
  control_num_pool = control_num_arr
  treatment_num_pool = tr_num_arr
  control_denum_pool = control_denum_arr
  treatment_denum_pool = tr_denum_arr

  sample_size = len(control_num_arr) // buckets
  if sample_size < min_bucket_size:
    # Too few units for the requested count: use fewer, larger buckets.
    buckets = max(1, len(control_num_arr) // min_bucket_size)
    sample_size = len(control_num_arr) // buckets

  for _ in range(buckets):
    if sample_size <= len(control_num_pool) and sample_size <= len(treatment_num_pool):
      control_idx = np.random.randint(0, len(control_num_pool), sample_size)
      test_idx = np.random.randint(0, len(treatment_num_pool), sample_size)
      ctr_ratio = np.sum(control_num_pool[control_idx]) / np.sum(control_denum_pool[control_idx])
      tr_ratio = np.sum(treatment_num_pool[test_idx]) / np.sum(treatment_denum_pool[test_idx])
      means_diff.append(tr_ratio - ctr_ratio)
      buckets_means_ctr.append(ctr_ratio)
      buckets_means_tr.append(tr_ratio)
      # Remove the used positions from numerators and denominators alike so
      # that the two pools stay aligned.
      control_num_pool = np.delete(control_num_pool, control_idx)
      treatment_num_pool = np.delete(treatment_num_pool, test_idx)
      control_denum_pool = np.delete(control_denum_pool, control_idx)
      treatment_denum_pool = np.delete(treatment_denum_pool, test_idx)

  pval_mean = stats.ttest_ind(buckets_means_ctr, buckets_means_tr).pvalue
  mean_buckets_tr = np.mean(buckets_means_tr)
  mean_buckets_ctr = np.mean(buckets_means_ctr)

  degrees_freedom = len(means_diff) - 1
  sample_mean = np.mean(means_diff)
  sample_standard_error = stats.sem(means_diff)
  ci_low, ci_up = stats.t.interval(confidence_level, degrees_freedom, sample_mean, sample_standard_error)
  ci_prc_low = ci_low / mean_buckets_ctr
  ci_prc_up = ci_up / mean_buckets_ctr

  return np.round(pval_mean,4),np.round(mean_buckets_ctr,4),np.round(mean_buckets_tr,4),np.round(relative_diff(mean_buckets_tr,mean_buckets_ctr), 5), np.round(ci_prc_low,5), np.round(ci_prc_up,5)


def get_bootstrap_results_for_conversions_mde(control_num_arr, control_denum_arr, uplift=0.0, bootstrap_iterations=800):
  """Bootstrap p-value of a ratio metric against an uplifted copy of itself.

  Both the "control" and the "treatment" resamples are drawn with
  replacement from the same control arrays; the treatment ratio is
  multiplied by ``1 + uplift``. The p-value is two-sided, based on the share
  of iterations in which the uplifted ratio falls below the control ratio.

  Returns the p-value rounded to 4 decimals.
  """
  n_units = len(control_denum_arr)
  below_control = 0
  for _ in range(bootstrap_iterations):
    ctr_positions = np.random.choice(np.arange(n_units), size=n_units, replace=True)
    ctr_ratio = np.sum(control_num_arr[ctr_positions]) / np.sum(control_denum_arr[ctr_positions])
    tr_positions = np.random.choice(np.arange(n_units), size=n_units, replace=True)
    tr_ratio = np.sum(control_num_arr[tr_positions]) / np.sum(control_denum_arr[tr_positions]) * (1 + uplift)
    if tr_ratio - ctr_ratio < 0:
      below_control += 1
  two_sided_pval = 2 * np.minimum(below_control, bootstrap_iterations - below_control) / bootstrap_iterations
  return np.round(two_sided_pval, 4)
  
def get_mde(pval, mean_diff, ci_low, ci_up):
  """Derive a minimal-detectable-effect figure from a test result.

  For a significant result (``pval <= 0.05``) the value is the distance
  between the effect and the CI bound nearer to zero; otherwise it is the
  absolute effect plus the absolute value of that bound. When none of the
  comparisons hold (e.g. NaN inputs) the default 1.0 is kept.

  Returns the value rounded to 3 decimals.
  """
  mde = 1.0
  if pval <= 0.05:
    if mean_diff >= 0:
      mde = mean_diff - ci_low
    elif mean_diff < 0:
      mde = abs(mean_diff) - abs(ci_up)
  elif pval > 0.05:
    if mean_diff >= 0:
      mde = abs(mean_diff) + abs(ci_low)
    elif mean_diff < 0:
      mde = abs(mean_diff) + abs(ci_up)
  return np.round(mde, 3)


In [0]:
def _new_result_row(day, metric, metric_type):
  """Return a result row holding every output column, defaulted to NaN.

  Only the identifying fields are filled in; callers overwrite the
  statistics that apply to their metric type. Building every row from the
  same ordered column list keeps the output schema identical across metric
  types.

  Reads the module-level ``experiment_name`` and ``calculations_time``.
  """
  columns = [
    'experiment_name', 'dt', 'metric', 'metric_type',
    'mean_control', 'mean_treatment', 'mean_diff',
    'mean_control_wo_outliers', 'mean_treatment_wo_outliers', 'mean_diff_wo_outliers',
    'median_control', 'median_treatment', 'median_diff',
    'ttest_pval', 'ttest_mean_diff', 'ttest_ci_prc_low', 'ttest_ci_prc_up',
    'ttest_wo_outliers_pval', 'ttest_wo_outliers_mean_diff',
    'ttest_wo_outliers_ci_prc_low', 'ttest_wo_outliers_ci_prc_up',
    'btstrp_pval', 'btstrp_mean_diff', 'btstrp_ci_prc_low', 'btstrp_ci_prc_up',
    'btstrp_wo_outliers_pval', 'btstrp_wo_outliers_mean_diff',
    'btstrp_wo_outliers_ci_prc_low', 'btstrp_wo_outliers_ci_prc_up',
    'buckets_pval', 'buckets_mean_diff', 'buckets_ci_prc_low', 'buckets_ci_prc_up',
    'buckets_wo_outliers_pval', 'buckets_wo_outliers_mean_diff',
    'buckets_wo_outliers_ci_prc_low', 'buckets_wo_outliers_ci_prc_up',
    'MW_pval', 'chi2_pval',
    'outliers_percentile', 'outliers_threshold',
    'calculations_time',
    'mde_bootstrap', 'mde_ttest_wo_outliers',
    'units_control', 'units_treatment', 'srm_pval',
  ]
  row = dict.fromkeys(columns, np.nan)
  row.update({
    'experiment_name': experiment_name,
    'dt': day,
    'metric': metric,
    'metric_type': metric_type,
    'calculations_time': calculations_time,
  })
  return row


def main_ab_test_calculation_function(customer_id_column, test_start_date, day, calc_start_date, calc_end_date, metrics_columns, need_bootstrap, need_buckets, proportions_metrics_flag, proportions_columns,ratio_metrics_flag,ratio_columns, query , is_zps_kpis_list_needed = True
  , query_flat=None):
  """Compute all A/B-test statistics for one day and return them as a DataFrame.

  The per-customer data is read with ``dwhRead(query)`` and optionally
  enriched with the ZPS KPI query and with long-format metrics from
  ``query_flat`` (pivoted into one column per ``metric_name``). One result
  row is produced per continuous metric, per proportion metric and per
  ratio metric.

  Besides its arguments the function reads these module-level names:
  ``dwhRead``, ``quantile_for_outliers``, ``variant_col``,
  ``control_gr_name``, ``treatment_gr_name``, ``bootstrap_iterations``,
  ``buckets_num``, ``need_boostrap_wo_outliers``, ``need_buckets_wo_outliers``,
  ``experiment_name`` and ``calculations_time``.

  Args:
    customer_id_column: customer id column in the query results.
    test_start_date: first date (string) of the ZPS KPI window.
    day: last date (string) of the ZPS KPI window; also stored as ``dt``.
    calc_start_date, calc_end_date: not used by this function.
    metrics_columns: continuous metrics; each entry is a column name or a
      one-item dict ``{column_name: metric_type}`` where the type
      ``'median'`` switches that metric to median-based statistics.
    need_bootstrap: run the bootstrap for continuous metrics.
    need_buckets: run the bucket test.
    proportions_metrics_flag: evaluate ``proportions_columns``.
    proportions_columns: dict ``{numerator_column: denominator_column}``.
    ratio_metrics_flag: evaluate the caller's ``ratio_columns``.
    ratio_columns: dict ``{numerator_column: denominator_column}``; it is
      copied, never modified in place.
    query: main per-customer query.
    is_zps_kpis_list_needed: add the ZPS KPI metrics and ratios.
    query_flat: optional query returning ``metric_name``/``metric_value``
      rows per customer.

  Returns:
    DataFrame with one row per evaluated metric, indexed by
    ``calculations_time``.

  Changes compared with the previous implementation:
    * median metrics no longer hit unbound (or stale, from the previous
      loop iteration) variables: mean fields fall back to the medians and
      the t-test, Mann-Whitney and without-outlier fields are NaN;
    * the without-outliers bucket test is skipped for median metrics, the
      same way the without-outliers bootstrap already was;
    * ``mde_ttest_wo_outliers`` is computed from the without-outliers
      t-test instead of the with-outliers one;
    * rows are collected in a list and turned into a DataFrame at the end,
      because ``DataFrame.append`` was removed in pandas 2.0;
    * ``np.nan`` replaces ``np.NaN``, which was removed in NumPy 2.0.
  """
  nan6 = (np.nan,) * 6
  metrics_columns_internal = list(metrics_columns)
  result_rows = []

  print(day)
  start_time = time.time()

  df_main_query = dwhRead(query).toPandas()

  if is_zps_kpis_list_needed:
    # calculate ZPS KPIs
    zps_kpis_query = f"""
    select customer_id
         , coalesce(sum(gmv_bef_cancellation), 0)::float as zps_kpi_gmvbr
         , coalesce(count(purchase_attempt_id), 0) as zps_kpi_num_purchase_attempts
         , coalesce(sum(num_orders_placed), 0) as zps_kpi_num_orders_placed
         , zps_kpi_num_orders_placed as zps_kpi_num_orders_placed_2
         , coalesce(count(case when funnel_saw_payment_page then purchase_attempt_id end), 0) as zps_kpi_num_purchase_attempts_with_payment_methods_rendered
         , coalesce(count(case when funnel_reached_payment_selected and funnel_saw_payment_page then purchase_attempt_id end), 0) as zps_kpi_num_purchase_attempts_reached_payment_selected_with_rendered
         , coalesce(count(case when funnel_reached_payment_initiated then purchase_attempt_id end), 0) as zps_kpi_num_purchase_attempts_reached_payment_initiated
    from reporting.pam_purchase_attempt_funnel
    where purchase_attempt_created_at::date between '{test_start_date}' and '{day}'
    group by 1
  """
    df_zps_kpis = dwhRead(zps_kpis_query).toPandas()
    df_customer = df_main_query.merge(df_zps_kpis, left_on=customer_id_column, right_on='customer_id', how='left')
  else:
    df_customer = df_main_query

  if query_flat:
    df_customer_flat = dwhRead(query_flat).toPandas()

    flat_metrics = df_customer_flat.metric_name.unique().tolist()
    main_metrics = df_customer.columns.tolist()

    metrics_columns_internal += flat_metrics

    # Join keys are compared as strings on both sides.
    df_customer[customer_id_column] = df_customer[customer_id_column].astype(str)
    df_customer_flat[customer_id_column] = df_customer_flat[customer_id_column].astype(str)

    for metric in flat_metrics:
      if metric in main_metrics:
        continue
      res = df_customer_flat[df_customer_flat.metric_name == metric][[customer_id_column, 'metric_value']].rename(columns={'metric_value': metric})
      df_customer = df_customer.merge(res, on=customer_id_column, how='left')

  print (list(df_customer.columns))
  print(" %s minutes needed for Query calculations " % (np.round((time.time() - start_time)/60,1)))
  start_time = time.time()

  # inject ZPS KPIs to metrics list
  if is_zps_kpis_list_needed:
    metrics_columns_internal.extend(['zps_kpi_gmvbr','zps_kpi_num_purchase_attempts','zps_kpi_num_orders_placed'])

  # ---- continuous metrics ----
  for metric in metrics_columns_internal:
    metric_type = 'none'
    if isinstance(metric, dict):
      metric_name = list(metric.keys())[0]
      metric_type = metric.get(metric_name)
      metric = metric_name
      print (metric_name,metric_type,'start calculations' )
    else:
      print(metric,'start calculations')
    is_median_needed = metric_type.lower() == 'median'

    # remove outliers
    df_wo_outliers, outliers_threshold = remove_outliers(df_customer, metric, quantile_for_outliers)

    # get needed data from DF
    control, treatment = get_control_and_treatment_samples(df_customer, metric, variant_col, control_gr_name, treatment_gr_name)

    control_median = np.median(control)
    treatment_median = np.median(treatment)
    median_diff = np.round(relative_diff(treatment_median, control_median), 4)

    if not is_median_needed:
      control_wo_outliers, treatment_wo_outliers = get_control_and_treatment_samples(df_wo_outliers, metric, variant_col, control_gr_name, treatment_gr_name)

      # T-test calculations (with and without outliers)
      ttest_pval, _, _, ttest_mean_diff, ttest_ci_prc_low, ttest_ci_prc_up = get_t_test_results(control, treatment)
      (ttest_pval_wo_outliers, _, _, ttest_mean_diff_wo_outliers,
       ttest_ci_prc_low_wo_outliers, ttest_ci_prc_up_wo_outliers) = get_t_test_results(control_wo_outliers, treatment_wo_outliers)

      # MW test calculations
      mw_test_pval = stats.mannwhitneyu(control, treatment).pvalue

      # Means and their differences
      control_mean = np.round(control.mean(), 4)
      treatment_mean = np.round(treatment.mean(), 4)
      mean_diff = np.round(relative_diff(treatment_mean, control_mean), 4)

      control_mean_wo_outliers = np.round(control_wo_outliers.mean(), 4)
      treatment_mean_wo_outliers = np.round(treatment_wo_outliers.mean(), 4)
      mean_diff_wo_outliers = np.round(relative_diff(treatment_mean_wo_outliers, control_mean_wo_outliers), 4)
    else:
      # Median metrics: no t-test / MW test / outlier removal. The "mean"
      # fields report the medians so that every name used below is bound
      # to a value that belongs to THIS metric.
      ttest_pval = ttest_mean_diff = ttest_ci_prc_low = ttest_ci_prc_up = np.nan
      ttest_pval_wo_outliers = ttest_mean_diff_wo_outliers = np.nan
      ttest_ci_prc_low_wo_outliers = ttest_ci_prc_up_wo_outliers = np.nan
      mw_test_pval = np.nan
      control_mean = control_median
      treatment_mean = treatment_median
      mean_diff = median_diff
      control_mean_wo_outliers = treatment_mean_wo_outliers = mean_diff_wo_outliers = np.nan

    print(" %s minutes needed for usual tests calculations " % (np.round((time.time() - start_time)/60,1)))

    # bootstrap
    if need_bootstrap:
      btstrp_pval, _, _, btstrp_mean_diff, btstrp_ci_prc_low, btstrp_ci_prc_up = get_bootstrap_results(control, treatment, bootstrap_iterations, bootstrap_median=is_median_needed)
    else:
      btstrp_pval, _, _, btstrp_mean_diff, btstrp_ci_prc_low, btstrp_ci_prc_up = nan6
    if need_boostrap_wo_outliers and not is_median_needed:
      (btstrp_pval_wo_outliers, _, _, btstrp_mean_diff_wo_outliers,
       btstrp_ci_prc_low_wo_outliers, btstrp_ci_prc_up_wo_outliers) = get_bootstrap_results(control_wo_outliers, treatment_wo_outliers, bootstrap_iterations)
    else:
      (btstrp_pval_wo_outliers, _, _, btstrp_mean_diff_wo_outliers,
       btstrp_ci_prc_low_wo_outliers, btstrp_ci_prc_up_wo_outliers) = nan6

    print(" %s minutes needed for usual+bootstrap tests calculations " % (np.round((time.time() - start_time)/60,1)))

    # buckets+t_test
    if need_buckets:
      buckets_pval, _, _, buckets_mean_diff, buckets_ci_prc_low, buckets_ci_prc_up = get_buckets_results(control, treatment, buckets_num)
    else:
      buckets_pval, _, _, buckets_mean_diff, buckets_ci_prc_low, buckets_ci_prc_up = nan6
    # The without-outliers samples only exist for non-median metrics.
    if need_buckets_wo_outliers and not is_median_needed:
      (buckets_pval_wo_outliers, _, _, buckets_mean_diff_wo_outliers,
       buckets_ci_prc_low_wo_outliers, buckets_ci_prc_up_wo_outliers) = get_buckets_results(control_wo_outliers, treatment_wo_outliers, buckets_num)
    else:
      (buckets_pval_wo_outliers, _, _, buckets_mean_diff_wo_outliers,
       buckets_ci_prc_low_wo_outliers, buckets_ci_prc_up_wo_outliers) = nan6

    print(" %s minutes needed for usual+bootstrap+buckets tests calculations " % (np.round((time.time() - start_time)/60,1)))

    btstrp_mde = get_mde(btstrp_pval, mean_diff, btstrp_ci_prc_low, btstrp_ci_prc_up)
    # Uses the without-outliers statistics, matching the output column name.
    ttest_wo_outliers_mde = get_mde(ttest_pval_wo_outliers, mean_diff_wo_outliers, ttest_ci_prc_low_wo_outliers, ttest_ci_prc_up_wo_outliers)

    print(" %s minutes needed for usual+bootstrap+buckets+mde tests calculations " % (np.round((time.time() - start_time)/60,1)))

    # collect results
    row = _new_result_row(day, metric + '_median' if is_median_needed else metric, 'continuous')
    row.update({
      'mean_control': control_mean,
      'mean_treatment': treatment_mean,
      'mean_diff': mean_diff,
      'mean_control_wo_outliers': control_mean_wo_outliers,
      'mean_treatment_wo_outliers': treatment_mean_wo_outliers,
      'mean_diff_wo_outliers': mean_diff_wo_outliers,
      'median_control': control_median,
      'median_treatment': treatment_median,
      'median_diff': median_diff,
      'ttest_pval': ttest_pval,
      'ttest_mean_diff': ttest_mean_diff,
      'ttest_ci_prc_low': ttest_ci_prc_low,
      'ttest_ci_prc_up': ttest_ci_prc_up,
      'ttest_wo_outliers_pval': ttest_pval_wo_outliers,
      'ttest_wo_outliers_mean_diff': ttest_mean_diff_wo_outliers,
      'ttest_wo_outliers_ci_prc_low': ttest_ci_prc_low_wo_outliers,
      'ttest_wo_outliers_ci_prc_up': ttest_ci_prc_up_wo_outliers,
      'btstrp_pval': btstrp_pval,
      'btstrp_mean_diff': btstrp_mean_diff,
      'btstrp_ci_prc_low': btstrp_ci_prc_low,
      'btstrp_ci_prc_up': btstrp_ci_prc_up,
      'btstrp_wo_outliers_pval': btstrp_pval_wo_outliers,
      'btstrp_wo_outliers_mean_diff': btstrp_mean_diff_wo_outliers,
      'btstrp_wo_outliers_ci_prc_low': btstrp_ci_prc_low_wo_outliers,
      'btstrp_wo_outliers_ci_prc_up': btstrp_ci_prc_up_wo_outliers,
      'buckets_pval': buckets_pval,
      'buckets_mean_diff': buckets_mean_diff,
      'buckets_ci_prc_low': buckets_ci_prc_low,
      'buckets_ci_prc_up': buckets_ci_prc_up,
      'buckets_wo_outliers_pval': buckets_pval_wo_outliers,
      'buckets_wo_outliers_mean_diff': buckets_mean_diff_wo_outliers,
      'buckets_wo_outliers_ci_prc_low': buckets_ci_prc_low_wo_outliers,
      'buckets_wo_outliers_ci_prc_up': buckets_ci_prc_up_wo_outliers,
      'MW_pval': mw_test_pval,
      'outliers_percentile': quantile_for_outliers,
      'outliers_threshold': outliers_threshold,
      'mde_bootstrap': btstrp_mde,
      'mde_ttest_wo_outliers': ttest_wo_outliers_mde,
      'units_control': len(control),
      'units_treatment': len(treatment),
      'srm_pval': np.round(stats.chisquare([len(control),len(treatment)])[1],4),
    })
    result_rows.append(pd.Series(data=row, name=calculations_time))

  # ---- proportion metrics ----
  if proportions_metrics_flag:
    for num_col in proportions_columns.keys():
      denum_col = proportions_columns[num_col]
      control_num, treatment_num = get_control_and_treatment_samples(df_customer, num_col, variant_col, control_gr_name, treatment_gr_name)
      control_denum, treatment_denum = get_control_and_treatment_samples(df_customer, denum_col, variant_col, control_gr_name, treatment_gr_name)
      conv_pval, ci_low, ci_up, ci_prc_low, ci_prc_up, proportions_control, proportions_treatment, p_diff, proportions_diff = get_chi2_test_results(control_num, control_denum, treatment_num, treatment_denum)

      ztest_mde = get_mde(conv_pval, proportions_diff, ci_prc_low, ci_prc_up)

      row = _new_result_row(day, num_col+'/'+denum_col, 'proportion')
      row.update({
        'mean_control': proportions_control,
        'mean_treatment': proportions_treatment,
        'mean_diff': proportions_diff,
        # The proportion CI and MDE are reported in the bootstrap columns.
        'btstrp_ci_prc_low': ci_prc_low,
        'btstrp_ci_prc_up': ci_prc_up,
        'chi2_pval': conv_pval,
        'mde_bootstrap': ztest_mde,
        'units_control': np.sum(control_denum),
        'units_treatment': np.sum(treatment_denum),
        'srm_pval': np.round(stats.chisquare([np.sum(control_denum),np.sum(treatment_denum)])[1],4),
      })
      result_rows.append(pd.Series(data=row, name=calculations_time))
      print(" %s minutes needed for proportions usual tests calculations " % (np.round((time.time() - start_time)/60,1)))

  # ---- ratio metrics ----
  # inject ZPS KPIs ratio metrics (into a copy, so the caller's dict is untouched)
  if is_zps_kpis_list_needed:
    ratio_columns = dict(ratio_columns) if ratio_metrics_flag else {}
    ratio_columns['zps_kpi_num_purchase_attempts_reached_payment_selected_with_rendered'] = 'zps_kpi_num_purchase_attempts_with_payment_methods_rendered'
    ratio_columns['zps_kpi_num_orders_placed'] = 'zps_kpi_num_purchase_attempts_reached_payment_initiated'
    ratio_columns['zps_kpi_num_orders_placed_2'] = 'zps_kpi_num_purchase_attempts'

  # readable names for ZPS KPIs PACR, PPSR, PSSR
  zps_ratio_names = {
    ('zps_kpi_num_purchase_attempts_reached_payment_selected_with_rendered', 'zps_kpi_num_purchase_attempts_with_payment_methods_rendered'): 'zps_kpi_PSSR',
    ('zps_kpi_num_orders_placed', 'zps_kpi_num_purchase_attempts_reached_payment_initiated'): 'zps_kpi_PPSR',
    ('zps_kpi_num_orders_placed_2', 'zps_kpi_num_purchase_attempts'): 'zps_kpi_PACR',
  }

  if ratio_metrics_flag or is_zps_kpis_list_needed:
    for num_col in ratio_columns.keys():
      print(num_col,'start calculations')
      denum_col = ratio_columns[num_col]
      control_num, treatment_num = get_control_and_treatment_samples(df_customer, num_col, variant_col, control_gr_name, treatment_gr_name)
      control_denum, treatment_denum = get_control_and_treatment_samples(df_customer, denum_col, variant_col, control_gr_name, treatment_gr_name)
      ratio_control = np.round(np.sum(control_num)/np.sum(control_denum),4)
      ratio_treatment = np.round(np.sum(treatment_num)/np.sum(treatment_denum),4)
      ratio_diff = np.round(relative_diff(ratio_treatment, ratio_control),4)
      btstrp_pval, _, _, btstrp_mean_diff, btstrp_ci_prc_low, btstrp_ci_prc_up = get_bootstrap_results_for_conversions(control_num, control_denum, treatment_num, treatment_denum, bootstrap_iterations)
      print(" %s minutes needed for ratio bootstrap tests calculations " % (np.round((time.time() - start_time)/60,1)))

      # bucket test if requested
      if need_buckets:
        buckets_pval, _, _, buckets_mean_diff, buckets_ci_prc_low, buckets_ci_prc_up = get_buckets_results_for_conversions(control_num, control_denum, treatment_num, treatment_denum, buckets_num)
      else:
        buckets_pval, _, _, buckets_mean_diff, buckets_ci_prc_low, buckets_ci_prc_up = nan6
      print(" %s minutes needed for ratio bootstrap+buckets tests calculations " % (np.round((time.time() - start_time)/60,1)))

      btstrp_mde = get_mde(btstrp_pval, ratio_diff, btstrp_ci_prc_low, btstrp_ci_prc_up)

      print(" %s minutes needed for ratio bootstrap+buckets and MDE tests calculations " % (np.round((time.time() - start_time)/60,1)))

      ratio_metric_name = zps_ratio_names.get((num_col, denum_col), num_col+'/'+denum_col)

      row = _new_result_row(day, ratio_metric_name, 'ratio')
      row.update({
        'mean_control': ratio_control,
        'mean_treatment': ratio_treatment,
        'mean_diff': ratio_diff,
        'btstrp_pval': btstrp_pval,
        'btstrp_mean_diff': btstrp_mean_diff,
        'btstrp_ci_prc_low': btstrp_ci_prc_low,
        'btstrp_ci_prc_up': btstrp_ci_prc_up,
        'buckets_pval': buckets_pval,
        'buckets_mean_diff': buckets_mean_diff,
        'buckets_ci_prc_low': buckets_ci_prc_low,
        'buckets_ci_prc_up': buckets_ci_prc_up,
        'mde_bootstrap': btstrp_mde,
        'units_control': len(control_denum),
        'units_treatment': len(treatment_denum),
        'srm_pval': np.round(stats.chisquare([len(control_denum),len(treatment_denum)])[1],4),
      })
      result_rows.append(pd.Series(data=row, name=calculations_time))

  print(" %s minutes needed for Tests calculations " % (np.round((time.time() - start_time)/60,1)))
  print(day,'-done')

  # One row per Series; the Series names (calculations_time) become the index.
  df_results = pd.DataFrame(result_rows) if result_rows else pd.DataFrame()
  return df_results
    


In [0]:
def main_function_for_sample_size_calculations(query, mde, w, treatment_share_percent, pvals, pval_counter, proportions_columns, ratio_columns, continuous_metric_name, continuous_metric_flag, proportions_metric_flag, ratio_metric_flag, continuous_metric_is_median = False):
  """Simulate one test period and check whether an ``mde`` uplift is detectable.

  The query result is down-sampled to ``treatment_share_percent`` of its
  rows (fixed ``random_state=1``) and, for every enabled metric kind, the
  sample is compared with a copy of itself uplifted by ``mde``:
    * continuous: bootstrap of the values against ``values * (1 + mde)``;
    * proportion: chi-square test on the summed counts against the same
      counts with the numerator multiplied by ``1 + mde``;
    * ratio: ``get_bootstrap_results_for_conversions_mde``.

  After each enabled test ``pvals[w]`` is set to the p-value and
  ``pval_counter`` is incremented when ``pval < 0.05`` or reset to 0
  otherwise. ``pvals`` is modified in place.

  Reads the module-level ``dwhRead`` and ``bootstrap_iterations``.

  Returns:
    Tuple ``(pval, w, pval_counter, s_size, pvals)`` where ``s_size`` is the
    number of sampled rows.

  Raises:
    ValueError: if more than one proportion or ratio metric is supplied.
  """
  df_customer = dwhRead(query).toPandas()
  df_customer = df_customer.sample(frac=treatment_share_percent, replace=False, random_state=1)
  s_size = len(df_customer)

  if continuous_metric_flag:
    baseline = df_customer[continuous_metric_name].values
    uplifted = baseline * (1 + mde)
    # Only the p-value of the bootstrap result tuple is needed here.
    pval = get_bootstrap_results(baseline, uplifted, bootstrap_iterations, bootstrap_median = continuous_metric_is_median)[0]
    pvals[w] = pval
    pval_counter = pval_counter + 1 if pval < 0.05 else 0

  if proportions_metric_flag:
    if len(proportions_columns) > 1:
      raise ValueError('You should input only one proportion metric')
    num_name = list(proportions_columns.keys())[0]
    denum_name = proportions_columns[num_name]
    total_num = np.sum(df_customer[num_name].values)
    total_denum = np.sum(df_customer[denum_name].values)
    uplifted_num = total_num * (1 + mde)
    # 2x2 table: [successes, failures] for the baseline and the uplifted copy.
    contingency = np.array([
      [total_num, total_denum - total_num],
      [uplifted_num, total_denum - uplifted_num],
    ])
    pval = np.round(stats.chi2_contingency(contingency, correction=False)[1], 5)
    pvals[w] = pval
    pval_counter = pval_counter + 1 if pval < 0.05 else 0

  if ratio_metric_flag:
    if len(ratio_columns) > 1:
      raise ValueError('You should input only one ratio metric')
    num_name = list(ratio_columns.keys())[0]
    denum_name = ratio_columns[num_name]
    ratio_num = df_customer[num_name].values
    ratio_denum = df_customer[denum_name].values
    pval = get_bootstrap_results_for_conversions_mde(ratio_num, ratio_denum, mde, bootstrap_iterations)
    pvals[w] = pval
    pval_counter = pval_counter + 1 if pval < 0.05 else 0

  print ('week=',w,'; pval=',pval)
  return pval, w, pval_counter, s_size, pvals

    


In [0]:
def get_summary(df_results):
  """Summarise the most recent day of results, most significant first.

  Keeps the rows whose ``dt`` equals the maximum ``dt``, adds a ``pval``
  column holding the row-wise minimum of ``btstrp_pval`` and ``chi2_pval``,
  and returns the columns ``dt``, ``metric``, ``metric_type``,
  ``mean_diff`` and ``pval`` sorted by ``pval`` ascending.

  The filtered rows are copied before the new column is assigned, so the
  assignment never targets a slice of ``df_results`` (which triggers
  pandas' SettingWithCopyWarning).
  """
  latest = df_results[df_results.dt == df_results.dt.max()].copy()
  latest['pval'] = latest[["btstrp_pval", "chi2_pval"]].min(axis=1)
  return latest[['dt','metric', 'metric_type', 'mean_diff', 'pval']].sort_values(by=['pval'])