In [126]:
from math import ceil

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import statsmodels.stats.api as sms
from statsmodels.stats.power import TTestIndPower

%matplotlib inline

In [127]:
def sample_size_calc_mean(control_mean,std_dev,relative_change,control_size,power=0.8,alpha=0.05):
    """Append one sample-size scenario for a difference in means to ``output_mean``.

    The test-group mean is ``control_mean`` raised by ``relative_change`` percent,
    and the standardized effect size is the difference in means divided by
    ``std_dev``. The group sizes are then solved with a one-sided
    (``alternative='larger'``) ``NormalIndPower`` test and rounded up.

    Parameters
    ----------
    control_mean : float
        Mean of the metric in the control group.
    std_dev : float
        Standard deviation used to standardize the effect; must be positive.
    relative_change : float
        Relative lift of the test mean over the control mean, in percent.
    control_size : float
        Share of the population assigned to control, in percent; must be
        strictly between 0 and 100. The test share is ``100 - control_size``.
    power : float, optional
        Target power passed to ``solve_power`` (default 0.8).
    alpha : float, optional
        Significance level passed to ``solve_power`` (default 0.05).

    Returns
    -------
    None
        One value is appended to every list of the module-level dict
        ``output_mean``, which must already exist when this is called.

    Raises
    ------
    ValueError
        If ``control_size`` is not strictly between 0 and 100, or ``std_dev``
        is not positive. Nothing is appended in that case.
    """
    if not 0 < control_size < 100:
        raise ValueError(
            "control_size must be strictly between 0 and 100 (percent), got %r" % (control_size,)
        )
    if std_dev <= 0:
        raise ValueError("std_dev must be positive, got %r" % (std_dev,))

    test_size = 100 - control_size
    test_mean = control_mean*(1+(relative_change/100))
    effect_size = (test_mean - control_mean)/std_dev

    solver = sms.NormalIndPower()
    shared_args = dict(power=power, alpha=alpha, alternative='larger')

    # solve_power returns the size of the first sample; `ratio` is second/first.
    # Solving once per orientation gives each group's own size, each rounded up.
    required_test = ceil(solver.solve_power(
        effect_size=effect_size, ratio=control_size/test_size, **shared_args))
    required_control = ceil(solver.solve_power(
        effect_size=effect_size, ratio=test_size/control_size, **shared_args))

    # Label the row with the change that was actually evaluated.
    output_mean["Change"].append(f"{relative_change:g}%")
    output_mean["Split_Test-Control"].append(str(test_size)+"-"+str(control_size))
    output_mean["Test Volume"].append(required_test)
    output_mean["Control Volume"].append(required_control)
    output_mean["Test Mean"].append(test_mean)
    output_mean["Control Mean"].append(control_mean)
    output_mean["Expected STDDEV"].append(std_dev)
    # "Claims" columns are the group mean times the group volume, rounded up.
    output_mean["Test Claims"].append(ceil(test_mean*required_test))
    output_mean["Control Claims"].append(ceil(control_mean*required_control))


In [128]:
# Column-wise accumulator filled by sample_size_calc_mean: one fresh list per report column.
output_mean = {
    column: []
    for column in (
        "Change",
        "Split_Test-Control",
        "Test Volume",
        "Control Volume",
        "Test Mean",
        "Control Mean",
        "Expected STDDEV",
        "Test Claims",
        "Control Claims",
    )
}


In [129]:
# Scenario inputs are identical for every split, so they are set once, outside the loop.
control_mean = 2.80
std_dev = 0.66
relative_change = 1

# Empty the accumulator lists first so that re-running this cell rebuilds the
# table instead of appending a second copy of every row.
for column_values in output_mean.values():
    column_values.clear()

control_splits = [2, 5, 10, 25, 50]  # control share of the population, in percent
for control_size in control_splits:
    sample_size_calc_mean(control_mean, std_dev, relative_change, control_size)

pd.DataFrame(output_mean)

Unnamed: 0,Change,Split_Test-Control,Test Volume,Control Volume,Test Mean,Control Mean,Expected STDDEV,Test Claims,Control Claims
0,1%,98-2,171756,3506,2.828,2.8,0.66,485726,9817
1,1%,95-5,68703,3616,2.828,2.8,0.66,194293,10125
2,1%,90-10,34352,3817,2.828,2.8,0.66,97148,10688
3,1%,75-25,13741,4581,2.828,2.8,0.66,38860,12827
4,1%,50-50,6871,6871,2.828,2.8,0.66,19432,19239


In [144]:
def sample_size_calc_prop(control_resp_rate,relative_change,control_size,power=0.8,alpha=0.05,ltv_multiplier=10):
    """Append one sample-size scenario for a difference in proportions to ``output_prop``.

    The test-group rate is ``control_resp_rate`` raised by ``relative_change``
    percent. The effect size comes from ``sms.proportion_effectsize`` and the
    group sizes are solved with a one-sided (``alternative='larger'``)
    ``NormalIndPower`` test, then rounded up.

    Parameters
    ----------
    control_resp_rate : float
        Response rate of the control group, as a fraction in [0, 1].
    relative_change : float
        Relative lift of the test rate over the control rate, in percent.
    control_size : float
        Share of the population assigned to control, in percent; must be
        strictly between 0 and 100. The test share is ``100 - control_size``.
    power : float, optional
        Target power passed to ``solve_power`` (default 0.8).
    alpha : float, optional
        Significance level passed to ``solve_power`` (default 0.05).
    ltv_multiplier : float, optional
        Factor applied to rate * volume for the "LTV" columns (default 10).
        NOTE(review): presumably a value per claim - confirm.

    Returns
    -------
    None
        One value is appended to every list of the module-level dict
        ``output_prop``, which must already exist when this is called.

    Raises
    ------
    ValueError
        If ``control_size`` is not strictly between 0 and 100, or either rate
        falls outside [0, 1]. Nothing is appended in that case.
    """
    if not 0 < control_size < 100:
        raise ValueError(
            "control_size must be strictly between 0 and 100 (percent), got %r" % (control_size,)
        )

    test_resp_rate = control_resp_rate*(1+(relative_change/100))
    if not (0 <= control_resp_rate <= 1 and 0 <= test_resp_rate <= 1):
        raise ValueError(
            "response rates must lie in [0, 1]; got control=%r, test=%r"
            % (control_resp_rate, test_resp_rate)
        )

    test_size = 100 - control_size
    # proportion_effectsize(control, test) is negative when the test rate is the
    # higher one; negate it so the one-sided 'larger' test sees a positive effect.
    effect_size = -sms.proportion_effectsize(control_resp_rate,test_resp_rate)

    solver = sms.NormalIndPower()
    shared_args = dict(power=power, alpha=alpha, alternative='larger')

    # solve_power returns the size of the first sample; `ratio` is second/first.
    # Solving once per orientation gives each group's own size, each rounded up.
    required_test = ceil(solver.solve_power(
        effect_size=effect_size, ratio=control_size/test_size, **shared_args))
    required_control = ceil(solver.solve_power(
        effect_size=effect_size, ratio=test_size/control_size, **shared_args))

    # Label the row with the change that was actually evaluated.
    output_prop["Change"].append(f"{relative_change:g}%")
    output_prop["Split_Test-Control"].append(str(test_size)+"-"+str(control_size))
    output_prop["Test Volume"].append(required_test)
    output_prop["Control Volume"].append(required_control)
    output_prop["Test Rate"].append(test_resp_rate)
    output_prop["Control Rate"].append(control_resp_rate)
    # "Claims" columns are the group rate times the group volume, rounded up.
    output_prop["Test Claims"].append(ceil(test_resp_rate*required_test))
    output_prop["Control Claims"].append(ceil(control_resp_rate*required_control))
    # The multiplier is applied before rounding up, so these are not simply
    # ltv_multiplier times the rounded "Claims" values.
    output_prop["Test LTV"].append("$"+str(ceil(test_resp_rate*required_test*ltv_multiplier)))
    output_prop["Control LTV"].append("$"+str(ceil(control_resp_rate*required_control*ltv_multiplier)))



In [145]:
# Column-wise accumulator filled by sample_size_calc_prop: one fresh list per report column.
output_prop = {
    column: []
    for column in (
        "Change",
        "Split_Test-Control",
        "Test Volume",
        "Control Volume",
        "Test Rate",
        "Control Rate",
        "Test Claims",
        "Control Claims",
        "Test LTV",
        "Control LTV",
    )
}


In [146]:
# Scenario inputs are identical for every split, so they are set once, outside the loop.
control_resp_rate = 0.552
relative_change = 1

# Empty the accumulator lists first so that re-running this cell rebuilds the
# table instead of appending a second copy of every row.
for column_values in output_prop.values():
    column_values.clear()

control_splits = [2, 5, 10, 25, 50]  # control share of the population, in percent
for control_size in control_splits:
    sample_size_calc_prop(control_resp_rate, relative_change, control_size)

pd.DataFrame(output_prop)

Unnamed: 0,Change,Split_Test-Control,Test Volume,Control Volume,Test Rate,Control Rate,Test Claims,Control Claims,Test LTV,Control LTV
0,1%,98-2,2505848,51140,0.55752,0.552,1397061,28230,$13970604,$282293
1,1%,95-5,1002340,52755,0.55752,0.552,558825,29121,$5588246,$291208
2,1%,90-10,501170,55686,0.55752,0.552,279413,30739,$2794123,$307387
3,1%,75-25,200468,66823,0.55752,0.552,111765,36887,$1117650,$368863
4,1%,50-50,100234,100234,0.55752,0.552,55883,55330,$558825,$553292


In [147]:
# Power of a two-sample t-test for every (control share, expected lift) pair,
# with the first-sample size held fixed.
alpha = 0.05
control_shares = [0.5, 0.4, 0.3, 0.2, 0.1]  # control share of the population (fraction)
lifts = [0.04, 0.05, 0.06, 0.07, 0.08, 0.09]
nobs1 = 12000  # first-sample size for solve_power; with ratio = control/test this is the test group

analysis = TTestIndPower()
power_rows = []
for control_share in control_shares:
    test_share = 1 - control_share
    for lift in lifts:
        # NOTE(review): the effect size is lift / sqrt(2) and does not depend on
        # the control share - confirm this standardization is intended.
        d = lift / np.sqrt(2)
        power_calculate = analysis.solve_power(
            effect_size=d, alpha=alpha, nobs1=nobs1, ratio=control_share / test_share)
        power_rows.append({
            # round() rather than int(): int() truncates, so a float such as
            # 28.999999999999996 would be labelled 28 instead of 29.
            'Test/control Split': f"{round(test_share * 100)}/{round(control_share * 100)}",
            'Expected_lift': f"{round(lift * 100)}%",
            'Power': int(np.round(power_calculate * 100, 0)),
        })

# Build the frame once from the collected rows instead of concatenating inside the loop.
final_power = pd.DataFrame(power_rows)

In [148]:
# Reshape the long power results into a grid: one row per split, one column per expected lift.
final_power.pivot_table(
    index='Test/control Split',
    columns='Expected_lift',
    values='Power',
)

Expected_lift,4%,5%,6%,7%,8%,9%
Test/control Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
50/50,59,78,91,97,99,100
60/40,50,69,84,93,97,99
70/30,40,56,72,84,92,97
80/20,28,41,55,68,79,88
90/10,17,23,31,40,50,60


In [149]:
# Power grid for a single test/control split at a fixed first-sample size.
alpha = 0.05
test_split = [0.5]  # test share of the population (fraction)
lifts = [0.04, 0.05, 0.06, 0.07, 0.08, 0.09]

nobs1 = 12200  # first-sample size for solve_power; with ratio = control/test this is the test group

analysis = TTestIndPower()
power_rows = []
for test in test_split:
    control_share = 1 - test
    for lift in lifts:
        # NOTE(review): effect size is lift / sqrt(2), independent of the split - confirm.
        d = lift / np.sqrt(2)
        power_calculate = analysis.solve_power(
            effect_size=d, alpha=alpha, nobs1=nobs1, ratio=control_share / test)
        power_rows.append({
            # round() absorbs float error such as (1 - 0.9) * 100 == 9.999999999999998.
            'Test/control Split': f"{round(test * 100)}/{round(control_share * 100)}",
            'Expected_lift': f"{round(lift * 100)}%",
            'Power': int(np.round(power_calculate * 100, 0)),
        })

# Build the frame once from the collected rows, then pivot to split x lift.
final_power_1 = pd.DataFrame(power_rows).pivot_table(
    values='Power', index='Test/control Split', columns='Expected_lift')


In [150]:
# Power grid for a single test/control split at a fixed first-sample size.
alpha = 0.05
test_split = [0.6]  # test share of the population (fraction)
lifts = [0.04, 0.05, 0.06, 0.07, 0.08, 0.09]

nobs1 = 14500  # first-sample size for solve_power; with ratio = control/test this is the test group

analysis = TTestIndPower()
power_rows = []
for test in test_split:
    control_share = 1 - test
    for lift in lifts:
        # NOTE(review): effect size is lift / sqrt(2), independent of the split - confirm.
        d = lift / np.sqrt(2)
        power_calculate = analysis.solve_power(
            effect_size=d, alpha=alpha, nobs1=nobs1, ratio=control_share / test)
        power_rows.append({
            # round() absorbs float error such as (1 - 0.9) * 100 == 9.999999999999998.
            'Test/control Split': f"{round(test * 100)}/{round(control_share * 100)}",
            'Expected_lift': f"{round(lift * 100)}%",
            'Power': int(np.round(power_calculate * 100, 0)),
        })

# Build the frame once from the collected rows, then pivot to split x lift.
final_power_2 = pd.DataFrame(power_rows).pivot_table(
    values='Power', index='Test/control Split', columns='Expected_lift')


In [151]:
# Power grid for a single test/control split at a fixed first-sample size.
alpha = 0.05
test_split = [0.7]  # test share of the population (fraction)
lifts = [0.04, 0.05, 0.06, 0.07, 0.08, 0.09]

nobs1 = 17200  # first-sample size for solve_power; with ratio = control/test this is the test group

analysis = TTestIndPower()
power_rows = []
for test in test_split:
    control_share = 1 - test
    for lift in lifts:
        # NOTE(review): effect size is lift / sqrt(2), independent of the split - confirm.
        d = lift / np.sqrt(2)
        power_calculate = analysis.solve_power(
            effect_size=d, alpha=alpha, nobs1=nobs1, ratio=control_share / test)
        power_rows.append({
            # round() absorbs float error such as (1 - 0.9) * 100 == 9.999999999999998.
            'Test/control Split': f"{round(test * 100)}/{round(control_share * 100)}",
            'Expected_lift': f"{round(lift * 100)}%",
            'Power': int(np.round(power_calculate * 100, 0)),
        })

# Build the frame once from the collected rows, then pivot to split x lift.
final_power_3 = pd.DataFrame(power_rows).pivot_table(
    values='Power', index='Test/control Split', columns='Expected_lift')


In [152]:
# Power grid for a single test/control split at a fixed first-sample size.
alpha = 0.05
test_split = [0.8]  # test share of the population (fraction)
lifts = [0.04, 0.05, 0.06, 0.07, 0.08, 0.09]

nobs1 = 19700  # first-sample size for solve_power; with ratio = control/test this is the test group

analysis = TTestIndPower()
power_rows = []
for test in test_split:
    control_share = 1 - test
    for lift in lifts:
        # NOTE(review): effect size is lift / sqrt(2), independent of the split - confirm.
        d = lift / np.sqrt(2)
        power_calculate = analysis.solve_power(
            effect_size=d, alpha=alpha, nobs1=nobs1, ratio=control_share / test)
        power_rows.append({
            # round() absorbs float error such as (1 - 0.9) * 100 == 9.999999999999998.
            'Test/control Split': f"{round(test * 100)}/{round(control_share * 100)}",
            'Expected_lift': f"{round(lift * 100)}%",
            'Power': int(np.round(power_calculate * 100, 0)),
        })

# Build the frame once from the collected rows, then pivot to split x lift.
final_power_4 = pd.DataFrame(power_rows).pivot_table(
    values='Power', index='Test/control Split', columns='Expected_lift')


In [153]:
# Power grid for a single test/control split at a fixed first-sample size.
alpha = 0.05
test_split = [0.9]  # test share of the population (fraction)
lifts = [0.04, 0.05, 0.06, 0.07, 0.08, 0.09]

nobs1 = 22470  # first-sample size for solve_power; with ratio = control/test this is the test group

analysis = TTestIndPower()
power_rows = []
for test in test_split:
    control_share = 1 - test
    for lift in lifts:
        # NOTE(review): effect size is lift / sqrt(2), independent of the split - confirm.
        d = lift / np.sqrt(2)
        power_calculate = analysis.solve_power(
            effect_size=d, alpha=alpha, nobs1=nobs1, ratio=control_share / test)
        power_rows.append({
            # round() absorbs float error such as (1 - 0.9) * 100 == 9.999999999999998.
            'Test/control Split': f"{round(test * 100)}/{round(control_share * 100)}",
            'Expected_lift': f"{round(lift * 100)}%",
            'Power': int(np.round(power_calculate * 100, 0)),
        })

# Build the frame once from the collected rows, then pivot to split x lift.
final_power_5 = pd.DataFrame(power_rows).pivot_table(
    values='Power', index='Test/control Split', columns='Expected_lift')


In [154]:
# Stack the five single-split power grids, in order, into one table.
power_grids = [
    final_power_1,
    final_power_2,
    final_power_3,
    final_power_4,
    final_power_5,
]
pd.concat(power_grids)

Expected_lift,4%,5%,6%,7%,8%,9%
Test/control Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
50/50,60,79,91,97,99,100
60/40,58,77,90,96,99,100
70/30,53,72,86,94,98,100
80/20,43,60,76,87,94,98
90/10,27,39,52,65,76,85
