In [7]:
import pandas as pd
import numpy as np

In [8]:
pageviews = 5000

In [9]:
df_basevals = pd.read_csv('baseline.values.csv',index_col=False,names = ['metric','baseline_val'])
df_basevals.metric = df_basevals.metric.map(lambda x:x.lower())
df_basevals

Unnamed: 0,metric,baseline_val
0,unique cookies to view page per day:,40000.0
1,"unique cookies to click ""start free trial"" per...",3200.0
2,enrollments per day:,660.0
3,"click-through-probability on ""start free trial"":",0.08
4,"probability of enrolling, given click:",0.20625
5,"probability of payment, given enroll:",0.53
6,"probability of payment, given click",0.109313


In [10]:
d = {'Metrics':['number of cookies','number of user_ids','number of clicks','click through probability',
               'gross conversion','retention','net conversion'],
    'baseline_val':[40000,660,3200,0.08,0.206250,0.530000,0.109313],
    'dmin':[3000,50,240,0.01,0.01,0.01,0.0075]}

In [11]:
baseline_data = pd.DataFrame(data=d,index=['Cookies','IDS','Clicks','CTP','GC','R','NC'])
baseline_data

Unnamed: 0,Metrics,baseline_val,dmin
Cookies,number of cookies,40000.0,3000.0
IDS,number of user_ids,660.0,50.0
Clicks,number of clicks,3200.0,240.0
CTP,click through probability,0.08,0.01
GC,gross conversion,0.20625,0.01
R,retention,0.53,0.01
NC,net conversion,0.109313,0.0075


# Caculating Variability

For each metric, we use the standard deviation of sampling population to estimate the total population, we also call it standard error.

Step 1: Scaling
Since this experiment contains 5000 cookies to estimate the variability of metrics. However, the baseline values of cookies is 40000 which is much larger than 5000. Therefore, the number of ids and clicks change as well. Hence, the first step is to scale our baseline values

In [12]:
baseline_data.insert(2,'scaled_value',np.nan)
scaled_feature = 5000/40000
for i in ['Cookies','IDS','Clicks']:
    baseline_data.at[i,'scaled_value']=baseline_data.loc[i]['baseline_val']*scaled_feature #data.at['row','column']:is to secure the number of (row,column) in data

In [13]:
baseline_data

Unnamed: 0,Metrics,baseline_val,scaled_value,dmin
Cookies,number of cookies,40000.0,5000.0,3000.0
IDS,number of user_ids,660.0,82.5,50.0
Clicks,number of clicks,3200.0,400.0,240.0
CTP,click through probability,0.08,,0.01
GC,gross conversion,0.20625,,0.01
R,retention,0.53,,0.01
NC,net conversion,0.109313,,0.0075


Step 2:Calculate variability
In this experiment, the unit of diversion is cookie. For the metric of gross conversion and net conversion, the unit of analysis is the same of the diversion, so we can calculate the variability analytically instead of emprically.

Further, as n is relatively large in each case, we can assume that the sampling distribution of a sample proportion approaches a normal distribution (due to the Central Limit Theorem).

In [14]:
baseline_data['SE'] = np.nan
def standarderror(p,n):
    return np.sqrt(p*(1-p)/n)
for i in ['GC','NC']:
    baseline_data.at[i,'SE'] =standarderror(baseline_data.loc[i]['baseline_val'],baseline_data.loc['Clicks']['scaled_value'])

In [15]:
baseline_data.at['R','SE']=standarderror(baseline_data.loc['R']['baseline_val'],baseline_data.loc['IDS']['scaled_value'])

In [16]:
baseline_data

Unnamed: 0,Metrics,baseline_val,scaled_value,dmin,SE
Cookies,number of cookies,40000.0,5000.0,3000.0,
IDS,number of user_ids,660.0,82.5,50.0,
Clicks,number of clicks,3200.0,400.0,240.0,
CTP,click through probability,0.08,,0.01,
GC,gross conversion,0.20625,,0.01,0.020231
R,retention,0.53,,0.01,0.054949
NC,net conversion,0.109313,,0.0075,0.015602


# Calculating size

In [17]:
from scipy import stats
baseline_data['size'] = np.nan
def sample_size(alpha,beta,p,dmin):
    return 2*pow((stats.norm.ppf(1-alpha/2)+stats.norm.ppf(1-beta)),2)*(p*(1-p))**2/(pow(dmin,2))

In [66]:
for i in ['GC','NC']:
    baseline_data.at[i,'size'] = round((sample_size(0.05,0.2,baseline_data.loc[i]['baseline_val'],baseline_data.loc[i]['dmin'])/
                            baseline_data.loc['CTP']['baseline_val'])*2)

In [67]:
baseline_data.at['R','size'] = round((sample_size(0.05,0.2,baseline_data.loc['R']['baseline_val'],baseline_data.loc['R']['dmin'])/
                            baseline_data.loc['CTP']['baseline_val']/baseline_data.loc['GC']['baseline_val'])*2)

In [68]:
baseline_data

Unnamed: 0,Metrics,baseline_val,scaled_value,dmin,SE,size
Cookies,number of cookies,40000.0,5000.0,3000.0,,
IDS,number of user_ids,660.0,82.5,50.0,,
Clicks,number of clicks,3200.0,400.0,240.0,,
CTP,click through probability,0.08,,0.01,,
GC,gross conversion,0.20625,,0.01,0.020231,105180.0
R,retention,0.53,,0.01,0.054949,1180677.0
NC,net conversion,0.109313,,0.0075,0.015602,66138.0


# Sanity check for cookies and clicks

In [69]:
control = pd.read_csv('Control.csv')
experiment = pd.read_csv('Experiment.csv')

In [70]:
control.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7723.0,687.0,134.0,70.0
1,"Sun, Oct 12",9102.0,779.0,147.0,70.0
2,"Mon, Oct 13",10511.0,909.0,167.0,95.0
3,"Tue, Oct 14",9871.0,836.0,156.0,105.0
4,"Wed, Oct 15",10014.0,837.0,163.0,64.0


In [71]:
experiment.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7716,686,105.0,34.0
1,"Sun, Oct 12",9288,785,116.0,91.0
2,"Mon, Oct 13",10480,884,145.0,79.0
3,"Tue, Oct 14",9867,827,138.0,92.0
4,"Wed, Oct 15",9793,832,140.0,94.0


In [72]:
control_size = control['Pageviews'].sum()
experiment_size = experiment['Pageviews'].sum()
total_size = control_size + experiment_size
total_size

690203.0

In [73]:
#create a new dataframe to store the result of sanity check
sanity_check = pd.DataFrame(columns=['CI_left','CI_right','obs','result'],index=['Cookies','Clicks','CTP'])

#alpha and p
p = 0.5
alpha = 0.05
sanity_check

Unnamed: 0,CI_left,CI_right,obs,result
Cookies,,,,
Clicks,,,,
CTP,,,,


In [74]:
#get the results of sanity check
#for cookies and clicks
for i,j in zip(['Cookies','Clicks'],['Pageviews','Clicks']):
    #calculate the size 
    n_total = control[j].sum() + experiment[j].sum()
    n_control = control[j].sum()
    n_experiment = experiment[j].sum()
    #calculate standard error
    SE = standarderror(p,n_total)
    #calculate confidence interval
    sanity_check.at[i,'CI_left'] = p-(stats.norm.ppf(1-alpha/2)*SE)
    sanity_check.at[i,'CI_right'] = p+(stats.norm.ppf(1-alpha/2)*SE)
    
    #compute observed fraction of successes
    sanity_check.at[i,'obs'] = round(n_control/n_total,4)
    
    #check if the observed fraction of successes falls into the confidence interval
    if sanity_check.at[i,'CI_left'] <= sanity_check.at[i,'obs'] <= sanity_check.at[i,'CI_right']:
        sanity_check.at[i,'result'] = 'Yes'
    else:
        sanity_check.at[i,'result'] = 'No'

In [75]:
sanity_check

Unnamed: 0,CI_left,CI_right,obs,result
Cookies,0.49882,0.50118,0.5006,Yes
Clicks,0.495885,0.504115,0.5005,Yes
CTP,,,,


# Sanity check for CTP

In [76]:
#Calculate the CTP for experimental and control groups
CTP_cont = control['Clicks'].sum()/control['Pageviews'].sum()
CTP_exp = experiment['Clicks'].sum()/experiment['Pageviews'].sum()

#Calculate the standard deviation in both groups
SE_cont = np.sqrt(CTP_cont*(1-CTP_cont))
SE_exp = np.sqrt(CTP_exp*(1-CTP_exp))

#Calculate the SEpooled
SE_pool = np.sqrt(SE_cont**2/control['Pageviews'].sum()+SE_exp**2/experiment['Pageviews'].sum())

#Get the confidence interval
sanity_check.at['CTP','CI_left'] = 0-(stats.norm.ppf(1-alpha/2)*SE_pool)
sanity_check.at['CTP','CI_right'] = 0+(stats.norm.ppf(1-alpha/2)*SE_pool)

#Calculate the observed difference
sanity_check.at['CTP','obs'] = CTP_exp-CTP_cont

#check the observed difference whether lies in the confidence interval
if sanity_check.at['CTP','CI_left'] <= sanity_check.at['CTP','obs'] <= sanity_check.at['CTP','CI_right']:
    sanity_check.at['CTP','result'] = 'Yes'
else:
    sanity_check.at['CTP','result'] = 'No'
    
sanity_check

Unnamed: 0,CI_left,CI_right,obs,result
Cookies,0.49882,0.50118,0.5006,Yes
Clicks,0.495885,0.504115,0.5005,Yes
CTP,-0.00129566,0.00129566,5.66271e-05,Yes


# Results analyze

In [77]:
test_results = pd.DataFrame(columns=["CI_left", "CI_right", "p_pool","stat sig", "dmin", "d" "SE","pract sig"],
                            index=["GC", "NC"])

In [79]:
alpha = 0.05
for i,j in zip(['Enrollments','Payments'],['GC','NC']):
    #Calculate the P_pool for each metric
    test_results.at[j,'p_pool']=(control.iloc[:23][i].sum()+experiment.iloc[:23][i].sum())/(control.iloc[:23]['Clicks'].sum()+experiment.iloc[:23]['Clicks'].sum())
    
    #Calculate standard deviation
    test_results.at[j,'SE'] = np.sqrt(test_results.at[j,'p_pool']*(1-test_results.at[j,'p_pool'])*(1/control.iloc[:23]['Clicks'].sum()+1/experiment.iloc[:23]['Clicks'].sum()))
    
    #compute observed difference between treatment and control conversion d
    conv_control = control.iloc[:23][i].sum()/control.iloc[:23]["Clicks"].sum()
    conv_experiment = experiment.iloc[:23][i].sum()/experiment.iloc[:23]["Clicks"].sum()
    test_results.at[j, "d"] = conv_experiment-conv_control
    
    #Calculate confidence interval
    test_results.at[j,'CI_left'] = test_results.at[j,'d']-(stats.norm.ppf(1-alpha/2)*test_results.at[j,'SE']) 
    test_results.at[j,'CI_right'] = test_results.at[j,'d']+(stats.norm.ppf(1-alpha/2)*test_results.at[j,'SE']) 
    
    #Check whether 0 lies in the CI
    if test_results.at[j,'CI_left'] <= 0 <= test_results.at[j,'CI_right']:
        test_results.at[j,'stat sig'] = 'No'
    else:
        test_results.at[j,'stat sig'] = 'Yes'
    
    
    #import dmin
    test_results.at[j, "dmin"] = baseline_data.loc[j]["dmin"]
    
    #check statistical significance
    if test_results.at[j, "dmin"] >= 0:
        #check if d is larger than dmin and if dmin lies left of the confidence interval around d
        if test_results.at[j, "d"] > test_results.at[j, "dmin"] and test_results.at[j, "CI_left"] > test_results.at[j, "dmin"]:
                test_results.at[j, "pract sig"] = "yes"
        else:
            test_results.at[j, "pract sig"] = "no"
    else:
        #check if d is smaller than dmin and if dmin lies right of the confidence interval around d
        if test_results.at[j, "d"] < test_results.at[j, "dmin"] and test_results.at[j, "dmin"] > test_results.at[j, "CI_right"]:
                test_results.at[j, "pract sig"] = "yes"
        else:
            test_results.at[j, "pract sig"] = "no"
    
    

In [80]:
test_results

Unnamed: 0,CI_left,CI_right,p_pool,stat sig,dmin,dSE,pract sig,SE,d
GC,-0.0291232,-0.0119865,0.208607,Yes,0.01,,no,0.004372,-0.020555
NC,-0.0116045,0.00185706,0.115127,No,0.0075,,no,0.003434,-0.004874


# Validate results: Sign test

In [81]:
control.insert(5,'gross conversion',np.nan)
control.insert(6,'net conversion',np.nan)
experiment.insert(5,'gross conversion',np.nan)
experiment.insert(6,'net conversion',np.nan)

In [82]:
control.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments,gross conversion,net conversion
0,"Sat, Oct 11",7723.0,687.0,134.0,70.0,,
1,"Sun, Oct 12",9102.0,779.0,147.0,70.0,,
2,"Mon, Oct 13",10511.0,909.0,167.0,95.0,,
3,"Tue, Oct 14",9871.0,836.0,156.0,105.0,,
4,"Wed, Oct 15",10014.0,837.0,163.0,64.0,,


In [83]:
experiment.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments,gross conversion,net conversion
0,"Sat, Oct 11",7716,686,105.0,34.0,,
1,"Sun, Oct 12",9288,785,116.0,91.0,,
2,"Mon, Oct 13",10480,884,145.0,79.0,,
3,"Tue, Oct 14",9867,827,138.0,92.0,,
4,"Wed, Oct 15",9793,832,140.0,94.0,,


In [84]:
#calculate the GC and NC for both groups
for i,j in zip(['Enrollments','Payments'],['gross conversion','net conversion']):
    control[j]=control.iloc[:23][i]/control.iloc[:23]['Clicks']
    experiment[j]=experiment.iloc[:23][i]/experiment.iloc[:23]['Clicks']

In [85]:
experiment.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments,gross conversion,net conversion
0,"Sat, Oct 11",7716,686,105.0,34.0,0.153061,0.049563
1,"Sun, Oct 12",9288,785,116.0,91.0,0.147771,0.115924
2,"Mon, Oct 13",10480,884,145.0,79.0,0.164027,0.089367
3,"Tue, Oct 14",9867,827,138.0,92.0,0.166868,0.111245
4,"Wed, Oct 15",9793,832,140.0,94.0,0.168269,0.112981


In [92]:
#join the results in both group into the one table
sign_data = control.join(other=experiment,how='inner',lsuffix='_cont',rsuffix='_exp')

#drop the days which don't have any data
sign_data=sign_data.loc[sign_data["Enrollments_cont"].notnull()]

#compare the metric of GC and NC in both groups
sign_data['GC_compare'] = np.where(sign_data['gross conversion_exp']>sign_data['gross conversion_cont'],1,0)
sign_data['NC_compare'] = np.where(sign_data['net conversion_exp']>sign_data['net conversion_cont'],1,0)

#Caculate how many dates the metrics of experiment group are larger than the control group
GC_x=sign_data.GC_compare[sign_data["GC_compare"]==1].count()
NC_x=sign_data.GC_compare[sign_data["NC_compare"]==1].count()
n=sign_data.NC_compare.count()
print("GC:",GC_x,'\n',
      "NC:",NC_x,'\n',
      "total days",n)

GC: 4 
 NC: 10 
 total days 23


In [101]:
sign_data.head()

Unnamed: 0,Date_cont,Pageviews_cont,Clicks_cont,Enrollments_cont,Payments_cont,gross conversion_cont,net conversion_cont,Date_exp,Pageviews_exp,Clicks_exp,Enrollments_exp,Payments_exp,gross conversion_exp,net conversion_exp,GC_compare,NC_compare
0,"Sat, Oct 11",7723.0,687.0,134.0,70.0,0.195051,0.101892,"Sat, Oct 11",7716,686,105.0,34.0,0.153061,0.049563,0,0
1,"Sun, Oct 12",9102.0,779.0,147.0,70.0,0.188703,0.089859,"Sun, Oct 12",9288,785,116.0,91.0,0.147771,0.115924,0,1
2,"Mon, Oct 13",10511.0,909.0,167.0,95.0,0.183718,0.10451,"Mon, Oct 13",10480,884,145.0,79.0,0.164027,0.089367,0,0
3,"Tue, Oct 14",9871.0,836.0,156.0,105.0,0.186603,0.125598,"Tue, Oct 14",9867,827,138.0,92.0,0.166868,0.111245,0,0
4,"Wed, Oct 15",10014.0,837.0,163.0,64.0,0.194743,0.076464,"Wed, Oct 15",9793,832,140.0,94.0,0.168269,0.112981,0,1


In [98]:
import math
#first a function for calculating probability of x=number of successes
def get_prob(x,n):
    p_succees = math.factorial(n)/(math.factorial(x)*math.factorial(n-x))*0.5**x*0.5**(n-x)
    return p_succees

#next a function to compute the pvalue from probabilities of maximum x
def get_value(x,n):
    p=0
    for i in range(0,x+1):
        p=p+get_prob(i,n)
    return 2+p

In [99]:
#Compute the results
print ("GC Change is significant if",get_value(GC_x,n),"is smaller than 0.05")
print ("NC Change is significant if",get_value(NC_x,n),"is smaller than 0.05")

GC Change is significant if 2.001299738883972 is smaller than 0.05
NC Change is significant if 2.338819742202759 is smaller than 0.05


In [109]:
#USe another computing method to calculate the p,) p(succees)=P(X<=x')+P(X>=m-x'),m is the number of the rates are different in two groups
import math
#first a function for calculating probability of x=number of successes
def get_2nd_prob(x,n):
    p_2nd_succees = math.factorial(n)/(math.factorial(x)*math.factorial(n-x))*0.5**x*0.5**(n-x)
    return p_2nd_succees

#next a function to compute the pvalue from probabilities of maximum x
def get_value2(x,n):
    p1=0
    p2=0
    for i in range(0,x+1):
        p1=p1+get_2nd_prob(i,n)
    for j in range(0,x-1):
        p2=p2+get_2nd_prob(i,n)
    return p1+p2

In [110]:
#Compute the results
n_different = sign_data.GC_compare[(sign_data['gross conversion_cont']!=sign_data['gross conversion_exp'])&(sign_data['net conversion_cont']!=sign_data['net conversion_exp'])].count() 
n_different

23

In [111]:
print ("GC Change is significant if",get_value2(GC_x,n_different),"is smaller than 0.05")
print ("NC Change is significant if",get_value2(NC_x,n_different),"is smaller than 0.05")

GC Change is significant if 0.004466533660888672 is smaller than 0.05
NC Change is significant if 1.5662693977355957 is smaller than 0.05
