In [1]:
import pandas as pd
import numpy as np

## Measuring Variability

In [4]:
# Sample standard deviation of gross conversion.

n = 5000       # number of cookies
ctr = 0.08     # click-through probability on "Start free trial"
p = 0.20625    # probability of enrolling, given click

# Binomial form sqrt(p * (1 - p) / N), where N = n * ctr is the cookie count
# scaled by the click-through probability.
variance = p * (1 - p) / (n * ctr)
std = np.sqrt(variance)

print('standard deviation of gross conversion is: ' + str(std))

standard deviation of gross conversion is: 0.020230604137


In [6]:
# Sample standard deviation of retention.

n = 5000       # number of cookies
ctr = 0.08     # click-through probability on "Start free trial"
p1 = 0.20625   # probability of enrolling, given click
p = 0.53       # probability of payment, given enroll

# Binomial form sqrt(p * (1 - p) / N), where N = n * ctr * p1 is the cookie
# count scaled by the click-through and enrollment probabilities.
variance = p * (1 - p) / (n * ctr * p1)
std = np.sqrt(variance)

print('standard deviation of retention is: ' + str(std))

standard deviation of retention is: 0.0549490121785


In [7]:
# Sample standard deviation of net conversion.

n = 5000        # number of cookies
ctr = 0.08      # click-through probability on "Start free trial"
p = 0.1093125   # probability of payment, given click

# Binomial form sqrt(p * (1 - p) / N), where N = n * ctr is the cookie count
# scaled by the click-through probability.
variance = p * (1 - p) / (n * ctr)
std = np.sqrt(variance)

print('standard deviation of Net conversion is: ' + str(std))

standard deviation of Net conversion is: 0.0156015445825


## Sizing

In [2]:
import pandas as pd
# Both groups are read from the same workbook, one sheet per group.
workbook_path = 'data.xlsx'
control = pd.read_excel(workbook_path, sheet_name='Control')
experiment = pd.read_excel(workbook_path, sheet_name='Experiment')

### Sanity check 

In [13]:
def sanitycheck(column, control_df=None, experiment_df=None, p=0.5, z_score=1.96):
    """Check that a count is split between control and experiment as expected.

    The control group's share of the combined ``column`` total is compared
    with a confidence interval centred on ``p`` and built from the binomial
    standard error ``sqrt(p * (1 - p) / total)``. A count that the treatment
    itself changes can legitimately fall outside this interval.

    Parameters
    ----------
    column : str
        Name of the column to check; it must exist in both frames.
    control_df, experiment_df : optional
        Frames holding the column. When omitted, the notebook-level
        ``control`` and ``experiment`` variables are used.
    p : float, default 0.5
        Expected share of the total that falls in the control group.
    z_score : float, default 1.96
        Multiplier applied to the standard error to get the margin of error.

    Returns
    -------
    bool
        True when the observed share lies inside the interval (bounds
        included), False otherwise. The same verdict is printed.
    """
    # Fall back to the notebook globals only when no frame was passed in, so
    # existing calls of the form sanitycheck(col) keep working.
    if control_df is None:
        control_df = control
    if experiment_df is None:
        experiment_df = experiment

    control_total = control_df[column].sum()
    total = control_total + experiment_df[column].sum()

    if total == 0:
        # No observations at all: the share is undefined, so report a failure
        # instead of dividing by zero.
        passed = False
    else:
        standarderror = np.sqrt(p * (1 - p) / total)
        marginerror = z_score * standarderror
        ci_low = p - marginerror
        ci_upper = p + marginerror
        observed = control_total / total
        passed = bool(ci_low <= observed <= ci_upper)

    if passed:
        print('sanity check of '+ column +' passed')
    else:
        print('sanity check of '+ column +' failed')
    return passed

In [17]:
# Move 'Date' into the index so it is no longer among control.columns.
# Guarded so the cell can be re-run: once 'Date' is the index it is not a
# column any more, and an unguarded set_index('Date') would raise KeyError.
if control.index.name != 'Date':
    control = control.set_index('Date')

In [18]:
# Run the split check once for each column of the control frame.
for column_name in control.columns:
    sanitycheck(column_name)

sanity check of Pageviews passed
sanity check of Clicks passed
sanity check of Enrollments failed
sanity check of Payments passed


## Analyse the result

In [5]:
# Keep only the rows where Enrollments has a value.
control = control.dropna(subset=['Enrollments'])
experiment = experiment.dropna(subset=['Enrollments'])

In [25]:

def CI(z_score, demoniator_exp, demoniator_cont, numeritor_exp, numeritor_cont):
    """Confidence interval for the difference between two proportions.

    Each group's proportion is ``numerator.sum() / denominator.sum()`` and the
    reported difference is experiment minus control. The margin of error is
    ``z_score`` times a standard error built from the pooled proportion of
    both groups.

    Parameters
    ----------
    z_score : float
        Multiplier applied to the pooled standard error.
    demoniator_exp, demoniator_cont
        Denominator counts for experiment and control; each must offer ``.sum()``.
    numeritor_exp, numeritor_cont
        Numerator counts for experiment and control; each must offer ``.sum()``.

    Returns
    -------
    tuple
        ``(upper, lower, mean)`` with the upper bound first; ``mean`` is the
        observed difference between the two proportions.
    """
    # Collapse every input to a single total.
    total_d_exp, total_d_cont = demoniator_exp.sum(), demoniator_cont.sum()
    total_n_exp, total_n_cont = numeritor_exp.sum(), numeritor_cont.sum()

    # Pooled proportion across both groups and the standard error derived from it.
    pooled_rate = (total_n_exp + total_n_cont) / (total_d_exp + total_d_cont)
    pooled_se = np.sqrt(pooled_rate * (1 - pooled_rate) * (1 / total_d_exp + 1 / total_d_cont))
    half_width = z_score * pooled_se

    # Observed difference, experiment minus control.
    difference = total_n_exp / total_d_exp - total_n_cont / total_d_cont

    return difference + half_width, difference - half_width, difference



In [28]:
# Gross conversion: Enrollments over Clicks, experiment vs. control, z = 1.96
upper, lower, mean = CI(
    z_score=1.96,
    demoniator_exp=experiment['Clicks'],
    demoniator_cont=control['Clicks'],
    numeritor_exp=experiment['Enrollments'],
    numeritor_cont=control['Enrollments'],
)
gross_report = 'Confident Interval for Gross conversion is {} (lowerbound), {}(upperbound)'
print(gross_report.format(lower, upper))

Confident Interval for Gross conversion is -0.0291233583354044 (lowerbound), -0.01198639082531873(upperbound)


In [29]:
# Net conversion: Payments over Clicks, experiment vs. control, z = 1.96
upper, lower, mean = CI(
    z_score=1.96,
    demoniator_exp=experiment['Clicks'],
    demoniator_cont=control['Clicks'],
    numeritor_exp=experiment['Payments'],
    numeritor_cont=control['Payments'],
)
net_report = 'Confident Interval for Net conversion is {} (lowerbound), {}(upperbound)'
print(net_report.format(lower, upper))

Confident Interval for Net conversion is -0.011604624359891718 (lowerbound), 0.001857179010803383(upperbound)
