In [14]:
from scipy.stats import norm
import numpy as np
import math

In [15]:
# 1. given alpha, calculate the two-sided critical z-score
def z_score(a):
    """Return the critical value z such that P(|Z| > z) = a for a standard normal Z.

    a: two-sided significance level; the result is positive for 0 < a < 1.
    """
    lower_tail_quantile = norm.ppf(a / 2)
    return -lower_tail_quantile

In [98]:
# 2. given the critical z-score and the sample size, calculate beta
def return_beta(z, p1, d_min, N):
    """Return the probability mass below the rejection cutoff under the shifted rate.

    z:     critical z-score (e.g. from z_score(alpha))
    p1:    baseline proportion
    d_min: difference added to p1 to obtain the shifted proportion
    N:     sample size; the standard error uses p*(1-p)*2/N
           (presumably two groups of N each -- confirm)

    The cutoff is p1 + z * SE(p1); the result is the normal CDF at that
    cutoff for a distribution centred at p1 + d_min with scale SE(p1 + d_min).
    """
    def _std_err(p):
        return math.sqrt(p*(1-p)*2/N)

    cutoff = p1 + z*_std_err(p1)
    shifted_rate = p1 + d_min
    return norm.cdf(cutoff, loc=shifted_rate, scale=_std_err(shifted_rate))

In [99]:
# for increasing N, find the smallest N that gives the target beta
def sample_size(alpha, beta, p1, d_min):
    """Return the smallest N for which return_beta(z, p1, d_min, N) <= beta.

    alpha: two-sided significance level, converted to a critical value by z_score
    beta:  target value that return_beta must not exceed
    p1:    baseline proportion
    d_min: difference added to the baseline proportion

    Raises ValueError when no N can reach the target (the search would
    otherwise never terminate).
    """
    z = z_score(alpha)
    first = return_beta(z, p1, d_min, 1)
    if first <= beta:
        return 1

    # N = 1 is not enough. A larger N only helps when d_min > 0: for
    # d_min <= 0 the value of return_beta does not decrease as N grows,
    # and a NaN result or an infinite z stays that way for every N.
    if d_min <= 0 or math.isnan(first) or math.isinf(z):
        raise ValueError(
            "target beta is unreachable for alpha=%r, beta=%r, p1=%r, d_min=%r"
            % (alpha, beta, p1, d_min)
        )

    n = 2
    while not return_beta(z, p1, d_min, n) <= beta:
        n += 1
    return n

In [100]:
# First compute the required sample sizes at the overall alpha,
# i.e. before deciding whether a Bonferroni correction is needed.
alpha_total = 0.05
b = 0.2
# baseline 0.53 with difference 0.01, then baseline 0.11 with difference 0.0075
n1 = sample_size(alpha_total, b, 0.53, 0.01)
n2 = sample_size(alpha_total, b, 0.11, 0.0075)

In [101]:
n1

39071

In [102]:
n2

27802

# Sanity Check

## Number of Cookies

In [138]:
def confidence_interval(n_con, n_exp, p, a):
    """Check the observed control share against an interval around the expected share.

    n_con, n_exp: counts in the control and experiment groups
    p:            expected share of the control group
    a:            two-sided significance level

    Returns (lower, upper, observed_share, inside) where lower/upper are
    p -/+ z * sqrt(p*(1-p)/(n_con + n_exp)) and inside tells whether the
    observed share n_con/(n_con + n_exp) lies within [lower, upper].
    """
    total = n_con + n_exp
    observed_share = n_con / total
    margin = z_score(a) * math.sqrt(p * (1 - p) / total)
    lower, upper = p - margin, p + margin
    return lower, upper, observed_share, lower <= observed_share <= upper

In [139]:
# Cookie counts in the control and experiment groups.
n_con = 345543
n_exp = 344660
# Expected share of the control group and the significance level.
p = 0.5
a = 0.05
# Returns (lower, upper, observed control share, whether it lies inside).
confidence_interval(n_con, n_exp, p, a)

(0.49882041382459419, 0.50117958617540581, 0.5006396668806133, True)

## Number of Clicks

In [140]:
# Click counts in the control and experiment groups.
n_con = 28378
n_exp = 28325
# Expected share of the control group and the significance level.
p = 0.5
a = 0.05
# Returns (lower, upper, observed control share, whether it lies inside).
confidence_interval(n_con, n_exp, p, a)

(0.49588457134714631, 0.50411542865285364, 0.5004673474066628, True)

## Click Through Probability

In [141]:
# Click-through probability per group: clicks / cookies, using the same
# counts as the two sanity checks above.
p_con = 28378/345543
p_exp = 28325/344660
# NOTE: despite its name, d_min holds the observed difference
# (experiment - control), not a minimum detectable effect.
d_min = p_exp - p_con

In [146]:
# Pooled click-through probability: total clicks / total cookies over both groups.
p_pool = (28378 + 28325)/(345543 + 344660)

In [148]:
# Pooled standard error of the difference between the two group proportions.
se_pool = math.sqrt(p_pool * (1 - p_pool)*(1/345543 + 1/344660))
se_pool

0.0006610608156387222

In [144]:
a = 0.05
z = z_score(a)
# Margin of error for the difference at significance level a.
m = z * se_pool

In [150]:
# Interval of width 2*m centred on zero, followed by the observed difference.
-m, m, d_min

(-0.0012956553902425685, 0.0012956553902425685, 5.662709158693602e-05)

# Check for Practical and Statistical Significance


####  - For your evaluation metrics, calculate a confidence interval for the difference between the experiment and control groups

#### - A metric is statistically significant if the confidence interval does not include 0 (that is, you can be confident there was a change)

#### - A metric is practically significant if the confidence interval does not include the practical significance boundary (that is, you can be confident there is a change that matters to the business.)

### Retention
#### - payment/enroll
#### - d_min = 0.01

In [126]:
import pandas as pd

In [127]:
# Load the 'Control' and 'Experiment' sheets of the results workbook.
# NOTE: this is a machine-specific absolute path; point it at your local copy.
results_path = '/Users/shuyijiang1/Downloads/Final Project Results.xlsx'
# `sheet_name` is the supported keyword; the old `sheetname` spelling was removed from pandas.
df1 = pd.read_excel(results_path, sheet_name='Control')
df2 = pd.read_excel(results_path, sheet_name='Experiment')

In [128]:
# Keep only the rows that have an Enrollments value. The explicit .copy()
# makes each result an independent frame, so columns can be added to it
# later without triggering SettingWithCopyWarning.
df_con = df1[df1['Enrollments'].notnull()].copy()
df_exp = df2[df2['Enrollments'].notnull()].copy()

In [129]:
df_con.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7723,687,134,70
1,"Sun, Oct 12",9102,779,147,70
2,"Mon, Oct 13",10511,909,167,95
3,"Tue, Oct 14",9871,836,156,105
4,"Wed, Oct 15",10014,837,163,64


In [130]:
# Retention per group: total payments divided by total enrollments.
p_con = df_con['Payments'].sum()/df_con['Enrollments'].sum()
p_exp = df_exp['Payments'].sum()/df_exp['Enrollments'].sum()

In [131]:
# Pooled retention: payments of both groups over enrollments of both groups.
p_pool = (df_con['Payments'].sum() + df_exp['Payments'].sum())/(df_con['Enrollments'].sum() + df_exp['Enrollments'].sum())

In [132]:
p_con, p_exp, p_pool

(0.537120211360634, 0.5682150160677768, 0.5518867924528302)

In [133]:
# Group sizes for retention are the enrollment totals.
# NOTE: this rebinds n1/n2, which earlier held the required sample sizes.
n1 = df_con['Enrollments'].sum()
n2 = df_exp['Enrollments'].sum()

#### Calculate the confidence interval for the difference

In [135]:
alpha = 0.05
z = z_score(alpha)
# Margin of error: z times the pooled standard error of the difference.
m = z * math.sqrt(p_pool * (1-p_pool) *(1/n1 + 1/n2))

In [136]:
# Observed retention difference (experiment - control).
# NOTE: despite its name, d_min is not the practical significance boundary.
d_min = p_exp - p_con

In [137]:
# Confidence interval for the retention difference: (lower, upper).
d_min - m, d_min + m

(0.0048036436160968678, 0.057385965798188665)

In [113]:
# The difference is statistically significant only when zero lies
# outside the interval [d_min - m, d_min + m].
if d_min - m <= 0 <= d_min + m:
    print("Not Statistically significant")
else:
    print("statistically significant")

statistically significant


In [114]:
# Practically significant only when the whole interval lies beyond the
# practical significance boundary (0.01), on either side of zero.
# An interval that sits entirely below the boundary is not practically
# significant, which the previous "boundary inside the interval" test
# reported incorrectly.
if (d_min - m > 0.01) or (d_min + m < -0.01):
    print("Practically significant")
else:
    print("Not practically significant")

Not practically significant


### Net Conversion
#### - payment/clicks
#### - d_min = 0.0075

In [115]:
# Net conversion per group: total payments divided by total clicks.
# df_con/df_exp were filtered to rows with a non-null Enrollments value,
# so the click totals cover only those rows.
p_con = df_con['Payments'].sum()/df_con['Clicks'].sum()
p_exp = df_exp['Payments'].sum()/df_exp['Clicks'].sum()
# Pooled net conversion over both groups.
p_pool = (df_con['Payments'].sum() + df_exp['Payments'].sum())/(df_con['Clicks'].sum() + df_exp['Clicks'].sum())

In [116]:
p_con, p_exp, p_pool

(0.11756201931417337, 0.1126882966396292, 0.1151274853124186)

In [117]:
# Group sizes for net conversion are the click totals.
# NOTE: this rebinds n1/n2 again (previously the enrollment totals).
n1 = df_con['Clicks'].sum()
n2 = df_exp['Clicks'].sum()

#### Calculate the confidence interval for the difference

In [124]:
alpha = 0.05
z = z_score(alpha)
# Margin of error: z times the pooled standard error of the difference.
m = z * math.sqrt(p_pool * (1-p_pool) *(1/n1 + 1/n2))

In [119]:
# Observed net conversion difference (experiment - control).
# NOTE: despite its name, d_min is not the practical significance boundary.
d_min = p_exp - p_con
d_min

-0.0048737226745441675

In [123]:
# If zero lies inside the interval the change is not statistically
# significant, and therefore cannot be practically significant either.
if d_min - m <= 0 <= d_min + m:
    print("Not Statistically significant")
    print("Not Practically significant")
else:
    print("statistically significant")
    # Practically significant only when the whole interval lies beyond the
    # practical significance boundary (0.0075), on either side of zero.
    if (d_min - m > 0.0075) or (d_min + m < -0.0075):
        print("Practically significant")
    else:
        print("Not Practically significant")

Not Statistically significant
Not Practically significant


In [122]:
# Confidence interval for the net conversion difference: (lower, upper).
d_min - m, d_min + m

(-0.011604500677993734, 0.0018570553289054001)

# Sign Test

### Retention
#### - payment/enroll
#### - d_min = 0.01

In [151]:
# Work on explicit copies so that adding columns below does not write to a
# filtered slice. The `is_copy` attribute used previously was removed from pandas.
df_con = df_con.copy()
df_exp = df_exp.copy()

In [154]:
# Per-row retention for each group: payments as a share of enrollments.
df_con['Retention'] = df_con['Payments'].div(df_con['Enrollments'])
df_exp['Retention'] = df_exp['Payments'].div(df_exp['Enrollments'])

In [155]:
df_con.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments,Retention
0,"Sat, Oct 11",7723,687,134,70,0.522388
1,"Sun, Oct 12",9102,779,147,70,0.47619
2,"Mon, Oct 13",10511,909,167,95,0.568862
3,"Tue, Oct 14",9871,836,156,105,0.673077
4,"Wed, Oct 15",10014,837,163,64,0.392638


In [163]:
# Confirm that both groups cover the same dates before joining on 'Date'.
# d1 must come from the control frame; building both lists from df_exp
# made this check trivially True.
d1 = sorted(df_con['Date'].unique())
d2 = sorted(df_exp['Date'].unique())
d1 == d2

True

In [166]:
# Left-join the experiment group's Retention onto the control rows by Date.
# The overlapping column is suffixed: Retention_x (control), Retention_y (experiment).
df_retention = df_con.merge(df_exp[['Date','Retention']], how = 'left', on = 'Date')
df_retention.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments,Retention_x,Retention_y
0,"Sat, Oct 11",7723,687,134,70,0.522388,0.32381
1,"Sun, Oct 12",9102,779,147,70,0.47619,0.784483
2,"Mon, Oct 13",10511,909,167,95,0.568862,0.544828
3,"Tue, Oct 14",9871,836,156,105,0.673077,0.666667
4,"Wed, Oct 15",10014,837,163,64,0.392638,0.671429


In [169]:
# Per-row difference in retention: experiment minus control.
df_retention['diff'] = df_retention['Retention_y'] - df_retention['Retention_x']
df_retention.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments,Retention_x,Retention_y,diff
0,"Sat, Oct 11",7723,687,134,70,0.522388,0.32381,-0.198579
1,"Sun, Oct 12",9102,779,147,70,0.47619,0.784483,0.308292
2,"Mon, Oct 13",10511,909,167,95,0.568862,0.544828,-0.024035
3,"Tue, Oct 14",9871,836,156,105,0.673077,0.666667,-0.00641
4,"Wed, Oct 15",10014,837,163,64,0.392638,0.671429,0.278791


In [182]:
# Number of rows where the experiment's retention exceeds the control's.
positive = df_retention.loc[df_retention['diff'] > 0].shape[0]
positive

13

In [184]:
# Number of rows where the experiment's retention does not exceed the control's.
# NOTE(review): `<= 0` counts ties (diff == 0) as negatives; a sign test
# normally drops ties -- confirm this is intended.
negative = df_retention.loc[df_retention['diff'] <= 0].shape[0]
negative

10

In [185]:
# Reference probability of a positive difference, and the significance level.
p = 0.5
a= 0.05
# Observed share of rows with a positive difference.
p_test = positive/(positive + negative)
p_test

0.5652173913043478

#### The sign test statistic follows a binomial distribution; when np > 5 and n(1-p) > 5 it can usually be approximated by a normal distribution

In [193]:
# The normal approximation to the binomial needs both np > 5 and n(1-p) > 5,
# so check both conditions rather than only the first.
n = df_retention['diff'].shape[0]
(n*p > 5) and (n*(1-p) > 5)

True

###### Confidence Interval

In [194]:
z = z_score(a)
# Standard error of a proportion p over n rows.
se = math.sqrt(p*(1-p)/n)
# Normal-approximation interval around p: (lower, upper).
p - z*se, p + z*se

(0.2956596290953305, 0.7043403709046695)

##### The observed proportion (0.5652) lies within the confidence interval
##### Also, according to the calculator linked below, the two-tailed p-value for the binomial test is 0.6776
###### [This online calculator](https://www.graphpad.com/quickcalcs/binomial2/)

### Net Conversion
#### - payment/clicks
#### - d_min = 0.0075

In [195]:
# Per-row net conversion for each group: payments as a share of clicks.
df_con['Net_Conversion'] = df_con['Payments'].div(df_con['Clicks'])
df_exp['Net_Conversion'] = df_exp['Payments'].div(df_exp['Clicks'])

In [196]:
df_con.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments,Retention,Net_Conversion
0,"Sat, Oct 11",7723,687,134,70,0.522388,0.101892
1,"Sun, Oct 12",9102,779,147,70,0.47619,0.089859
2,"Mon, Oct 13",10511,909,167,95,0.568862,0.10451
3,"Tue, Oct 14",9871,836,156,105,0.673077,0.125598
4,"Wed, Oct 15",10014,837,163,64,0.392638,0.076464


In [None]:
# Left-join the experiment group's Net_Conversion onto the control rows by
# Date, mirroring the retention merge. merge() requires the right-hand frame;
# calling it with no arguments raises a TypeError.
# The overlapping column is suffixed: Net_Conversion_x (control), Net_Conversion_y (experiment).
df_netconversion = df_con.merge(df_exp[['Date', 'Net_Conversion']], how = 'left', on = 'Date')