#### Classical A/B testing Exercise

In [2]:
### Loading packages
import numpy as np
from scipy.stats import norm
from statsmodels.stats.weightstats import ztest

In [3]:
# Seed NumPy's global random generator so the samples drawn below are reproducible
np.random.seed(seed=0)

In [5]:
# Parameters of the simulated sample used by the one-sample z-tests:
# sample size, true mean and true standard deviation
N, mu, sigma = 100, 0.3, 1
# Shift and scale standard-normal draws: X = mu + sigma * Z  <=>  Z = (X - mu) / sigma
x = mu + sigma * np.random.standard_normal(N)


## Z-test - two-sided test

$H_0$: $\mu = \mu_0$ (here $\mu_0 = 0$)

$H_1$: $\mu \neq \mu_0$

$Z=\frac{(\bar{x} -\mu_0)}{\frac{\sigma}{\sqrt{n}}}$

In [6]:
# One-sample, two-sided z-test of H0: mean == 0 (library defaults spelled out)
ztest(x, x2=None, value=0, alternative='two-sided')

(3.673627434207382, 0.00023913133098375257)

In [15]:
# Manual two-sided z-test of H0: mu = 0, reproducing ztest(x) step by step
mu_hat = x.mean()
sigma_hat = x.std(ddof=1)  # sample standard deviation (ddof=1 -> unbiased variance)
z = mu_hat / (sigma_hat / np.sqrt(N))  # estimate divided by its standard error
print(f'z value: {z}')
# Upper-tail area via the survival function instead of 1 - cdf: the subtraction
# cancels significant digits for small tails and returns exactly 0 for large |z|.
p_right = norm.sf(np.abs(z))
# Lower-tail area; equal to the upper tail because the normal density is symmetric
p_left = norm.cdf(-np.abs(z))
print(p_right, p_left)
# Two-sided p-value: both tails together
p = p_right * 2
print(f'p value:{p}')

z value: 3.6736274342073814
0.00011956566549187198 0.00011956566549187628
p value:0.00023913133098374395


## Z-test - one-sided (right-tailed) test

$H_0$: $\mu \leq \mu_0$ (here $\mu_0 = 0$)

$H_1$: $\mu > \mu_0$

$Z=\frac{(\bar{x} -\mu_0)}{\frac{\sigma}{\sqrt{n}}}$

In [16]:
# One-sample, right-tailed z-test: the alternative is "mean > 0"
ztest(x, value=0, alternative='larger')

(3.673627434207382, 0.00011956566549187628)

In [18]:
# Manual one-sided (right-tailed) z-test of H0: mu <= 0 against H1: mu > 0,
# reproducing ztest(x, alternative='larger') step by step
mu_hat = x.mean()
sigma_hat = x.std(ddof=1)  # sample standard deviation (ddof=1 -> unbiased variance)
z = mu_hat / (sigma_hat / np.sqrt(N))  # estimate divided by its standard error
print(f'z value: {z}')
# Right-tail p-value P(Z >= z) of the *signed* statistic: taking |z| here would
# report a small p-value for a strongly negative z, where the correct one-sided
# p-value is close to 1. norm.sf also avoids the cancellation error of 1 - cdf.
p = norm.sf(z)
print(f'p value:{p}')

z value: 3.6736274342073814
p value:0.00011956566549187198


In [19]:
# Two-sided test again, this time against a non-zero reference mean (H0: mean == mu_0)
mu_0 = 0.2
ztest(x, value=mu_0, alternative='two-sided')

(1.7503275907409126, 0.08006180262704525)

In [21]:
# Manual two-sided z-test of H0: mu = mu_0, reproducing ztest(x, value=mu_0)
mu_hat = x.mean()
sigma_hat = x.std(ddof=1)  # sample standard deviation (ddof=1 -> unbiased variance)
# Distance of the sample mean from the reference value, in standard-error units
z = (mu_hat - mu_0) / (sigma_hat / np.sqrt(N))
print(f'z value: {z}')
# Upper-tail area via the survival function instead of 1 - cdf: the subtraction
# cancels significant digits for small tails and returns exactly 0 for large |z|.
p_right = norm.sf(np.abs(z))
# Lower-tail area; equal to the upper tail because the normal density is symmetric
p_left = norm.cdf(-np.abs(z))
print(p_right, p_left)
# Two-sided p-value: both tails together
p = p_right * 2
print(f'p value:{p}')

z value: 1.7503275907409122
0.040030901313522715 0.040030901313522674
p value:0.08006180262704543


### Two-sample test
$Z=\frac{(\bar{x}_1-\bar{x}_2)}{\sqrt{\sigma_1^2/n_1 + \sigma_2^2/n_2}}$

In [23]:
# Two independent normal samples for the two-sample z-test.
# Each is built as X = mu + sigma * Z from standard-normal draws Z.

# Group 1: size, true mean, true standard deviation
N1, mu1, sigma1 = 100, 0.3, 1
x1 = mu1 + sigma1 * np.random.standard_normal(N1)

# Group 2: size, true mean, true standard deviation
N2, mu2, sigma2 = 100, 0.5, 1
x2 = mu2 + sigma2 * np.random.standard_normal(N2)


In [24]:
# Two-sample, two-sided z-test of H0: mean(x1) - mean(x2) == 0 (library defaults spelled out)
ztest(x1, x2, value=0, alternative='two-sided', usevar='pooled')

(-0.0659859641954357, 0.947389000048316)

In [27]:
# Manual two-sample, two-sided z-test of H0: mu1 = mu2, reproducing ztest(x1, x2)
mu_hat1 = x1.mean()
mu_hat2 = x2.mean()
s2_hat1 = x1.var(ddof=1)  # sample variances (ddof=1 -> unbiased)
s2_hat2 = x2.var(ddof=1)
# Standard error of the mean difference from the unpooled variances.
# NOTE: statsmodels' ztest pools the variances by default; the two formulas
# coincide here only because N1 == N2.
z = (mu_hat1 - mu_hat2) / np.sqrt(s2_hat1 / N1 + s2_hat2 / N2)
print(f'z-value:{z}')
# Upper-tail area via the survival function instead of 1 - cdf: the subtraction
# cancels significant digits for small tails and returns exactly 0 for large |z|.
p_right = norm.sf(np.abs(z))
# Lower-tail area; equal to the upper tail because the normal density is symmetric
p_left = norm.cdf(-np.abs(z))
print(p_right, p_left)
# Two-sided p-value: both tails together
p = p_right * 2
print(f'p value:{p}')

z-value:-0.0659859641954357
0.473694500024158 0.473694500024158
p value:0.947389000048316


In [33]:
## Hypothesis testing on iterations
# Repeat the two-sample z-test on pairs of samples drawn from the SAME standard
# normal distribution, so H0 is true and every rejection is a false positive.
tests_num = 1000   # number of simulated experiments
sample_size = 100  # observations per group in each experiment
alpha = 0.05       # significance level of each individual test
results = np.zeros(tests_num)
for i in range(tests_num):
    # Loop-local names, so the x1, z and p computed in earlier cells are not
    # overwritten and those cells stay reproducible when re-run.
    sample_a = np.random.randn(sample_size)
    sample_b = np.random.randn(sample_size)
    _, p_value = ztest(sample_a, sample_b)
    results[i] = p_value < alpha  # 1.0 when H0 is rejected, else 0.0
# Share of rejections = empirical type I error rate; expected to be close to alpha
print(results.mean())

0.061


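Both samples are drawn from the same distribution, so the null hypothesis is true in every iteration. The test still rejects it in about 6% of the runs, which is close to the nominal 5% significance level (the type I error rate); in the remaining runs we don't have enough evidence to reject the null hypothesis.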

In [4]:
# NOTE(review): leftover scratch arithmetic, unrelated to the z-test analysis above. The expression is commented out, so re-running this cell produces no output; consider deleting the cell.
# 16+16+271+40+22000+43+50+87.80+28.71+103+120+118+6+844+130+10+1061+10001+110+87+85.41+24+190+3001+64++91+18+2956.04+1000

42571.96