<a href="https://colab.research.google.com/github/JaperTai77/Colab-notebook/blob/main/CI_Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
from scipy.stats import norm, t
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/JaperTai77/data-modified/main/Alcohol_Sales.csv')
df.columns = ['year','sales']

In [None]:
# Z-confidence interval

def CI_Z(data, level = 0.95):
  l = 1 - (1-level)/2 
  N = len(data)
  mu = data.mean()
  sigma = np.sqrt(data.var(ddof = 1))
  z = norm.ppf(l)
  lower = mu-z*(sigma/np.sqrt(N))
  upper = mu+z*(sigma/np.sqrt(N))
  return (lower, mu, upper)

def CI_Z_proportion(data, level = 0.95):
  l = 1 - (1-level)/2 
  N = len(data)
  p = data.sum()/N
  z = norm.ppf(l)
  lower = mu-z*np.sqrt((p*(1-p))/N)
  upper = mu+z*np.sqrt((p*(1-p))/N)
  return (lower, mu, upper)

In [None]:
CI_Z(df.sales,level = 0.95)

(7569.562880534357, 7886.4, 8203.237119465643)

In [None]:
# t-confidence interval

def CI_t(data, level = 0.95):
  l = 1 - (1-level)/2 
  N = len(data)
  mu = data.mean()
  sigma = np.sqrt(data.var(ddof = 1))
  z = t.ppf(l,df=N - 1)
  lower = mu-z*(sigma/np.sqrt(N))
  upper = mu+z*(sigma/np.sqrt(N))
  return (lower, mu, upper)

In [None]:
CI_t(df.sales,level = 0.95)

(7568.374915564909, 7886.4, 8204.42508443509)

In [3]:
url = 'https://raw.githubusercontent.com/JaperTai77/data-modified/main/sales.csv'
df2 = pd.read_csv(url)

In [4]:
# two sided test
def two_side_test(data, mu0):
  N = len(data)
  mu = data.mean()
  sigma = np.sqrt(data.var(ddof = 1))
  z = (mu-mu0)/(sigma/np.sqrt(N)) #(p-p0)/sqrt(p0*(1-p0)/n)
  p1 = 1-norm.cdf(np.abs(z))
  p2 = norm.cdf(-np.abs(z))
  p = p1+p2
  return z,p

def two_side_test_twosample(data1, data2):
  N1 = len(data1)
  N2 = len(data2)
  mu1 = data1.mean()
  mu2 = data2.mean()
  sigma1 = data1.var(ddof=1)
  sigma2 = data2.var(ddof=1)

  s_hat = np.sqrt(sigma1/N1 + sigma2/N2)
  z = (mu2 - m1)/s_hat   #(p1-p2)/sqrt(p*(1-p)*(1/n1 + 1/n2)), p = (p1+p2)/2
  p1 = 1-norm.cdf(np.abs(z))
  p2 = norm.cdf(-np.abs(z))
  p = p1+p2
  return z,p


In [7]:
two_side_test(df2.Rating, 3)

(-0.6200436101655578, 0.5352290757920708)

In [9]:
# one sided test
def one_sided_test(data, mu0):
  N = len(data)
  mu = data.mean()
  sigma = np.sqrt(data.var(ddof = 1))
  z = (mu-mu0)/(sigma/np.sqrt(N)) #(p-p0)/sqrt(p0*(1-p0)/n)
  p = 1-norm.cdf(np.abs(z))
  return z,p

def one_side_test_twosample(data1, data2):
  N1 = len(data1)
  N2 = len(data2)
  mu1 = data1.mean()
  mu2 = data2.mean()
  sigma1 = data1.var(ddof=1)
  sigma2 = data2.var(ddof=1)

  s_hat = np.sqrt(sigma1/N1 + sigma2/N2)
  z = (mu2 - m1)/s_hat    #(p1-p2)/sqrt(p*(1-p)*(1/n1 + 1/n2)), p = (p1+p2)/2
  p = 1-norm.cdf(np.abs(z))
  return z,p

In [41]:
url = 'https://raw.githubusercontent.com/JaperTai77/data-modified/main/ab_data.csv'
df3 = pd.read_csv(url)
pd.crosstab(df3.group, df3.converted)

converted,0,1
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,129479,17723
treatment,129762,17514


In [35]:
T = np.array(pd.crosstab(df3.group, df3.converted))
T

array([[129479,  17723],
       [129762,  17514]])

In [40]:
# Chi-square test (contingency)

from scipy.stats import chi2
# sum of rows and columns
R1 = T[0].sum()
R2 = T[1].sum()
B1 = T[0][0]+T[1][0]
B2 = T[0][1]+T[1][1]
total = R1+R2 #or B1+B2

# Expected table
E = T.copy()
E[0][0] = R1*B1/total
E[0][1] = R1*B2/total
E[1][0] = R2*B1/total
E[1][1] = R2*B2/total

# (Observe-Expected)^2/Expected
X = ((T[0][0]-E[0][0])**2)/E[0][0]+((T[0][1]-E[0][1])**2)/E[0][1]+((T[1][0]-E[1][0])**2)/E[1][0]+((T[1][1]-E[1][1])**2)/E[1][1]
degree = (2-1)*(2-1)
print(X,1-chi2.cdf(X,df=degree))

1.5180661022531088 0.21791238435460514


In [66]:
# Chi-square test
def chi2_test(obs,exp):
  l = len(obs)
  X = 0
  for i in range(l):
    X = X + (obs[i]-exp[i])**2/exp[i]
  p = 1-chi2.cdf(X,df=l-1)

  resid = [round((x-y)/np.sqrt(y),3) for x,y in zip(obs,exp)]
  print('Residuals are',resid)
  return X,p

In [67]:
chi2_test([200, 200, 250, 140, 120], [205, 203, 251, 149, 124])

Residuals are [-0.349, -0.211, -0.063, -0.737, -0.359]


(0.8429266777650147, 0.932602254149896)