In [1]:
import numpy as np
import math
from scipy import stats

# Independent samples when sigma is known

In [2]:
def Z_and_P(x1, x2, sigma1, sigma2, n1, n2):
    """Two-sample z-test for a difference of means with known sigmas.

    Parameters
    ----------
    x1, x2 : float
        Sample means of the two independent samples.
    sigma1, sigma2 : float
        Known population standard deviations.
    n1, n2 : int
        Sample sizes (must be non-zero).

    Returns
    -------
    tuple
        ``(z, p)`` where ``z`` is the test statistic and ``p`` is the
        one-tailed p-value in the direction of ``z``: ``P(Z <= z)`` when
        ``z < 0`` and ``P(Z >= z)`` otherwise.
    """
    standard_error = math.sqrt(sigma1**2 / n1 + sigma2**2 / n2)
    z = (x1 - x2) / standard_error

    # sf(|z|) is the same tail area as the cdf / 1 - cdf pair, but it does not
    # cancel to exactly 0.0 when |z| is large (1 - cdf(10) == 0.0).
    p = stats.norm.sf(abs(z))

    return z, p

In [3]:
# Example: sample means 121 vs 112, both sigmas 8, both sample sizes 10.
Z_and_P(121, 112, 8, 8, 10, 10)

(2.5155764746872635, 0.00594189462107364)

### Independent samples when sigma is not known but assumed equal

In [4]:
# Two independent samples of eight observations each, compared below with a
# pooled-variance t-test.
b = [
    89.19, 90.95, 90.46, 93.21,
    97.19, 97.04, 91.07, 92.75,
]
a = [
    91.5, 94.18, 92.18, 95.39,
    91.79, 89.07, 94.72, 89.21,
]

In [5]:
# Independent two-sample t-test of a vs b; equal_var=True selects the
# pooled-variance (equal population variances) form.
stats.ttest_ind(a, b, equal_var=True)

Ttest_indResult(statistic=-0.3535908643461798, pvalue=0.7289136186068217)

In [6]:
# Lower 2.5% quantile of the t distribution with 14 degrees of freedom
# (presumably len(a) + len(b) - 2), i.e. the critical value for a two-sided
# test at the 5% level.
stats.t.ppf(0.025, 14)

-2.1447866879169277

### Independent samples when sigma is not known but assumed unequal

In [7]:
# Two independent samples of ten observations each.
# NOTE(review): `rular` is presumably a typo for "rural"; the name is kept
# because the t-test cell refers to it.
metro = [
    3, 7, 25, 10, 15,
    6, 12, 25, 15, 7,
]
rular = [
    48, 44, 40, 38, 33,
    21, 20, 12, 1, 18,
]

In [8]:
# Independent two-sample t-test of metro vs rular; equal_var=False selects
# Welch's t-test, which does not assume equal population variances.
stats.ttest_ind(metro, rular, equal_var=False)

Ttest_indResult(statistic=-2.7669395785560558, pvalue=0.015827284816100885)

# Dependent Samples

In [9]:
# Nine paired measurements: element i of KARL is paired with element i of LEH
# by the dependent-samples test below.
KARL = [
    1.186, 1.151, 1.322,
    1.339, 1.200, 1.402,
    1.365, 1.537, 1.559,
]
LEH = [
    1.061, 0.992, 1.063,
    1.062, 1.065, 1.178,
    1.037, 1.086, 1.052,
]

In [10]:
# Paired (dependent-samples) t-test on the element-wise differences
# KARL[i] - LEH[i].
stats.ttest_rel(KARL, LEH)

Ttest_relResult(statistic=6.0819394375848255, pvalue=0.00029529546278604066)

# Two Sample Proportion

In [11]:
def two_samp_proportion(p1, p2, n1, n2):
    """Two-sided z-test for the difference between two sample proportions.

    Parameters
    ----------
    p1, p2 : float
        Observed sample proportions.
    n1, n2 : int
        Sample sizes (must be non-zero).

    Returns
    -------
    tuple
        ``(z, p_val)`` where ``z`` is the test statistic computed with the
        pooled proportion and ``p_val`` is the two-sided p-value
        ``2 * P(Z >= |z|)``.
    """
    # Pooled proportion: the size-weighted average of the two proportions.
    p_pool = (p1 * n1 + p2 * n2) / (n1 + n2)
    standard_error = math.sqrt(p_pool * (1 - p_pool) * (1 / n1 + 1 / n2))
    z = (p1 - p2) / standard_error

    # sf(|z|) gives the tail area directly; 1 - cdf(z) cancels to exactly 0.0
    # for large |z| and would report a p-value of 0.
    p_val = 2 * stats.norm.sf(abs(z))

    return z, p_val

In [12]:
# Example: sample proportions 0.27 and 0.19 with 100 observations per sample.
two_samp_proportion(0.27, 0.19, 100, 100)

(1.3442056254198995, 0.17888190308175567)

In [13]:
# Standard-normal CDF at the z statistic printed by the two_samp_proportion
# example (the literal is pasted from that output); 2 * (1 - this value) is
# the two-sided p-value reported there.
stats.norm.cdf(1.3442056254198995)

0.9105590484591222

# Two-sample test for variances (F-test)

In [16]:
# Upper critical value: 95th percentile of the F distribution with 15
# numerator and 10 denominator degrees of freedom.
stats.f.ppf(q=1-0.05, dfn = 15, dfd = 10)

2.8450165269958436

In [17]:
# Lower critical value: 5th percentile of the F distribution with 15
# numerator and 10 denominator degrees of freedom.
stats.f.ppf(q=0.05, dfn = 15, dfd = 10)

0.3931252536255495

In [18]:
# Samples for the variance-ratio test; the values are the same as the
# metro / rular lists used in the unequal-variance t-test.
x = [
    3, 7, 25, 10, 15,
    6, 12, 25, 15, 7,
]
y = [
    48, 44, 40, 38, 33,
    21, 20, 12, 1, 18,
]

In [19]:
# F statistic: ratio of the two *sample* variances. ddof=1 gives the unbiased
# (n - 1) estimator that matches the len - 1 degrees of freedom below;
# np.var's default ddof=0 is the population variance and only yields the same
# ratio when both samples have the same size.
F = np.var(x, ddof=1) / np.var(y, ddof=1)
dfn = len(x) - 1  # numerator degrees of freedom
dfd = len(y) - 1  # denominator degrees of freedom

In [20]:
# Lower-tail probability P(F_dist <= F) under the F(dfn, dfd) distribution,
# i.e. a one-sided p-value.
# NOTE(review): a two-sided test would need this doubled (or the smaller tail
# doubled) — confirm which alternative is intended.
p_val = stats.f.cdf(F, dfn, dfd)

In [21]:
# Display the lower-tail p-value of the variance-ratio test.
p_val

0.024680183438910465

# Determining the sample size for a hypothesis test about a population mean

In [22]:
def sample_size(alfa, beta, mu1, mu2, sigma):
    """Sample size for a z-test about a population mean.

    Evaluates ``n = (z_alfa + z_beta)**2 * sigma**2 / (mu1 - mu2)**2`` where
    ``z_q`` is the upper-tail standard-normal quantile ``-norm.ppf(q)``.

    Parameters
    ----------
    alfa : float
        Upper-tail probability used for the first quantile (presumably the
        significance level of a one-tailed test).
    beta : float
        Upper-tail probability used for the second quantile (presumably the
        Type II error probability).
    mu1, mu2 : float
        The two mean values whose difference is to be detected; they must
        differ.
    sigma : float
        Population standard deviation.

    Returns
    -------
    float
        The unrounded sample size. The value is also printed, as before, so
        existing notebook cells keep showing it. Round up to the next integer
        to get a usable sample size.
    """
    z_alfa = -stats.norm.ppf(alfa)
    z_beta = -stats.norm.ppf(beta)
    n = ((z_alfa + z_beta) ** 2 * sigma**2) / (mu1 - mu2) ** 2
    print(n)
    return n

In [23]:
# Example: alfa = 0.05, beta = 0.1, means 12 vs 12.75, sigma = 3.2.
sample_size(0.05, 0.1, 12, 12.75, 3.2)

155.900083325938
