### Two Sample Tests

In [1]:
import numpy as np
import pandas as pd
import math
from scipy import stats

### Population Means, Independent Samples (sigma1 and sigma2 are known)

In [2]:
def stati(x1,x2,sigma1,sigma2,n1,n2):
    """Print the z statistic and one-tailed p-value of a two-sample z-test.

    Parameters
    ----------
    x1, x2 : float
        Means of the two independent samples.
    sigma1, sigma2 : float
        Known population standard deviations.
    n1, n2 : int
        Sizes of the two samples.

    Returns
    -------
    None
        The statistic and p-value are printed as ``z p``.

    The p-value is the tail area beyond ``z`` in the direction of its sign:
    the lower tail when ``z < 0`` and the upper tail otherwise.
    """
    # Standard error of the difference between the two sample means.
    standard_error = math.sqrt(((sigma1**2)/n1)+((sigma2**2)/n2))
    z = (x1-x2)/standard_error
    if z < 0:
        p = stats.norm.cdf(z)
    else:
        p = 1-stats.norm.cdf(z)
    # Report the result for both signs of z; the print used to sit inside the
    # else branch, so a negative statistic produced no output at all.
    print(z,p)

In [3]:
stati(x1=121, x2=112, sigma1=8, sigma2=8, n1=10, n2=10)  # prints the z statistic and its p-value

2.5155764746872635 0.00594189462107364


### Population Means, Independent Samples (sigma1 and sigma2 are unknown but assumed equal)

In [4]:
# Two independent samples with eight observations each.
b = [
    89.19, 90.95, 90.46, 93.21,
    97.19, 97.04, 91.07, 92.75,
]
a = [
    91.5, 94.18, 92.18, 95.39,
    91.79, 89.07, 94.72, 89.21,
]

In [5]:
# Two-sample t-test on `a` vs `b` using the pooled (equal) variance estimate.
stats.ttest_ind(
    a,
    b,
    equal_var=True,
)

Ttest_indResult(statistic=-0.3535908643461798, pvalue=0.7289136186068217)

In [6]:
# 2.5% quantile of Student's t with 14 degrees of freedom, i.e. the lower
# critical value for a two-tailed test at alpha = 0.05
# (14 presumably comes from 8 + 8 - 2 for samples `a` and `b`).
stats.t.ppf(0.025, 14)

-2.1447866879169277

### Population Means, Independent Samples (sigma1 and sigma2 are unknown and unequal)

In [7]:
# 2.5% quantile of Student's t with 13 degrees of freedom (lower critical value
# for a two-tailed test at alpha = 0.05). NOTE(review): 13 looks like the
# truncated Welch-Satterthwaite degrees of freedom for the unequal-variance
# test on `m` and `r` - confirm.
stats.t.ppf(0.025, 13)

-2.160368656461013

In [8]:
# Two independent samples with ten observations each.
m = [
    3, 7, 25, 10, 15,
    6, 12, 25, 15, 7,
]
r = [
    48, 44, 40, 38, 33,
    21, 20, 12, 1, 18,
]

In [9]:
# Two-sample t-test on `m` vs `r` without assuming equal variances (Welch's test).
stats.ttest_ind(
    m,
    r,
    equal_var=False,
)

Ttest_indResult(statistic=-2.7669395785560558, pvalue=0.015827284816100885)

### Population Means Dependent Samples

In [10]:
# Paired measurements: K[i] and L[i] form one pair (nine pairs in total).
K = [
    1.186, 1.151, 1.322,
    1.339, 1.200, 1.402,
    1.365, 1.537, 1.559,
]
L = [
    1.061, 0.992, 1.063,
    1.062, 1.065, 1.178,
    1.037, 1.086, 1.052,
]

In [11]:
# Paired (dependent-samples) t-test on the element-wise pairs of `K` and `L`.
stats.ttest_rel(
    K,
    L,
)

Ttest_relResult(statistic=6.0819394375848255, pvalue=0.00029529546278604066)

### Two Sample Tests: Inference about the Difference between Two Population Proportions

In [12]:
import math
def population_proportion(p1,p2,n1,n2):
    """Two-sample z-test for the difference between two proportions.

    Parameters
    ----------
    p1, p2 : float
        Sample proportions of the two groups.
    n1, n2 : int
        Sizes of the two samples.

    Returns
    -------
    tuple
        ``(z, p_value)`` where ``z`` is the test statistic computed with the
        pooled proportion and ``p_value`` is the two-tailed p-value.
    """
    # Pooled proportion: each sample proportion is weighted by its OWN sample
    # size. (The previous code weighted p1 by n2, which is only correct when
    # n1 == n2.)
    pooled = ((p1*n1)+(p2*n2))/(n1+n2)
    variance = (pooled*(1-pooled)*((1/n1)+(1/n2)))
    std_error = math.sqrt(variance)
    z = (p1-p2)/std_error
    # One-tailed area beyond z in the direction of its sign.
    if z < 0:
        tail = stats.norm.cdf(z)
    else:
        tail = 1-stats.norm.cdf(z)
    # Double the single tail to obtain the two-tailed p-value.
    return z, tail*2

In [13]:
# Returns (z statistic, two-tailed p-value) for 27% vs 19% with 100 observations each.
population_proportion(p1=0.27, p2=0.19, n1=100, n2=100)

(1.3442056254198995, 0.17888190308175567)

In [14]:
# Standard-normal CDF evaluated at the z statistic obtained above
# (z = 1.3442...); it equals 1 - (two-tailed p-value) / 2.
stats.norm.cdf(
    1.3442056254198995
)

0.9105590484591222

### Two Sample Tests for Population Variances

In [15]:
import scipy

In [16]:
# Upper limit: 95th percentile of the F distribution with (15, 10) degrees of freedom.
scipy.stats.f.ppf(
    q=1 - 0.05,
    dfn=15,
    dfd=10,
)

2.8450165269958436

In [17]:
# Lower limit: 5th percentile of the F distribution with (15, 10) degrees of freedom.
scipy.stats.f.ppf(
    q=0.05,
    dfn=15,
    dfd=10,
)

0.3931252536255495

### F-Test Example

In [18]:
# Samples for the F-test example (same values as `m` and `r` used earlier).
x = [
    3, 7, 25, 10, 15,
    6, 12, 25, 15, 7,
]
y = [
    48, 44, 40, 38, 33,
    21, 20, 12, 1, 18,
]

In [19]:
# F statistic: ratio of the unbiased SAMPLE variances. np.var defaults to
# ddof=0 (population variance); the F-test needs ddof=1. The two ratios only
# coincide when both samples have the same size, as they happen to here.
F=np.var(x,ddof=1)/np.var(y,ddof=1)

In [20]:
# Numerator / denominator degrees of freedom: one less than each sample size.
dfn, dfd = len(x) - 1, len(y) - 1

In [21]:
# Lower-tail probability of the observed F statistic under F(dfn, dfd),
# i.e. a one-tailed p-value; the bare name on the last line displays it.
p_value = scipy.stats.f.cdf(
    F,
    dfn,
    dfd,
)
p_value

0.024680183438910465

### Determining the Sample Size for a Hypothesis Test about a Population Mean

In [22]:
def samplesize(alfa,beta,m1,m2,sigma):
    """Print the sample size given by ((z_alfa + z_beta) * sigma / (m1 - m2)) ** 2.

    Parameters
    ----------
    alfa, beta : float
        Upper-tail probabilities converted to standard-normal critical values
        (presumably the Type I and Type II error rates - confirm with caller).
    m1, m2 : float
        The two mean values whose difference is to be detected.
    sigma : float
        Standard deviation used in the formula.

    Returns
    -------
    None
        The (unrounded) sample size is printed, not returned.
    """
    # Negating the lower-tail quantile gives the positive upper-tail z value.
    z_alpha = -stats.norm.ppf(alfa)
    z_beta = -stats.norm.ppf(beta)
    combined_z_sq = (z_alpha + z_beta)**2
    mean_gap_sq = (m1 - m2)**2
    print((combined_z_sq*(sigma**2))/mean_gap_sq)

In [23]:
samplesize(alfa=0.05, beta=0.1, m1=12, m2=12.75, sigma=3.2)  # prints the required sample size

155.900083325938
