In [1]:
from scipy.stats import norm, t

In [2]:
def se_for_two_means(s1, s2, n1, n2):
    """Standard error of the difference between two independent sample means.

    Evaluates sqrt(s1**2/n1 + s2**2/n2).

    Parameters
    ----------
    s1, s2 : standard deviations of the first and second sample
    n1, n2 : sizes of the first and second sample

    Returns
    -------
    The square root of the summed per-sample variance terms.
    """
    # Multiply by 1.0 first so the division is never integer division:
    # under Python 2, all-integer inputs such as (1, 1, 2, 2) would
    # otherwise truncate each term to 0.
    var_term_1 = 1.0 * s1 * s1 / n1
    var_term_2 = 1.0 * s2 * s2 / n2
    return pow(var_term_1 + var_term_2, 0.5)

In [3]:
def se_for_two_proportions(p1, p2, n1, n2):
    """Unpooled standard error of the difference between two sample proportions.

    Evaluates sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2), where p1, p2 are the
    sample proportions and n1, n2 the corresponding sample sizes.
    """
    var_term_1 = p1 * (1 - p1) / n1
    var_term_2 = p2 * (1 - p2) / n2
    return (var_term_1 + var_term_2) ** 0.5

In [4]:
def df_for_two_means(s1, s2, n1, n2):
    """Approximate degrees of freedom for a two-sample t procedure with
    unequal variances (Welch-Satterthwaite formula).

    df = (s1^2/n1 + s2^2/n2)^2
         / ((s1^2/n1)^2/(n1-1) + (s2^2/n2)^2/(n2-1))

    Parameters
    ----------
    s1, s2 : standard deviations of the first and second sample
    n1, n2 : sizes of the first and second sample (each must exceed 1,
             otherwise the denominator divides by zero)

    Returns
    -------
    The (generally non-integer) degrees of freedom.
    """
    # Promote to float BEFORE dividing. Wrapping only the finished numerator
    # in float() is too late under Python 2: integer inputs have already been
    # truncated by then, and the denominator can collapse to 0.
    var_term_1 = 1.0 * s1**2 / n1
    var_term_2 = 1.0 * s2**2 / n2
    numerator = (var_term_1 + var_term_2)**2
    denominator = var_term_1**2/(n1-1) + var_term_2**2/(n2-1)
    return numerator/denominator

## Cruise Ship Rating Example

#### population 1: ships that carry fewer than 500 passengers
$\bar{X}_1$ = 85.36, n1 = 37, $\sigma_1$ = 4.55

#### population 2: ships that carry 500 or more passengers
$\bar{X}_2$ = 81.40, n2 = 44, $\sigma_2$ = 3.97

In [5]:
# Cruise ship ratings: sample mean, sample size and standard deviation per group.
x1, n1, s1 = 85.36, 37, 4.55   # population 1
x2, n2, s2 = 81.40, 44, 3.97   # population 2
alpha = 0.05                   # used below as 1 - alpha/2 for the critical value

In [6]:
# Ingredients of the two-sided z-interval for mu1 - mu2:
# standard error, critical value and margin of error.
SE = se_for_two_means(s1, s2, n1, n2)
crit_z = norm.ppf(1 - alpha/2)  # standard-normal quantile at 1 - alpha/2
MOE = crit_z*SE
# A single formatted string in parentheses prints the same text on Python 2
# as the old `print a, b, c` statement, and is also valid on Python 3.
print("%s %s %s" % (SE, crit_z, MOE))

0.957981889053 1.95996398454 1.87761000039


In [7]:
# Confidence interval for mu1 - mu2: point estimate minus/plus the margin of error.
[(x1 - x2) - MOE, (x1 - x2) + MOE]

[2.0823899996137056, 5.8376100003862819]

#### Suppose we want to test whether small cruise ships have a better rating than large cruise ships.
$H_0: \mu_1 - \mu_2 \leq 0$

$H_1: \mu_1 - \mu_2 > 0$

In [8]:
# Upper-tailed z-test of H0: mu1 - mu2 <= 0 against H1: mu1 - mu2 > 0.
z = (x1-x2)/SE
# norm.sf(z) is the upper-tail probability P(Z > z). It avoids the
# cancellation error of 1 - norm.cdf(z) when z is large and the tail is tiny.
p_value = norm.sf(z)
# Formatted-string print: same text on Python 2, also valid on Python 3.
print("%s %s" % (z, p_value))

4.13368983824 1.78492582694e-05


## Commute Distance Example

In [9]:
# Commute distance: sample mean, sample size and standard deviation per group.
x1, n1, s1 = 22.5, 50, 8.4   # group 1
x2, n2, s2 = 18.6, 40, 7.4   # group 2
alpha = 0.05                 # used below as 1 - alpha/2 for the critical value

In [10]:
# Ingredients of the two-sided t-interval for mu1 - mu2 with unequal variances:
# standard error, approximate degrees of freedom, critical value, margin of error.
SE = se_for_two_means(s1, s2, n1, n2)
df = df_for_two_means(s1, s2, n1, n2)
# Store the critical value under its own name. Assigning it to `t` would
# overwrite the imported scipy.stats.t distribution, so re-running this cell
# (or any later t.ppf call) would fail on a plain float.
crit_t = t.ppf(1 - alpha/2, df)
MOE = crit_t*SE
# Formatted-string print: same text on Python 2, also valid on Python 3.
print("%s %s %s %s" % (SE, df, crit_t, MOE))

1.66739317499 87.1441817401 1.98756191451 3.31404717113


In [11]:
# Confidence interval for mu1 - mu2: point estimate minus/plus the margin of error.
[(x1 - x2) - MOE, (x1 - x2) + MOE]

[0.58595282887375699, 7.2140471711262402]

## PGA 6-foot Putt Example

In [12]:
# Sample sizes and sample proportions for the two groups being compared
# (presumably putts made / putts attempted -- confirm against the data source).
n1 = 1075
n2 = 1200
p1 = 688. / n1  # float literal keeps this a true division on Python 2
p2 = 696. / n2
alpha = 0.05
# Formatted-string print: same text on Python 2, also valid on Python 3.
print("%s %s" % (p1, p2))

0.64 0.58


In [13]:
# Ingredients of the two-sided z-interval for p1 - p2:
# critical value, standard error and margin of error.
z = norm.ppf(1-alpha/2)  # standard-normal quantile at 1 - alpha/2
SE = se_for_two_proportions(p1, p2, n1, n2)
MOE = z*SE
# Formatted-string print: same text on Python 2, also valid on Python 3.
print("%s %s %s" % (z, SE, MOE))

1.95996398454 0.020428548196 0.0400392187206


In [14]:
# Confidence interval for p1 - p2: point estimate minus/plus the margin of error.
[(p1 - p2) - MOE, (p1 - p2) + MOE]

[0.019960781279444742, 0.10003921872055536]