# Chapter 7: Hypothesis and Inference

We set up a *null hypothesis* $H_{0}$ (the default position) and an alternative hypothesis $H_{1}$ that we want to compare it with, then use statistics to decide whether we can reject the null hypothesis or not.

In [1]:
import math

In [2]:
def normal_approximation_to_binomial(n, p):
    """Return the (mu, sigma) of the normal approximating a Binomial(n, p).

    Args:
        n: number of trials.
        p: success probability of each trial.

    Returns:
        A tuple ``(mu, sigma)`` with ``mu = p * n`` and
        ``sigma = sqrt(p * (1 - p) * n)``.
    """
    variance = p * (1 - p) * n
    return p * n, math.sqrt(variance)

In [3]:
def normal_cdf(x, mu=0, sigma=1):
    """Cumulative distribution function of a Normal(mu, sigma) evaluated at x.

    Args:
        x: point at which to evaluate the CDF.
        mu: mean of the distribution (default 0).
        sigma: standard deviation of the distribution (default 1).

    Returns:
        The probability that the variable is at most ``x``, computed with
        the error function ``math.erf``.
    """
    erf_argument = (x - mu) / math.sqrt(2) / sigma
    return (1 + math.erf(erf_argument)) / 2

In [10]:
def inverse_normal_cdf(p, mu=0, sigma=1, tolerance=0.00001):
    """Approximate the inverse of ``normal_cdf`` using binary search.

    Args:
        p: target cumulative probability; expected to lie strictly
            between 0 and 1.
        mu: mean of the distribution (default 0).
        sigma: standard deviation of the distribution (default 1).
        tolerance: the search stops once the bracketing interval on the
            standard-normal z value is no wider than this.

    Returns:
        A value ``x`` such that ``normal_cdf(x, mu, sigma)`` is
        approximately ``p``. The standard-normal search is confined to
        z in [-10, 10].
    """
    # If the distribution is not standard, solve the standard problem and
    # rescale. This has to trigger when EITHER parameter is non-default:
    # requiring both (`and`) silently ignored mu or sigma whenever only one
    # of them was supplied.
    if mu != 0 or sigma != 1:
        return mu + sigma * inverse_normal_cdf(p, tolerance=tolerance)

    low_z = -10.0
    high_z = 10.0
    # Start from the midpoint so the result is defined even when the
    # tolerance is so large that the loop body never runs.
    mid_z = (low_z + high_z) / 2
    while high_z - low_z > tolerance:
        mid_z = (low_z + high_z) / 2  # consider the midpoint
        mid_p = normal_cdf(mid_z)     # and the CDF's value there
        if mid_p < p:
            # midpoint is still too low, search above it
            low_z = mid_z
        elif mid_p > p:
            # midpoint is still too high, search below it
            high_z = mid_z
        else:
            break
    return mid_z

In [5]:
# The probability that a normal variable lies below a threshold is the CDF
# at that threshold, so this is simply an alias for normal_cdf.
normal_probability_below = normal_cdf

In [6]:
def normal_probability_above(lo, mu=0, sigma=1):
    """Probability that a Normal(mu, sigma) variable is above ``lo``.

    Computed as the complement of ``normal_cdf(lo, mu, sigma)``.
    """
    probability_below = normal_cdf(lo, mu, sigma)
    return 1 - probability_below

In [7]:
def normal_probability_between(lo, hi, mu=0, sigma=1):
    """Probability that a Normal(mu, sigma) variable is between ``lo`` and ``hi``.

    Computed as the CDF at ``hi`` minus the CDF at ``lo``.
    """
    below_hi = normal_cdf(hi, mu, sigma)
    below_lo = normal_cdf(lo, mu, sigma)
    return below_hi - below_lo

In [8]:
def normal_probability_outside(lo, hi, mu=0, sigma=1):
    """Probability that a Normal(mu, sigma) variable is not between ``lo`` and ``hi``.

    Computed as the complement of ``normal_probability_between``.
    """
    probability_inside = normal_probability_between(lo, hi, mu, sigma)
    return 1 - probability_inside

In [9]:
def normal_upper_bound(probability, mu=0, sigma=1):
    """Return the z for which P(Z <= z) equals ``probability``.

    Delegates directly to ``inverse_normal_cdf`` for a Normal(mu, sigma).
    """
    return inverse_normal_cdf(probability, mu=mu, sigma=sigma)

In [15]:
def normal_lower_bound(probability, mu=0, sigma=1):
    """Return the z for which P(Z >= z) equals ``probability``.

    This is the point with ``1 - probability`` of the mass below it, found
    via ``inverse_normal_cdf`` for a Normal(mu, sigma).
    """
    probability_below = 1 - probability
    return inverse_normal_cdf(probability_below, mu, sigma)

In [12]:
def normal_two_sided_bounds(probability, mu=0, sigma=1):
    """Return the symmetric (about the mean) bounds containing ``probability``.

    Returns:
        A tuple ``(lower_bound, upper_bound)`` for a Normal(mu, sigma).
    """
    # Whatever is not inside the interval is split evenly between the tails.
    tail = (1 - probability) / 2

    # The lower bound has `tail` below it; the upper bound has `tail` above it.
    lower = normal_upper_bound(tail, mu, sigma)
    upper = normal_lower_bound(tail, mu, sigma)
    return lower, upper

Let's say we flip a coin n = 1000 times and let X be the number of heads. If our hypothesis of fairness is true (p = 0.5), X should be distributed approximately normally with mean 500 and standard deviation 15.8:

In [13]:
# Normal approximation to Binomial(1000, 0.5): the distribution of the
# number of heads under the null hypothesis of a fair coin.
mu_0, sigma_0 = normal_approximation_to_binomial(1000, 0.5)

### Type 1 Error
Reject $H_0$ even though it's true, i.e. a false positive. <br>
The significance level (the probability of a type 1 error we are willing to accept) is usually set to 5% or 1%. Let's choose 5%.

In [22]:
# Two-sided 95% interval for the number of heads under the null hypothesis;
# results outside it lead us to reject H0 at the 5% significance level.
acceptable_range = normal_two_sided_bounds(0.95, mu_0, sigma_0)
print(tuple(round(bound) for bound in acceptable_range))

(469, 531)


### Type 2 Error
Fail to reject $H_0$ even though it's false i.e. false negative.

In [23]:
# 95% two-sided bounds based on the assumption that p is 0.5 (the null
# hypothesis); we fail to reject H0 whenever X lands inside [lo, hi]
lo, hi = normal_two_sided_bounds(0.95, mu_0, sigma_0)

In [24]:
# actual mu and sigma of the number of heads if the coin is really
# biased with p = 0.55
mu_1, sigma_1 = normal_approximation_to_binomial(1000, 0.55)

In [29]:
# a type 2 error means we fail to reject the null hypothesis,
# which will happen when X is still in our original interval [lo, hi]
# even though it is actually drawn from the p = 0.55 distribution
type_2_probability = normal_probability_between(lo, hi, mu_1, sigma_1)
# the power of the test is the probability of NOT making a type 2 error
power = 1 - type_2_probability
print(round(power,3))

0.887


In [31]:
# One-sided test: the null hypothesis is now that the coin is not biased
# towards heads, i.e. p <= 0.5. The cutoff is the value with 95% of the
# null distribution below it.
# NOTE: this rebinds `hi`, which an earlier cell set to the two-sided upper
# bound; re-running that earlier power calculation after this cell would
# pick up the one-sided cutoff instead.
hi = normal_upper_bound(0.95, mu_0, sigma_0)
print(hi)

526.0073585242053


In [32]:
# For the one-sided test a type 2 error happens when X is still below the
# cutoff `hi` even though it is drawn from the p = 0.55 distribution
type_2_probability = normal_probability_below(hi, mu_1, sigma_1)
# power is again the probability of not making a type 2 error
power = 1 - type_2_probability
print(power)

0.9363794803307173
