# Content
- Testing the difference between two means of **independent samples**
    - Using z test
    - Using t test
- Testing the difference between two means of **dependent samples**
- Testing the difference between two proportions
- Testing the difference between two variances and standard deviations

# Overview
Hypothesis testing is used to compare a sample statistic to a population parameter (e.g. H0: μ = 82)

Here we want to compare 2 sample statistics (e.g. H0: μ1 = μ2).
This is useful, for example, to compare the wages of workers at 2 factories.

One question to answer is whether the 2 samples are dependent or independent (i.e. is there a relationship between the subjects of the two samples?)

In [1]:
# Imports
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import pylab

In [2]:
# Generic functions

# Same as in chap. 8
def preferred_rounding(v, decimals=2):
    r'''
    Round a number and return it as text.

    ``v`` is rounded with the built-in ``round`` to ``decimals`` places
    (2 by default) and the rounded value is converted with ``str``.
    '''
    rounded_value = round(v, decimals)
    return str(rounded_value)

# Same as in chap. 8
def display_probability_density_chart(
    cv_left=None,
    cv_right=None,
    test_value=None,
):
    r'''
    Display the standard normal probability density curve, optionally
    annotated with critical region(s) and a test value.

    Parameters
    ----------
    cv_left : float, optional
        Left critical value. When given, the area under the curve from
        -3.1 up to this value is shaded and the value itself is marked
        and added to the legend.
    cv_right : float, optional
        Right critical value. When given, the area under the curve from
        this value up to 3.1 is shaded and the value itself is marked
        and added to the legend.
    test_value : float, optional
        Test statistic. When given, a vertical line is drawn at this
        position together with a rotated text label.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    '''
    fig, ax = plt.subplots(1, 1, figsize=(11, 5))

    # Standard normal distribution curve, drawn between the 0.1% and
    # 99.9% quantiles.
    x = np.linspace(
        stats.norm.ppf(0.001),
        stats.norm.ppf(0.999),
        100
    )
    curve = stats.norm.pdf(x)

    # Only the line style goes in the format string: the colour is set by
    # the keyword, and giving a colour in both places makes matplotlib
    # warn about a redundant definition.
    ax.plot(
        x,
        curve,
        '-',
        color='#000000',
        lw=3,
        alpha=0.8,
        label='Standard normal distribution curve'
    )

    # Critical region(s) in chart.
    # Compare against None explicitly so that a value of exactly 0 is
    # still drawn instead of being treated as "not supplied".
    if cv_left is not None:
        critical_region_left = np.linspace(-3.1, cv_left, 100)
        ax.fill_between(
            critical_region_left,
            stats.norm.pdf(critical_region_left),
            color='#f4c242',
            alpha=0.5,
        )
        # Thin opaque band marking the critical value itself.
        critical_value_left = np.linspace(cv_left-0.01, cv_left+0.01, 100)
        ax.fill_between(
            critical_value_left,
            stats.norm.pdf(critical_value_left),
            color='#f4c242',
            alpha=1,
            label='C.V. = ' + preferred_rounding(cv_left)
        )

    if cv_right is not None:
        critical_region_right = np.linspace(cv_right, 3.1, 100)
        ax.fill_between(
            critical_region_right,
            stats.norm.pdf(critical_region_right),
            color='#f4c242',
            alpha=0.5,
        )
        # Thin opaque band marking the critical value itself.
        critical_value_right = np.linspace(cv_right-0.01, cv_right+0.01, 100)
        ax.fill_between(
            critical_value_right,
            stats.norm.pdf(critical_value_right),
            color='#f4c242',
            alpha=1,
            label='C.V. = ' + preferred_rounding(cv_right)
        )

    # Test value
    if test_value is not None:
        ax.axvline(
            x=test_value,
            linewidth=1.5,
            color='#ff6666',
        )
        # Label is offset slightly to the right of the line.
        ax.text(
            x=test_value+.1,
            y=0.2,
            s='test value =' + preferred_rounding(test_value),
            rotation=90,
        )

    ax.legend()
    plt.show()
    

# Same as in chap. 8
def get_zscore_from_probability(
    p_left=None, # probability left side of target range (if any)
    p_right=None, # probability right side of target range (if any)    
):
    r'''
    Calculate a z-score from a tail probability of the standard normal
    distribution (replaces a z-table lookup).

    Parameters
    ----------
    p_left : float, optional
        Area under the curve to the LEFT of the wanted z-score.
    p_right : float, optional
        Area under the curve to the RIGHT of the wanted z-score.

    Returns
    -------
    float
        - only ``p_left``: the z-score with ``p_left`` area to its left;
        - only ``p_right``: the z-score with ``p_right`` area to its right;
        - both: ``ppf(p_right) - ppf(p_left)``;
        - neither (``None`` or 0 are both treated as "not supplied"): 0.
    '''
    if p_left and p_right:
        # NOTE(review): here both arguments are read as cumulative (left)
        # probabilities and the distance between their z-scores is
        # returned. Kept as-is; confirm the intended meaning of p_right
        # in this branch.
        test_value_left = stats.norm.ppf(p_left)
        test_value_right = stats.norm.ppf(p_right)
        return test_value_right - test_value_left

    if p_left:
        return stats.norm.ppf(p_left)

    if p_right:
        # Inverse survival function: the z-score that leaves p_right in
        # the right tail (e.g. 0.05 -> 1.645). The previous
        # `1 - ppf(p_right)` subtracted a z-score from 1, which is not a
        # z-score at all (0.05 -> 2.645).
        return stats.norm.isf(p_right)

    return 0


# Same as in chap. 8
def get_tscore_from_probability(
    p_left=None, # probability left side of target range (if any)
    p_right=None, # probability right side of target range (if any)
    df=0,
):
    r'''
    Calculate a t-score from a tail probability of Student's t
    distribution (replaces a t-table lookup).

    Parameters
    ----------
    p_left : float, optional
        Area under the curve to the LEFT of the wanted t-score.
    p_right : float, optional
        Area under the curve to the RIGHT of the wanted t-score.
    df : int or float
        Degrees of freedom passed to ``scipy.stats.t``. Callers should
        pass a positive value; the default of 0 is not a valid number of
        degrees of freedom.

    Returns
    -------
    float
        - only ``p_left``: the t-score with ``p_left`` area to its left;
        - only ``p_right``: the t-score with ``p_right`` area to its right;
        - both: ``ppf(p_right, df) - ppf(p_left, df)``;
        - neither (``None`` or 0 are both treated as "not supplied"): 0.
    '''
    if p_left and p_right:
        # NOTE(review): here both arguments are read as cumulative (left)
        # probabilities and the distance between their t-scores is
        # returned. Kept as-is; confirm the intended meaning of p_right
        # in this branch.
        test_value_left = stats.t.ppf(p_left, df)
        test_value_right = stats.t.ppf(p_right, df)
        return test_value_right - test_value_left

    if p_left:
        return stats.t.ppf(p_left, df)

    if p_right:
        # Inverse survival function: the t-score that leaves p_right in
        # the right tail. The previous `1 - ppf(p_right, df)` subtracted
        # a t-score from 1, which is not a t-score at all.
        return stats.t.isf(p_right, df)

    return 0

# Testing difference between 2 means

## Using z test

### Example

H0: μ1 = μ2 => μ1 - μ2 = 0

H1: μ1 ≠ μ2 => μ1 - μ2 ≠ 0

μ1 = mean wage workers factory A
μ2 = mean wage workers factory B

### Assumptions
1. Both samples are randomly selected
2. Samples are independent from each other
3. **The STDs of both populations are known** (this is unusual in practice)
4. Both sample sizes are >= 30, or both samples come from normally distributed populations

### Formulas

The z test is based on the format: test value = [(observed value) - (expected value)]/(standard error)

<img src="attachment:z%20Test.jpg" width="350" align="left">

If we hypothesize a difference of 0, then we reject H0 only if the confidence interval (CI) for μ1 - μ2 does NOT include 0.

<img src="attachment:CI.jpg" width="550" align="left">

In [None]:
def compute_difference_between_two_means(
    dataset1=None,
    n1=None,
    sample_mean1=None,
    sample_std1=None,
    population_std1=None,
    
    dataset2=None,
    n2=None,
    sample_mean2=None,
    sample_std2=None,
    population_std2=None,
    
    tail='two-tailed',
    significance_level=.05,
    ddof=1,
    show_chart=False,
):
    r"""
    Conduct a statistical hypothesis test comparing two sample means. 
    Works for σ known and unknown.
    The claim is the alternative hypothesis and the problem statement has to be formulated as such.

    H0 is always μ1 = μ2 (a hypothesized difference of 0). The report is
    printed step by step; nothing is returned.

    Test selection
    --------------
    - Both ``population_std1`` and ``population_std2`` given: z test with
      standard error sqrt(σ1²/n1 + σ2²/n2).
    - Otherwise: t test with standard error sqrt(s1²/n1 + s2²/n2) and the
      conservative degrees of freedom min(n1, n2) - ddof.

    Parameters
    ----------
    dataset1, dataset2 : sequence of numbers, optional
        Raw observations. When a dataset is given, the size, mean and
        STD of that sample are computed from it and override
        ``n*``/``sample_mean*``/``sample_std*``.
    n1, n2 : int, optional
        Sample sizes (required when the dataset is not given).
    sample_mean1, sample_mean2 : float, optional
        Sample means (required when the dataset is not given).
    sample_std1, sample_std2 : float, optional
        Sample STDs (required for the t test when the dataset is not given).
    population_std1, population_std2 : float, optional
        Population STDs; both must be given to use the z test.
    tail : {'two-tailed', 'right-tailed', 'left-tailed'}
        Direction of the alternative hypothesis (μ1 ≠ μ2, μ1 > μ2, μ1 < μ2).
    significance_level : float
        α, strictly between 0 and 1.
    ddof : int
        Delta degrees of freedom used for the sample STD of a dataset and
        for the degrees of freedom of the t test.
    show_chart : bool
        When True, draw the chart with ``display_probability_density_chart``.

    Raises
    ------
    AssertionError
        If ``tail`` is not one of the three supported values.
    ValueError
        If ``significance_level`` is out of range, if required statistics
        are missing, if the t test has less than 1 degree of freedom, or
        if the standard error is 0.
    """
    
    # Conditions to stop
    assert tail in ('two-tailed', 'right-tailed', 'left-tailed'), \
        "tail must be 'two-tailed', 'right-tailed' or 'left-tailed'"
    if not 0 < significance_level < 1:
        raise ValueError('significance_level must be strictly between 0 and 1')
    
    # Each dataset is summarised independently. `is not None` (rather than
    # truthiness) keeps numpy arrays usable as input.
    if dataset1 is not None:
        dataset_np1 = np.array(dataset1)
        n1 = len(dataset_np1) # sample size
        sample_mean1 = np.mean(dataset_np1) # sample mean
        sample_std1 = np.std(dataset_np1, ddof=ddof) # sample STD
        
    if dataset2 is not None:
        dataset_np2 = np.array(dataset2)
        n2 = len(dataset_np2) # sample size
        sample_mean2 = np.mean(dataset_np2) # sample mean
        sample_std2 = np.std(dataset_np2, ddof=ddof) # sample STD
    
    sigma_known = population_std1 is not None and population_std2 is not None
    
    if n1 is None or n2 is None or sample_mean1 is None or sample_mean2 is None:
        raise ValueError('Provide either a dataset or both n and sample_mean for each sample')
    if not sigma_known and (sample_std1 is None or sample_std2 is None):
        raise ValueError('Provide both population STDs (z test) or both sample STDs (t test)')
    
    # Conservative degrees of freedom for the t test: based on the smaller sample.
    df = min(n1, n2)-ddof
    if not sigma_known and df < 1:
        raise ValueError('The t test needs at least 1 degree of freedom')
    
    # Parameters and statistics
    print('Parameters and statistics')    
    print('-------------------------')
    print('Sample mean 1 (μ1) =', sample_mean1)
    print('Sample STD 1 =', sample_std1)        
    print('Sample size 1 =', n1)
    print('Population STD 1 =', population_std1)
    
    print('Sample mean 2 (μ2) =', sample_mean2)
    print('Sample STD 2 =', sample_std2)        
    print('Sample size 2 =', n2)
    print('Population STD 2 =', population_std2)
    
    print('Significance level =', significance_level)
    print('Test type =', tail)
    
    # 1. Hypothesis
    print()
    print('1. Hypothesis')
    operator, operator_label = {
        'two-tailed': ('≠', 'different from'),
        'right-tailed': ('>', 'more than'),
        'left-tailed': ('<', 'less than'),
    }[tail]
    
    # Statement
    h0 = 'H0: μ1 = μ2; H0: μ1-μ2 = 0'
    h1 = 'H1: μ1 {operator} μ2; H1: μ1-μ2 {operator} 0'.format(operator=operator)
    print(h0)
    print(h1, '(claim)')
    
    # 2. Critical value(s)
    print()
    print('2. Critical value(s)')
    cv_left, cv_right = None, None
    # A two-tailed test splits α evenly between both tails.
    tailed_alpha = significance_level/2 if tail == 'two-tailed' else significance_level
    
    # Only the left-tail score is looked up; both distributions are
    # symmetric around 0, so the right-tail critical value is its negation.
    if sigma_known: # When σ1 and σ2 known
        left_tail_score = get_zscore_from_probability(p_left=tailed_alpha)
    else: # When σ1 and σ2 unknown
        left_tail_score = get_tscore_from_probability(p_left=tailed_alpha, df=df)
    
    if tail == 'two-tailed':
        cv_right = abs(left_tail_score)
        cv_left = -cv_right
        print('C.V. =', '±' + preferred_rounding(cv_right), '(two-tailed)')
    elif tail == 'right-tailed':
        cv_right = -left_tail_score
        print('C.V. =', preferred_rounding(cv_right), '(right-tailed)')
    else:
        cv_left = left_tail_score
        print('C.V. =', preferred_rounding(cv_left), '(left-tailed)')
    
    # 3. Test value, P-value and chart
    print()
    print('3. Test value, P-value and chart')
    
    # Hypothesis test for comparing two means:
    # test value = [(observed difference) - (expected difference = 0)]/(standard error)
    if sigma_known: # When σ1 and σ2 known
        print('Test type: z test')
        standard_error = np.sqrt(population_std1**2/n1 + population_std2**2/n2)
        distribution = stats.norm
    else: # When σ1 and σ2 unknown 
        print('Test type: t test')
        standard_error = np.sqrt(sample_std1**2/n1 + sample_std2**2/n2)
        distribution = stats.t(df)
    if standard_error == 0:
        raise ValueError('The standard error is 0; the test value is undefined')
    test_value = (sample_mean1-sample_mean2)/standard_error
    
    # Calculate P-value. One-tailed tests must respect the direction of
    # the claim, so the signed test value is used there.
    if tail == 'two-tailed':
        p_value = 2*distribution.sf(abs(test_value))
    elif tail == 'right-tailed':
        p_value = distribution.sf(test_value)
    else:
        p_value = distribution.cdf(test_value)
    print('test value = ', preferred_rounding(test_value))
    print('P-value = ', preferred_rounding(p_value, 4))
    
    # Chart
    if show_chart:
        display_probability_density_chart(
            cv_left=cv_left,
            cv_right=cv_right,
            test_value=test_value,
        )
        
    # 4a. Result and decision (traditional method)
    print()
    print('4a. Result and decision (traditional method)')
    
    # H0 is rejected when the test value falls in the critical region
    # (the critical value itself belongs to the region).
    if tail == 'two-tailed':
        in_critical_region = test_value <= cv_left or test_value >= cv_right
        sample_mean_supports_claim = sample_mean1 != sample_mean2
    elif tail == 'right-tailed':
        in_critical_region = test_value >= cv_right
        sample_mean_supports_claim = sample_mean1 > sample_mean2
    else:
        in_critical_region = test_value <= cv_left
        sample_mean_supports_claim = sample_mean1 < sample_mean2
    
    if in_critical_region:
        decision_statement = 'reject'
        evidence_statement = 'enough evidence'
    else:
        decision_statement = 'not reject'
        evidence_statement = 'not enough evidence'
            
    print('The decision is to', decision_statement, 'the null hypothesis.')
    
    claim = 'μ1 is ' + operator_label + ' μ2'
    print('There is', evidence_statement, 'to support the claim that', claim, 
          'with a significance level of', str(significance_level) + '.')
  
    print()
    if decision_statement == 'reject':
        print('Although the difference between the two sample means is said to be significant, there is a risk of a type I error',
              '(rejecting H0 althought it is true).')
        print('In this case, the probability of a type I error is', '{:0.0f}%'.format(significance_level*100) + '.')
        print('You could try to reduce the significance level to decrease the probability of a type I error.')
        
    elif decision_statement == 'not reject':
        print('Although the difference between the two sample means is not significant, there is a risk of a type II error',
              '(not rejecting H0 althought it is false).')
        print('You could try to increase the significance level to decrease the probability of a type II error.', 
              'However, this would increase the probability of a type I error')
        
        if sample_mean_supports_claim:
            print('Note that even though sample mean 1 of', sample_mean1, 'is', operator_label, 
                  'sample mean 2 of', str(sample_mean2) + ',',
                  'it is not significantly different and might be due to chance.')
            
    
    # 4b. Result and decision (P-value method)
    print()
    print('4b. Result and decision (P-value method)')
    # The two-tailed P-value is already doubled, so it is compared with
    # the full significance level, not with the per-tail α.
    if p_value <= significance_level:
        decision_statement_p_value_method = 'reject'
        print('P-value ≤ ∝, therefore we reject H0.')
    else:
        decision_statement_p_value_method = 'not reject'
        print('P-value > ∝, therefore we do NOT reject H0.')  
        
    # 5. Summary
    print()
    print('5. Summary')
    if decision_statement == decision_statement_p_value_method:
        print('The traditional method and P-value method support the same conclusion.')
    else:
        print('The traditional method and P-value method do NOT support the same conclusion.')