# Confidence Intervals for Two Means: $\mu_1 - \mu_2$

## Imports

In [8]:
import scipy.stats as st
from numpy import sqrt
from typing import Tuple

## Case 1: Populations are Independent and Normally Distributed with Common Variance $\sigma^2$

In [6]:
def get_pooled_sample_variance(
    sample_variance_x: float,
    sample_variance_y: float,
    sample_size_n: int,
    sample_size_m: int
) -> float:
    """Return the pooled sample variance of two samples.

    Each sample variance is weighted by its own degrees of freedom
    (sample size minus one) and the weighted sum is divided by the
    combined degrees of freedom ``n + m - 2``.

    Args:
        sample_variance_x: Sample variance of the first sample.
        sample_variance_y: Sample variance of the second sample.
        sample_size_n: Number of observations in the first sample.
        sample_size_m: Number of observations in the second sample.

    Returns:
        The pooled sample variance.
    """
    weighted_sum = (
        (sample_size_n - 1) * sample_variance_x
        + (sample_size_m - 1) * sample_variance_y
    )
    combined_df = sample_size_n + sample_size_m - 2
    return weighted_sum / combined_df

In [7]:
def get_t_critical_value_for_two_means_with_same_variance(
    confidence_level: float,
    sample_size_n: int,
    sample_size_m: int
) -> float:
    """Return the two-sided t critical value for the pooled-variance case.

    The value is the Student's t quantile at
    ``confidence_level + (1 - confidence_level) / 2`` with
    ``sample_size_n + sample_size_m - 2`` degrees of freedom.

    Args:
        confidence_level: Confidence level as a fraction, e.g. ``0.95``.
        sample_size_n: Number of observations in the first sample.
        sample_size_m: Number of observations in the second sample.

    Returns:
        The t critical value.
    """
    tail_mass = 1 - confidence_level
    # Half of the excluded mass sits in each tail, so the upper cut-off
    # is at the confidence level plus one half of that mass.
    upper_quantile = confidence_level + tail_mass / 2
    pooled_df = sample_size_n + sample_size_m - 2
    return st.t.ppf(q=upper_quantile, df=pooled_df)

In [9]:
def get_margin_of_error_for_mean_difference_with_same_variance(
    confidence_level: float,
    sample_variance_x: float,
    sample_variance_y: float,
    sample_size_n: int,
    sample_size_m: int
) -> float:
    """Return the margin of error for a difference of means (pooled variance).

    The result is the t critical value times the pooled standard deviation
    times ``sqrt(1/n + 1/m)``.

    Args:
        confidence_level: Confidence level as a fraction, e.g. ``0.95``.
        sample_variance_x: Sample variance of the first sample.
        sample_variance_y: Sample variance of the second sample.
        sample_size_n: Number of observations in the first sample.
        sample_size_m: Number of observations in the second sample.

    Returns:
        The half-width of the confidence interval.
    """
    t_multiplier = get_t_critical_value_for_two_means_with_same_variance(
        confidence_level=confidence_level,
        sample_size_n=sample_size_n,
        sample_size_m=sample_size_m,
    )
    pooled_std = sqrt(
        get_pooled_sample_variance(
            sample_variance_x=sample_variance_x,
            sample_variance_y=sample_variance_y,
            sample_size_n=sample_size_n,
            sample_size_m=sample_size_m,
        )
    )
    size_factor = sqrt(1 / sample_size_n + 1 / sample_size_m)
    return t_multiplier * pooled_std * size_factor

In [12]:
def get_confidence_interval_for_mean_difference_with_same_variance(
    mean_x: float,
    mean_y: float,
    confidence_level: float,
    sample_variance_x: float,
    sample_variance_y: float,
    sample_size_n: int,
    sample_size_m: int
) -> Tuple[float, float]:
    """Return the confidence interval for ``mean_x - mean_y`` (pooled variance).

    Args:
        mean_x: Sample mean of the first sample.
        mean_y: Sample mean of the second sample.
        confidence_level: Confidence level as a fraction, e.g. ``0.95``.
        sample_variance_x: Sample variance of the first sample.
        sample_variance_y: Sample variance of the second sample.
        sample_size_n: Number of observations in the first sample.
        sample_size_m: Number of observations in the second sample.

    Returns:
        A ``(lower, upper)`` tuple centred on ``mean_x - mean_y``.
    """
    half_width = get_margin_of_error_for_mean_difference_with_same_variance(
        confidence_level=confidence_level,
        sample_variance_x=sample_variance_x,
        sample_variance_y=sample_variance_y,
        sample_size_n=sample_size_n,
        sample_size_m=sample_size_m,
    )
    observed_difference = mean_x - mean_y
    lower_bound = observed_difference - half_width
    upper_bound = observed_difference + half_width
    return (lower_bound, upper_bound)

In [13]:
# Worked example: 95% interval for the difference of means, pooled variance.
get_confidence_interval_for_mean_difference_with_same_variance(
    confidence_level=0.95,
    # First sample (X)
    mean_x=10.26, sample_variance_x=6.32, sample_size_n=10,
    # Second sample (Y)
    mean_y=9.02, sample_variance_y=3.6, sample_size_m=10,
)

(-0.8525014771361277, 3.332501477136128)

## Case 2: Populations are Independent and Normally Distributed with Unequal Variances ($\sigma_X^2 \ne \sigma_Y^2$)

In [16]:
def get_t_score(
    confidence_level: float,
    degree_of_freedom: int
) -> float:
    """Return the two-sided t critical value for a confidence level.

    The value is the Student's t quantile at
    ``confidence_level + (1 - confidence_level) / 2``.

    Args:
        confidence_level: Confidence level as a fraction, e.g. ``0.95``.
        degree_of_freedom: Degrees of freedom of the t distribution.

    Returns:
        The t critical value.
    """
    tail_mass = 1 - confidence_level
    upper_quantile = confidence_level + tail_mass / 2
    return st.t.ppf(q=upper_quantile, df=degree_of_freedom)

In [20]:
def get_degree_of_freedom(
    sample_variance_x: float,
    sample_variance_y: float,
    sample_size_n: int,
    sample_size_m: int
) -> int:
    """Return the approximate degrees of freedom for unequal variances.

    Computes ``(a + b)**2 / (a**2 / (n - 1) + b**2 / (m - 1))`` where
    ``a = sample_variance_x / n`` and ``b = sample_variance_y / m``, then
    truncates the result to an integer with ``int()``.

    Args:
        sample_variance_x: Sample variance of the first sample.
        sample_variance_y: Sample variance of the second sample.
        sample_size_n: Number of observations in the first sample.
        sample_size_m: Number of observations in the second sample.

    Returns:
        The degrees of freedom, truncated toward zero.
    """
    scaled_var_x = sample_variance_x / sample_size_n
    scaled_var_y = sample_variance_y / sample_size_m

    numerator = (scaled_var_x + scaled_var_y) ** 2
    denominator = (
        scaled_var_x ** 2 / (sample_size_n - 1)
        + scaled_var_y ** 2 / (sample_size_m - 1)
    )
    return int(numerator / denominator)

In [21]:
# Worked example: degrees of freedom for two samples of size 10.
get_degree_of_freedom(
    sample_variance_x=6.32, sample_size_n=10,
    sample_variance_y=3.60, sample_size_m=10,
)

16

In [22]:
def get_margin_of_error_for_mean_difference_with_different_variance(
    confidence_level: float,
    sample_variance_x: float,
    sample_variance_y: float,
    sample_size_n: int,
    sample_size_m: int
) -> float:
    """Return the margin of error for a difference of means (unequal variances).

    The result is the t critical value, taken at the degrees of freedom
    returned by ``get_degree_of_freedom``, times
    ``sqrt(sample_variance_x / n + sample_variance_y / m)``.

    Args:
        confidence_level: Confidence level as a fraction, e.g. ``0.95``.
        sample_variance_x: Sample variance of the first sample.
        sample_variance_y: Sample variance of the second sample.
        sample_size_n: Number of observations in the first sample.
        sample_size_m: Number of observations in the second sample.

    Returns:
        The half-width of the confidence interval.
    """
    approximate_df = get_degree_of_freedom(
        sample_variance_x=sample_variance_x,
        sample_variance_y=sample_variance_y,
        sample_size_n=sample_size_n,
        sample_size_m=sample_size_m,
    )
    t_multiplier = get_t_score(
        confidence_level=confidence_level,
        degree_of_freedom=approximate_df,
    )
    standard_error = sqrt(
        sample_variance_x / sample_size_n + sample_variance_y / sample_size_m
    )
    return t_multiplier * standard_error

In [23]:
def get_confidence_interval_for_mean_difference_with_different_variance(
    mean_x: float,
    mean_y: float,
    confidence_level: float,
    sample_variance_x: float,
    sample_variance_y: float,
    sample_size_n: int,
    sample_size_m: int
) -> Tuple[float, float]:
    """Return the confidence interval for ``mean_x - mean_y`` (unequal variances).

    Args:
        mean_x: Sample mean of the first sample.
        mean_y: Sample mean of the second sample.
        confidence_level: Confidence level as a fraction, e.g. ``0.95``.
        sample_variance_x: Sample variance of the first sample.
        sample_variance_y: Sample variance of the second sample.
        sample_size_n: Number of observations in the first sample.
        sample_size_m: Number of observations in the second sample.

    Returns:
        A ``(lower, upper)`` tuple centred on ``mean_x - mean_y``.
    """
    half_width = get_margin_of_error_for_mean_difference_with_different_variance(
        confidence_level=confidence_level,
        sample_variance_x=sample_variance_x,
        sample_variance_y=sample_variance_y,
        sample_size_n=sample_size_n,
        sample_size_m=sample_size_m,
    )
    observed_difference = mean_x - mean_y
    lower_bound = observed_difference - half_width
    upper_bound = observed_difference + half_width
    return (lower_bound, upper_bound)

In [24]:
# Worked example: 95% interval for the difference of means, unequal variances.
get_confidence_interval_for_mean_difference_with_different_variance(
    confidence_level=0.95,
    # First sample (X)
    mean_x=10.26, sample_variance_x=6.32, sample_size_n=10,
    # Second sample (Y)
    mean_y=9.02, sample_variance_y=3.60, sample_size_m=10,
)

(-0.871408650603668, 3.3514086506036684)

## Case 3: Populations are Dependent and Normally Distributed
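* Compute the paired differences $d_i = x_i - y_i$; their sample mean $\bar{d}$ estimates $\mu_D = \mu_X - \mu_Y$
* Use the t-interval for one mean, $\mu_D$, where $s_d$ is the sample standard deviation of the differences and $n$ is the number of pairs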

$\bar{d} \pm t_{\frac{\alpha}{2}, n-1}\big(\frac{s_d}{\sqrt{n}}\big)$