In [6]:
import scipy.stats as st
from numpy import sqrt, ceil
from typing import Tuple

In [2]:
def get_critical_z(confidence_level: float):
    """Return the two-sided standard-normal critical value.

    A central region holding ``confidence_level`` of the probability leaves
    ``(1 - confidence_level) / 2`` in each tail, so the quantile is evaluated
    at ``confidence_level`` plus one tail.

    Args:
        confidence_level: Central coverage, e.g. ``0.95``.

    Returns:
        The value of ``st.norm.ppf`` at that cumulative probability.
    """
    one_tail = (1 - confidence_level) / 2
    return st.norm.ppf(confidence_level + one_tail)

In [3]:
def find_error(
    proportion: float,
    confidence_level: float,
    n: int
) -> float:
    """Return the margin of error ``z * sqrt(p * (1 - p) / n)``.

    Args:
        proportion: Sample proportion ``p``.
        confidence_level: Central coverage passed to ``get_critical_z``.
        n: Sample size.

    Returns:
        The critical z value multiplied by the standard error of the
        proportion.
    """
    z_star = get_critical_z(confidence_level)
    standard_error = sqrt(proportion * (1 - proportion) / n)
    return z_star * standard_error

In [7]:
def get_traditional_confidence_interval(
    proportion: float,
    confidence_level: float,
    n: int
) -> Tuple[float, float]:
    """Return the interval ``proportion ± find_error(...)``.

    Args:
        proportion: Sample proportion at the centre of the interval.
        confidence_level: Central coverage, e.g. ``0.95``.
        n: Sample size.

    Returns:
        ``(lower, upper)`` bounds, symmetric about ``proportion``.
    """
    margin = find_error(proportion, confidence_level, n)
    lower_bound = proportion - margin
    upper_bound = proportion + margin
    return (lower_bound, upper_bound)

In [8]:
# Traditional 95% interval for a sample proportion of 280/418 with n=418.
get_traditional_confidence_interval(proportion=280/418, confidence_level=0.95, n=418)

(0.6247745576792417, 0.7149383609810455)

In [10]:
def find_sample_size_with_error(
    proportion: float, 
    confidence_level: float,
    ϵ: float
) -> int:
    """Return the smallest sample size whose margin of error is at most ``ϵ``.

    Solves ``ϵ = z * sqrt(p * (1 - p) / n)`` for ``n`` and rounds up.

    Args:
        proportion: Anticipated proportion ``p``.
        confidence_level: Central coverage, strictly between 0 and 1.
        ϵ: Target margin of error (half-width); must be non-zero.

    Returns:
        The required sample size as a Python ``int``.

    Raises:
        ValueError: If ``confidence_level`` is not in ``(0, 1)`` or ``ϵ`` is 0.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")
    if ϵ == 0:
        raise ValueError("ϵ must be non-zero")
    z = get_critical_z(confidence_level=confidence_level)
    # numpy's ceil returns a float (e.g. 683.0); convert so the value matches
    # the declared ``int`` return type.
    return int(ceil(z**2 * proportion * (1-proportion) / ϵ**2))

In [11]:
# Sample size for a margin of error of 0.03 at 95% confidence, with proportion=0.8.
find_sample_size_with_error(proportion=0.8, confidence_level=0.95, ϵ=0.03)

683.0

In [12]:
# Same target with proportion=0.5, which maximises proportion*(1-proportion)
# and therefore gives the largest required sample size.
find_sample_size_with_error(proportion=0.5, confidence_level=0.95, ϵ=0.03)

1068.0

In [13]:
def get_score_confidence_interval(
    proportion: float,
    confidence_level: float,
    n: int
) -> Tuple[float, float]:
    """Return the score confidence interval for a proportion.

    The bounds are::

        (p + z²/(2n) ± z * sqrt(p(1-p)/n + z²/(4n²))) / (1 + z²/n)

    Args:
        proportion: Sample proportion ``p``.
        confidence_level: Central coverage passed to ``get_critical_z``.
        n: Sample size.

    Returns:
        ``(lower, upper)`` bounds of the interval.
    """
    z = get_critical_z(confidence_level=confidence_level)
    z_squared = z**2
    spread = z * sqrt(proportion*(1-proportion)/n + z_squared/(4*n**2))
    shifted_centre = proportion + z_squared/(2*n)
    scale = 1 + z_squared/n
    lower_bound = (shifted_centre - spread) / scale
    upper_bound = (shifted_centre + spread) / scale
    return (lower_bound, upper_bound)

In [14]:
# 95% score interval for a sample proportion of 1/3 with n=48.
get_score_confidence_interval(proportion=1/3, confidence_level=0.95, n=48)

(0.21676780694818568, 0.4745989029405201)

In [15]:
def get_width_of_traditional_ci(
    proportion: float,
    confidence_level: float,
    n: int
) -> float:
    """Return the total width ``2 * z * sqrt(p * (1 - p) / n)``.

    Args:
        proportion: Sample proportion ``p``.
        confidence_level: Central coverage passed to ``get_critical_z``.
        n: Sample size.

    Returns:
        Twice the critical z value times the standard error of the proportion.
    """
    z_star = get_critical_z(confidence_level)
    standard_error = sqrt(proportion * (1 - proportion) / n)
    return 2 * z_star * standard_error

In [16]:
def get_sample_size_from_width_traditional_ci(
    proportion: float,
    confidence_level: float,
    width: float
) -> float:
    """Return the sample size at which ``2 * z * sqrt(p(1-p)/n)`` equals ``width``.

    Solving that width expression for ``n`` gives
    ``n = 4 * z² * p * (1 - p) / width²``.

    Args:
        proportion: Anticipated proportion ``p``.
        confidence_level: Central coverage passed to ``get_critical_z``.
        width: Desired total interval width (upper bound minus lower bound).

    Returns:
        The exact, generally non-integer, solution for ``n``. The value is
        not rounded; round it up to obtain a usable sample count.
    """
    z = get_critical_z(confidence_level)
    variance_term = proportion * (1 - proportion)
    return 4 * z**2 * variance_term / width**2

In [17]:
def get_sample_size_from_width_score_ci(
    proportion: float,
    confidence_level: float,
    width: float
) -> float:
    """Return the sample size giving a score interval of total width ``width``.

    Evaluates::

        n = (2z²pq - z²w² + sqrt(4z⁴pq(pq - w²) + w²z⁴)) / w²

    with ``q = 1 - p`` and ``w = width``.

    Args:
        proportion: Anticipated proportion ``p``.
        confidence_level: Central coverage passed to ``get_critical_z``.
        width: Desired total interval width.

    Returns:
        The exact, generally non-integer, solution for ``n``. The value is
        not rounded; round it up to obtain a usable sample count.
    """
    z = get_critical_z(confidence_level)
    # Terms outside the square root.
    linear_term = 2*z**2*proportion*(1-proportion) - z**2*width**2
    # Quantity under the square root; equal to z⁴(4p²q² + w²(1 - 4pq)), which
    # is non-negative whenever 0 <= p <= 1.
    discriminant = (
        4*z**4*proportion*(1-proportion)*(proportion*(1-proportion) - width**2)
        + width**2*z**4
    )
    return (linear_term + sqrt(discriminant)) / width**2

In [18]:
# Sample size for a 99% score interval of total width 0.05 at proportion=0.5.
get_sample_size_from_width_score_ci(proportion=0.5, confidence_level=0.99, width=0.05)

2647.3237438074634

In [20]:
def find_confidence_interval_for_two_proportions(
    p1: float,
    p2: float,
    confidence_level: float,
    n1: int,
    n2: int
) -> Tuple[float, float]:
    """Return a confidence interval for the difference ``p1 - p2``.

    The margin is ``z * sqrt(p1(1-p1)/n1 + p2(1-p2)/n2)``: each sample
    contributes its own variance term and the terms are summed.

    Args:
        p1: Proportion in the first sample.
        p2: Proportion in the second sample.
        confidence_level: Central coverage passed to ``get_critical_z``.
        n1: Size of the first sample.
        n2: Size of the second sample.

    Returns:
        ``(lower, upper)`` bounds for ``p1 - p2``.
    """
    z_star = get_critical_z(confidence_level)
    standard_error = sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2)
    margin = z_star * standard_error
    difference = p1 - p2
    return (difference - margin, difference + margin)

In [21]:
# 95% interval for p1 - p2 with p1=0.4 (n1=2100) and p2=0.17 (n2=1900).
find_confidence_interval_for_two_proportions(p1=0.4, 
                                             p2=0.17, 
                                             confidence_level=0.95, 
                                             n1=2100, 
                                             n2=1900)

(0.20308712709186322, 0.2569128729081368)

In [22]:
# 95% interval for p1 - p2 with p1=0.516 (n1=62) and p2=0.311 (n2=61).
find_confidence_interval_for_two_proportions(p1=0.516, 
                                             p2=0.311, 
                                             confidence_level=0.95,
                                             n1=62, 
                                             n2=61)

(0.03479995595572377, 0.37520004404427626)

In [23]:
# Proportion 10/25 = 0.4; the next two cells use proportion=0.4 with n=25.
10/25


0.4

In [24]:
# Traditional 95% interval for proportion=0.4 with n=25.
get_traditional_confidence_interval(proportion=0.4, confidence_level=0.95, n=25)

(0.20796353294578765, 0.5920364670542124)

In [25]:
# 95% score interval for the same inputs (proportion=0.4, n=25), for comparison
# with the traditional interval.
get_score_confidence_interval(proportion=0.4, confidence_level=0.95, n=25)

(0.2340330237774797, 0.5926054264103301)

In [26]:
# Sample size for a 99% traditional interval of total width 0.05 at proportion=0.53.
# The result is not rounded; round up for an actual sample count.
get_sample_size_from_width_traditional_ci(proportion=0.53, confidence_level=0.99, width=0.05)

2644.4043893030143

In [27]:
# Same width and confidence with proportion=0.5, which maximises
# proportion*(1-proportion) and so gives the largest sample size.
get_sample_size_from_width_traditional_ci(proportion=0.5, confidence_level=0.99, width=0.05)

2653.958640408485

In [28]:
# Score-interval counterpart of the traditional calculation (proportion=0.5,
# 99% confidence, width 0.05). Identical to the call made in cell In [18].
get_sample_size_from_width_score_ci(proportion=0.5, confidence_level=0.99, width=0.05)

2647.3237438074634

In [33]:
# 99% interval for the difference 750/1501 - 407/958 (n1=1501, n2=958).
find_confidence_interval_for_two_proportions(p1=750/1501, p2=407/958, confidence_level=0.99, n1=1501, n2=958)

(0.021932980135674025, 0.12771394974683992)

In [30]:
# Sample size for a 95% score interval of total width 0.1 at proportion=2/3.
# The result is not rounded; round up for an actual sample count.
get_sample_size_from_width_score_ci(proportion=2/3,confidence_level=0.95,width=0.1)

338.10105645188963

In [32]:
# Standard-normal 0.975 quantile: the value get_critical_z(0.95) evaluates,
# since 0.95 + (1 - 0.95)/2 = 0.975.
st.norm.ppf(.975)

1.959963984540054

In [34]:
# NOTE(review): the recorded output for this cell (1.7506860712521692) is the
# 0.96 quantile, not the 0.9 quantile (about 1.2816), so the output is stale.
# Confirm which quantile was intended and re-run the cell.
st.norm.ppf(.9)

1.7506860712521692