In [185]:
import math
from scipy.stats import norm
from IPython.display import display, Latex

def print_latex(string):
    """Render ``string`` as LaTeX in the notebook output via IPython's ``display``."""
    latex_obj = Latex(string)
    display(latex_obj)

def solution_standardization_population(lower=None,upper=None,probability=None):
    """Display the standardization steps for a one-sided normal probability.

    With only ``upper`` given the steps start from ``P(X <= upper) = probability``;
    with only ``lower`` given they start from ``P(X >= lower) = probability`` and
    first take the complement.  If neither or both bounds are given, nothing is
    displayed.  The function only produces output and returns ``None``.
    """
    only_upper = lower is None and upper is not None
    only_lower = lower is not None and upper is None
    if not (only_upper or only_lower):
        return

    if only_upper:
        bound = upper
        rhs = f"{probability}"
        print_latex(f"$P(X \\le {{{bound}}}) = {rhs}$")
    else:
        bound = lower
        # After taking the complement, every later step works with 1 - probability.
        rhs = f"1 - {probability}"
        print_latex(f"$P(X \\ge {{{bound}}}) = {probability}$")
        print_latex(f"$P(X \\le {{{bound}}}) = 1-{probability}$")

    # Standardized bound, shared by all remaining steps.
    ratio = f"\\frac{{{{{bound}}}-\\mu}}{{\\sigma}}"
    print_latex(f"$P(\\frac{{X - \\mu}}{{\\sigma}} \\le {ratio})  = {rhs}$")
    print_latex("Let $Z=\\frac{X - \\mu}{\\sigma}$, then $Z\\approx N(0,1)$. Substituting Z, we get")
    print_latex(f"$P(Z \\le {ratio})  = {rhs}$")
    print("Taking the inverse of the cumulative distribution function, we get:")
    print_latex(f"${ratio} = $ norm.ppf(${rhs}$)")

def probability_population(mean=None,sd=None,lower=None,upper=None,probability=None):
    """Solve a normal-population probability statement for its missing quantity.

    The arguments are checked in the order ``mean``, ``sd``, ``probability``;
    the first one that is ``None`` is the quantity solved for.  The worked
    steps are shown with ``print_latex``/``print``; nothing is returned.

    * ``mean`` or ``sd`` missing: needs ``probability``, the other parameter
      and exactly one bound.  A lower bound is read as
      ``P(X >= lower) = probability``, an upper bound as
      ``P(X <= upper) = probability``.
    * ``probability`` missing: needs ``mean``, ``sd`` and one or both bounds.
    * ``mean``, ``sd`` and ``probability`` all given: exactly one of
      ``lower``/``upper`` must be non-``None``.  Its value is not read; it only
      marks the *other* bound as the unknown (e.g. ``upper=False`` solves for
      the lower bound with ``P(X >= lower) = probability``).

    Args:
        mean: Population mean, or ``None`` to solve for it.
        sd: Population standard deviation, or ``None`` to solve for it.
        lower: Lower bound (see above).
        upper: Upper bound (see above).
        probability: Probability of the event, or ``None`` to solve for it.
    """
    has_lower = lower is not None
    has_upper = upper is not None

    if mean is None:
        if probability is None or sd is None or not (has_lower or has_upper):
            print("To calculate the mean, please provide the probability, standard deviation, and upper or lower bound.")
        elif has_lower and has_upper:
            print("Cannot calculate the mean with both lower and upper bounds")
        elif has_lower:
            solution_standardization_population(lower=lower,probability=probability)
            print("Solving for the mean, we get:")
            # Same expression for the displayed and the printed value.
            mu = lower - norm.ppf(1-probability) * sd
            print_latex(f"$\\mu= {lower} - $ norm.ppf( $1-{probability} $) $* {sd} \\approx {mu}$")
            print(f"The mean is {mu}")
        else:
            solution_standardization_population(upper=upper,probability=probability)
            print("Solving for the mean, we get:")
            mu = upper - norm.ppf(probability) * sd
            print_latex(f"$\\mu= {upper} - $ norm.ppf( $ {probability} $) $* {sd} \\approx {mu}$")
            print(f"The mean is {mu}")
    elif sd is None:
        # mean is known to be set here, so only probability and a bound can be missing.
        if probability is None or not (has_lower or has_upper):
            print("To calculate the standard deviation, please provide the probability, mean, and upper or lower bound.")
        elif has_lower and has_upper:
            print("Cannot calculate the standard deviation with both lower and upper bounds")
        elif has_lower:
            solution_standardization_population(lower=lower,probability=probability)
            print("Solving for the standard deviation, we get:")
            # abs() keeps the reported standard deviation non-negative.
            sigma = abs((lower - mean)/norm.ppf(1-probability))
            print_latex(f"$ \\sigma= \\frac{{ {lower} - {mean} }} {{ norm.ppf(1- {probability}) }} \\approx {sigma} $")
            print(f"The standard deviation is {sigma}")
        else:
            solution_standardization_population(upper=upper,probability=probability)
            print("Solving for the standard deviation, we get:")
            sigma = abs((upper - mean)/norm.ppf(probability))
            print_latex(f"$ \\sigma= \\frac{{{upper} - {mean}}}{{ norm.ppf({probability}) }}  \\approx {sigma} $")
            print(f"The standard deviation is {sigma}")
    elif probability is None:
        # mean and sd are both set here.
        if not (has_lower or has_upper):
            print("To calculate the probability, please provide the mean, standard deviation, and lower bound or upper bound.")
        elif not has_upper:
            result = 1-norm.cdf(lower,loc=mean,scale=sd)
            print_latex(f"$P(X \\ge {{{lower}}}) = 1-P(X \\le {{{lower}}})$")
            print_latex(f"$P(X \\ge {{{lower}}}) = 1- $ norm.cdf(${lower}$,${mean}$,${sd}$) ")
            print_latex(f"$P(X \\ge {{{lower}}}) = {result}$")
            print(f"The probability that the X is greater than {lower} is {result}")
        elif not has_lower:
            result = norm.cdf(upper,loc=mean,scale=sd)
            print_latex(f"$P(X \\le {{{upper}}}) = $ norm.cdf(${upper}$,${mean}$,${sd}$) ")
            print_latex(f"$P(X \\le {{{upper}}}) = {result}$")
            print(f"The probability that the X is less than {upper} is {result}")
        else:
            result = norm.cdf(upper,loc=mean,scale=sd)-norm.cdf(lower,loc=mean,scale=sd)
            print_latex(f"$P({{{lower}}} \\le X \\le {{{upper}}}) = P(X \\le {{{upper}}}) - P(X \\le {{{lower}}})$")
            print_latex(f"$P({{{lower}}} \\le X \\le {{{upper}}}) = $ norm.cdf(${upper}$,${mean}$,${sd}$) - norm.cdf(${lower}$,${mean}$,${sd}$) ")
            print_latex(f"$P({{{lower}}} \\le X \\le {{{upper}}}) = {result}$")
            print(f"The probability that the X is between {lower} and {upper} is {result}")
    elif has_upper and not has_lower:
        # mean, sd and probability are all set; `upper` only flags that the
        # lower bound is the unknown, its value is never read.
        solution_standardization_population(lower="lower",probability=probability)
        print("Solving for the lower bound we get:")
        bound = mean - norm.ppf(probability, loc=0, scale=1) * sd
        print_latex(f"$lower={{{mean}}}-norm.ppf({probability})*{sd} \\approx {bound}$")
        print(f"The lower bound is {bound}")
    elif has_lower and not has_upper:
        # Mirror case: `lower` only flags that the upper bound is the unknown.
        solution_standardization_population(upper="upper",probability=probability)
        print("Solving for the upper bound we get:")
        bound = mean + norm.ppf(probability, loc=0, scale=1) * sd
        print_latex(f"$upper={{{mean}}}+norm.ppf({probability})*{sd} \\approx {bound}$")
        print(f"The upper bound is {bound}")
    else:
        print("Please provide any 2 quantities from (probability, mean, standard deviation) and either upper or lower bound.")


# mean=0
# sd=1
# lower=-2
# upper=2
# probability=0.9772498680518208
# probability_population(mean=mean,sd=sd,upper=upper)
# probability_population(probability=probability,sd=sd,upper=upper)

# probability_population(mean=mean,sd=sd,lower=lower)
# probability_population(probability=probability,sd=sd,lower=lower)

# probability_population(mean=mean,sd=sd,upper=upper)

# probability_population(mean=mean,probability=probability,upper=upper)
# probability_population(mean=mean,probability=probability,lower=lower)

# probability_population(mean=mean,sd=sd,probability=probability,upper=False)
# probability_population(mean=mean,sd=sd,lower=False,probability=probability)


Module 4.2: Sampling Distribution of the Mean and the CLT

standard error formula

$SE = \frac{\sigma}{\sqrt{n}}$

In [186]:
def standard_error_mean(se=None,sd=None,n=None):
    """Show how to obtain the missing one of SE, sigma and n from SE = sigma/sqrt(n).

    Exactly one of ``se``, ``sd`` and ``n`` must be ``None``; that quantity is
    computed from the other two and the steps are displayed.  For any other
    combination a usage hint is printed.  Nothing is returned.
    """
    given = (("se", se), ("sd", sd), ("n", n))
    missing = [label for label, value in given if value is None]
    if len(missing) != 1:
        print("Please provide two of the three values: se, sd, n")
        return

    intro = "Given the formula for the standard error of the mean: $SE=\\frac{\\sigma}{\\sqrt{n}}$. "
    unknown = missing[0]
    if unknown == "se":
        print_latex(intro + "We substitute the values as follows:")
        print_latex(f"$SE=\\frac{{{sd}}}{{\\sqrt{{{n}}}}} \\approx {sd/math.sqrt(n)}$ ")
    elif unknown == "sd":
        print_latex(intro + "We can rearrange as follows to isolate the standard deviation:")
        print_latex("$\\sigma=SE\\sqrt n$")
        print("Substituting the values we get:")
        print_latex(f"$\\sigma={se}\\sqrt{{{n}}} \\approx {se*math.sqrt(n)}$")
    else:
        print_latex(intro + "We can rearrange as follows to isolate the sample size:")
        print_latex("$n=(\\frac{\\sigma}{SE})^2$")
        print("Substituting the values we get:")
        print_latex(f"$n=(\\frac{{{sd}}}{{{se}}})^2 \\approx {math.pow(sd/se,2)}$")
        
# Demo: call standard_error_mean once for each possible missing quantity.
se, sd, n = 1.118033988749895, 5, 20
standard_error_mean(n=n, sd=sd)   # se is the unknown
standard_error_mean(se=se, n=n)   # sd is the unknown
standard_error_mean(sd=sd, se=se) # n is the unknown


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Substituting the values we get:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Substituting the values we get:


<IPython.core.display.Latex object>

Probability that sample mean is within a bound

probability = norm.cdf(upper_bound,mean,se)-norm.cdf(lower_bound,mean,se)

$prob = F_X(upper)-F_X(lower)$

find $\mu$ given upper

$P(\mu\le upper) = prob$

$\frac{upper-\mu}{sd} = F^{-1}_X(prob)$

$upper-\mu = sd*F^{-1}_X(prob)$

$\mu = upper-sd*F^{-1}_X(prob)$

find $\mu$ given lower

$P(\mu\ge lower) = prob$

$P(\mu \le lower) = 1-prob$

$\frac{lower-\mu}{sd} = F^{-1}_X(1-prob)$

$lower-\mu = sd*F^{-1}_X(1-prob)$

$\mu = lower-sd*F^{-1}_X(1-prob)$

find $\mu$ given lower and upper

$P(lower\le \mu \le upper) = prob$
$P(\mu \le upper) - P(\mu\le lower)  = prob$
Not solvable

find sd given upper

$(mean \le upper) = prob$

let $Z \approx N(0,1)$

$(Z \le (\frac{upper-mean}{se})) = prob$

$\frac{upper-mean}{se} = F_X^{-1}(prob)$

$upper-mean = se* F_X^{-1}(prob)$

$upper-mean = \frac{\sigma}{\sqrt{n}}* F_X^{-1}(prob)$

$\sigma = \frac{\sqrt{n}*(upper-mean)}{F_X^{-1}(prob)}$

find sd given lower

$(mean \ge lower) = prob$

$(mean \le lower) = 1-prob$

let $Z \approx N(0,1)$

$(Z \le (\frac{lower-mean}{se})) = 1-prob$

$\frac{lower-mean}{se} = F_X^{-1}(1-prob)$

$lower-mean = se* F_X^{-1}(1-prob)$

$lower-mean = \frac{\sigma}{\sqrt{n}}* F_X^{-1}(1-prob)$

$\sigma = \frac{\sqrt{n}*(lower-mean)}{F_X^{-1}(1-prob)}$





find sd,n,upper 

$(mean \le upper) = prob$

let $Z \approx N(0,1)$

$(Z \le (\frac{upper-mean}{se})) = prob$

$\frac{upper-mean}{se} = F_X^{-1}(prob)$

$upper-mean = se* F_X^{-1}(prob)$

$upper-mean = \frac{\sigma}{\sqrt{n}}* F_X^{-1}(prob)$


$\sigma = \frac{\sqrt{n}*(upper-mean)}{F_X^{-1}(prob)}$

$n = [\frac{(F_X^{-1}(prob))*(\sigma)}{(upper-mean)}]^2 $

$upper = mean + \frac{\sigma}{\sqrt{n}}* F_X^{-1}(prob)$

find sd,n,lower

$(mean \ge lower) = prob$

$(mean \le lower) = 1-prob$

let $Z \approx N(0,1)$

$(Z \le (\frac{lower-mean}{se})) = 1-prob$

$\frac{lower-mean}{se} = F_X^{-1}(1-prob)$

$lower-mean = se* F_X^{-1}(1-prob)$

$lower-mean = \frac{\sigma}{\sqrt{n}}* F_X^{-1}(1-prob)$


$\sigma = \frac{\sqrt{n}*(lower-mean)}{F_X^{-1}(1-prob)}$

$n = [\frac{(F_X^{-1}(1-prob))*(\sigma)}{(lower-mean)}]^2 $

$lower = mean + \frac{\sigma}{\sqrt{n}}* F_X^{-1}(1-prob)$



In [187]:
def solution_standardization_mean(lower=None,upper=None,probability=None):
    """Display the standardization steps for a one-sided sample-mean probability.

    With only ``upper`` given the steps start from
    ``P(X-bar <= upper) = probability``; with only ``lower`` given they start
    from ``P(X-bar >= lower) = probability`` and first take the complement.
    The sample mean is standardized with the standard error
    ``sigma/sqrt(n)``.  If neither or both bounds are given, nothing is
    displayed.  Returns ``None``.
    """
    only_upper = lower is None and upper is not None
    only_lower = lower is not None and upper is None
    if not (only_upper or only_lower):
        return

    if only_upper:
        bound = upper
        rhs = f"{probability}"
        print_latex(f"$P(\\bar X \\le {{{bound}}}) = {rhs}$")
    else:
        bound = lower
        # After taking the complement, every later step works with 1 - probability.
        rhs = f"1 - {probability}"
        print_latex(f"$P(\\bar X \\ge {{{bound}}}) = {probability}$")
        print_latex(f"$P(\\bar X \\le {{{bound}}}) = 1-{probability}$")

    # The sample mean has standard deviation sigma/sqrt(n), not sigma.
    z_expr = "\\frac{\\bar X - \\mu}{\\sigma/\\sqrt{n}}"
    ratio = f"\\frac{{{{{bound}}}-\\mu}}{{\\sigma/\\sqrt{{n}}}}"
    print_latex(f"$P({z_expr} \\le {ratio})  = {rhs}$")
    print_latex(f"Let $Z={z_expr}$, then $Z\\sim N(0,1)$. Substituting Z, we get")
    print_latex(f"$P(Z \\le {ratio})  = {rhs}$")
    print("Taking the inverse of the cumulative distribution function, we get:")
    print_latex(f"${ratio} = $ norm.ppf(${rhs}$)")

                    
def probability_mean(probability=None,mean=None,sd=None,n=None,lower=None,upper=None):
    """Solve a sample-mean probability statement for its missing quantity.

    The sample mean is treated as normal with mean ``mean`` and standard
    error ``sd/sqrt(n)``.  The arguments are checked in the order ``mean``,
    ``sd``, ``n``, ``probability``; the first one that is ``None`` is the
    quantity solved for.  The worked steps are shown with
    ``print_latex``/``print``; nothing is returned.

    * ``mean``, ``sd`` or ``n`` missing: needs the other quantities,
      ``probability`` and exactly one bound.  A lower bound is read as
      ``P(X-bar >= lower) = probability``, an upper bound as
      ``P(X-bar <= upper) = probability``.
    * ``probability`` missing: needs ``mean``, ``sd``, ``n`` and one or both
      bounds.
    * All four quantities given: exactly one of ``lower``/``upper`` must be
      non-``None``.  Its value is not read; it only marks the *other* bound
      as the unknown (e.g. ``lower=False`` solves for the upper bound).

    Args:
        probability: Probability of the event, or ``None`` to solve for it.
        mean: Population mean, or ``None`` to solve for it.
        sd: Population standard deviation, or ``None`` to solve for it.
        n: Sample size, or ``None`` to solve for it.
        lower: Lower bound (see above).
        upper: Upper bound (see above).
    """
    has_lower = lower is not None
    has_upper = upper is not None

    if mean is None:
        if probability is None or sd is None or n is None or not (has_lower or has_upper):
            print("To calculate the mean, please provide the probability, standard deviation, sample size, and either upper or lower bound")
        elif has_lower and has_upper:
            print("The mean cannot be calculated with both lower and upper bounds")
        elif has_upper:
            solution_standardization_mean(upper=upper,probability=probability)
            print("Solving for the mean, we get:")
            mu = upper-norm.ppf(probability,loc=0,scale=1)*sd/math.sqrt(n)
            # The displayed formula includes /sqrt(n), matching the computed value.
            print_latex(f"$\\mu={{{upper}}}$ - norm.ppf(${probability}$)$\\times\\frac{{{sd}}}{{\\sqrt{{{n}}}}} \\approx {mu}$")
            print(f"The mean is {mu}")
        else:
            solution_standardization_mean(lower=lower,probability=probability)
            print("Solving for the mean, we get:")
            mu = lower-norm.ppf(1-probability,loc=0,scale=1)*sd/math.sqrt(n)
            print_latex(f"$\\mu={{{lower}}}$ - norm.ppf(1 - ${probability}$)$\\times\\frac{{{sd}}}{{\\sqrt{{{n}}}}} \\approx {mu}$")
            print(f"The mean is {mu}")
    elif sd is None:
        # mean is known to be set here.
        if probability is None or n is None or not (has_lower or has_upper):
            print("To calculate the standard deviation, please provide the probability, mean, sample size, and either upper or lower bound")
        elif has_lower and has_upper:
            print("Cannot calculate standard deviation with both lower and upper bounds")
        elif has_upper:
            solution_standardization_mean(upper=upper,probability=probability)
            z = norm.ppf(probability)
            print("Solving for the standard error (se), we get:")
            print_latex(f"$\\frac{{sd}}{{\\sqrt{{n}}}}=\\frac{{{{{upper}}}-\\mu}}{{norm.ppf({probability})}} \\approx {abs((upper - mean)/z)}$")
            print("Solving for the standard deviation of the sample, we get:")
            # abs() keeps the reported standard deviation non-negative.
            sigma = abs((upper - mean)*math.sqrt(n)/z)
            print_latex(f"$sd=|\\frac{{\\sqrt{{{n}}}\\cdot({{{upper}}}-{{{mean}}})}}{{norm.ppf({probability})}}| \\approx {sigma} $")
            print(f"The standard deviation is {sigma}")
        else:
            solution_standardization_mean(lower=lower,probability=probability)
            # Use the same quantile the displayed formula names.
            z = norm.ppf(1-probability)
            print("Solving for the standard error (se), we get:")
            print_latex(f"$\\frac{{sd}}{{\\sqrt{{n}}}}=\\frac{{{{{lower}}}-\\mu}}{{norm.ppf(1-{probability})}}  \\approx {abs((lower - mean)/z)}$")
            print("Solving for the standard deviation of the sample, we get:")
            sigma = abs((mean - lower)*math.sqrt(n)/z)
            print_latex(f"$sd=|\\frac{{\\sqrt{{{n}}}\\cdot({{{lower}}}-{{{mean}}})}}{{norm.ppf(1-{probability})}}| \\approx {sigma} $")
            print(f"The standard deviation is {sigma}")
    elif n is None:
        # mean and sd are both set here.
        if probability is None or not (has_lower or has_upper):
            print("To calculate the sample size, please provide the probability, mean, standard deviation, and either upper or lower bound")
        elif has_lower and has_upper:
            print("Cannot calculate sample size with both lower and upper bounds")
        elif has_upper:
            solution_standardization_mean(upper=upper,probability=probability)
            z = norm.ppf(probability)
            print("Solving for the standard error (se), we get:")
            print_latex(f"$\\frac{{sd}}{{\\sqrt{{n}}}}=\\frac{{{{{upper}}}-\\mu}}{{norm.ppf({probability})}}  \\approx {abs((upper - mean)/z)}$")
            print("Solving for the sample size, we get:")
            size = ((sd*z)/(upper - mean))**2
            print_latex(f"n = $[\\frac{{{{{sd}}}*norm.ppf({probability})}}{{({{{upper}}}-{{{mean}}})}}]^2 \\approx {size}$")
            print(f"The sample size is {size}")
        else:
            solution_standardization_mean(lower=lower,probability=probability)
            z = norm.ppf(1-probability)
            print("Solving for the standard error (se), we get:")
            print_latex(f"$\\frac{{sd}}{{\\sqrt{{n}}}}=\\frac{{{{{lower}}}-{{{mean}}}}}{{norm.ppf(1-{probability})}} \\approx {abs((lower - mean)/z)}$")
            print("Solving for the sample size, we get:")
            size = ((sd*z)/(mean - lower))**2
            print_latex(f"n = $[\\frac{{{{{sd}}}*norm.ppf(1-{probability})}}{{({{{lower}}}-{{{mean}}})}}]^2 \\approx {size}$")
            print(f"The sample size is {size}")
    elif probability is None:
        # mean, sd and n are all set here.
        if not (has_lower or has_upper):
            print("To calculate the probability, provide lower bound or upper bound.")
        else:
            se = sd/math.sqrt(n)
            se_latex = f"$\\frac{{{sd}}}{{\\sqrt{{{n}}}}} \\approx {se}$"
            if not has_upper:
                result = 1-norm.cdf(lower,loc=mean,scale=se)
                print_latex(f"$P(\\bar X \\ge {{{lower}}}) = 1-P(\\bar X \\le {{{lower}}})$")
                print_latex(f"$P(\\bar X \\ge {{{lower}}}) = 1- $ norm.cdf(${lower}$,${mean}$,{se_latex}) ")
                print_latex(f"$P(\\bar X \\ge {{{lower}}}) = {result}$")
                print(f"The probability that the sample mean is greater than {lower} is {result}")
            elif not has_lower:
                result = norm.cdf(upper,loc=mean,scale=se)
                print_latex(f"$P(\\bar X \\le {{{upper}}}) = $ norm.cdf(${upper}$,${mean}$,{se_latex}) ")
                print_latex(f"$P(\\bar X \\le {{{upper}}}) = {result}$")
                print(f"The probability that the sample mean is less than {upper} is {result}")
            else:
                result = norm.cdf(upper,loc=mean,scale=se)-norm.cdf(lower,loc=mean,scale=se)
                print_latex(f"$P({{{lower}}} \\le \\bar X \\le {{{upper}}}) = P(\\bar X \\le {{{upper}}}) - P(\\bar X \\le {{{lower}}})$")
                print_latex(f"$P({{{lower}}} \\le \\bar X \\le {{{upper}}}) = $ norm.cdf(${upper}$,${mean}$,{se_latex}) - norm.cdf(${lower}$,${mean}$,{se_latex}) ")
                print_latex(f"$P({{{lower}}} \\le \\bar X \\le {{{upper}}}) = {result}$")
                print(f"The probability that the sample mean is between {lower} and {upper} is {result}")
    elif has_upper and not has_lower:
        # All four quantities are set; `upper` only flags that the lower bound
        # is the unknown, its value is never read.
        solution_standardization_mean(lower="lower",probability=probability)
        print("Solving for the lower bound we get:")
        bound = mean + norm.ppf(1 - probability, loc=0, scale=1) * sd/math.sqrt(n)
        print_latex(f"$lower={{{mean}}}+norm.ppf(1-{probability})*\\frac{{{sd}}}{{\\sqrt{{{n}}}}} \\approx {bound}$")
        print(f"The lower bound is {bound}")
    elif has_lower and not has_upper:
        # Mirror case: `lower` only flags that the upper bound is the unknown.
        solution_standardization_mean(upper="upper",probability=probability)
        print("Solving for the upper bound we get:")
        bound = mean + norm.ppf(probability, loc=0, scale=1) * sd/math.sqrt(n)
        print_latex(f"$upper={{{mean}}}+norm.ppf({probability})*\\frac{{{sd}}}{{\\sqrt{{{n}}}}} \\approx {bound}$")
        print(f"The upper bound is {bound}")
    else:
        print("Please provide any 3 quantities from (probability, mean, standard deviation, sample size) and either upper or lower bound.")

# Exercise 3a: upper-bound case, P(sample mean <= 7). Each call leaves one
# quantity out so that probability_mean solves for it.
mean, sd, n = 8, 4, 30
upper = 7
probability = 0.08545176011539873
print("3a")
print("\n\nSolving for the mean:")
probability_mean(upper=upper, n=n, sd=sd, probability=probability)
print("\n\nSolving for the standard deviation:")
probability_mean(upper=upper, n=n, probability=probability, mean=mean)
print("\n\nSolving for the sample size:")
probability_mean(upper=upper, probability=probability, sd=sd, mean=mean)
print("\n\nSolving for the upper bound:")
# lower=False is only a non-None marker: it selects the upper bound as the unknown.
probability_mean(lower=False, probability=probability, n=n, sd=sd, mean=mean)
print("\n\nSolving for the probability that sample mean is less than 7:")
probability_mean(upper=upper, n=n, sd=sd, mean=mean)


3a


Solving for the mean:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the mean, we get:


<IPython.core.display.Latex object>

The mean is 8.0


Solving for the standard deviation:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the standard deviation (se), we get:


<IPython.core.display.Latex object>

Solving for the standard deviation of the sample, we get:


<IPython.core.display.Latex object>

The standard deviation is 4.0


Solving for the sample size:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the standard deviation (se), we get:


<IPython.core.display.Latex object>

Solving for the sample size, we get:


<IPython.core.display.Latex object>

The sample size is 30.0


Solving for the upper bound:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the upper bound we get:


<IPython.core.display.Latex object>

The upper bound is 7.0


Solving for the probability that sample mean is less than 7:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

The probability that the sample mean is less than 7 is 0.08545176011539873


In [188]:
# Exercise 3b: lower-bound case, P(sample mean >= 7). Each call leaves one
# quantity out so that probability_mean solves for it.
mean=8
sd=4
n=30
lower=7
probability=0.9145482398846012
print("3b")
print("\n\nSolving for the mean:")
probability_mean(probability=probability,sd=sd,n=n,lower=lower)
print("\n\nSolving for the standard deviation:")
probability_mean(mean=mean,probability=probability,n=n,lower=lower)
print("\n\nSolving for the sample size:")
probability_mean(mean=mean,sd=sd,probability=probability,lower=lower)
# upper=False is only a non-None marker: it makes probability_mean solve for
# the LOWER bound, so the heading says "lower" (it previously said "upper").
print("\n\nSolving for the lower bound:")
probability_mean(mean=mean,sd=sd,n=n,probability=probability,upper=False)
print("\n\nSolving for the probability that sample mean is greater than 7:")
probability_mean(mean=mean,sd=sd,n=n,lower=lower)


3b


Solving for the mean:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the mean, we get:


<IPython.core.display.Latex object>

The mean is 8.0


Solving for the standard deviation:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the standard deviation (se), we get:


<IPython.core.display.Latex object>

Solving for the standard deviation of the sample, we get:


<IPython.core.display.Latex object>

The standard deviation is 4.000000000000001


Solving for the sample size:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the standard deviation (se), we get:


<IPython.core.display.Latex object>

Solving for the sample size, we get:


<IPython.core.display.Latex object>

The sample size is 29.99999999999999


Solving for the upper bound:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Taking the inverse of the cumulative distribution function, we get:


<IPython.core.display.Latex object>

Solving for the lower bound we get:


<IPython.core.display.Latex object>

The lower bound is 7.0


Solving for the probability that sample mean is greater than 7:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

The probability that the sample mean is greater than 7 is 0.9145482398846012


In [189]:
# Exercise 3c: two-sided case, P(7 <= sample mean <= 9).
print("3c")
mean, sd, n = 8, 4, 30
lower, upper = 7, 9
probability = 0.8290964797692024  # not passed below: probability is the unknown
probability_mean(lower=lower, upper=upper, n=n, sd=sd, mean=mean)

3c


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

The probability that the sample mean is between 7 and 9 is 0.8290964797692024


Module 4.3: Sampling Distribution of the Sample Proportion

$ SE = \sqrt{\frac{p(1-p)}{n}}$

$ SE^2 \cdot n = p\cdot(1-p)$

$ SE^2 \cdot n = p-p^2$

$ p^2 -p +SE^2 \cdot n = 0 $

$p = \frac{1 \pm \sqrt{1-4\left(SE^2 \cdot n\right)}}{2}$

$ n = \frac{p-p^2}{SE^2}$

In [190]:
def standard_error_proportion(se=None, proportion=None, n=None):
    """Show how to obtain the missing one of SE, p and n from SE = sqrt(p(1-p)/n).

    Exactly one of ``se``, ``proportion`` and ``n`` must be ``None``; that
    quantity is computed from the other two and the steps are displayed.
    For any other combination a usage hint is printed.  Nothing is returned.

    * ``se`` missing: ``SE = sqrt(p(1-p)/n)``.
    * ``proportion`` missing: both roots of ``p^2 - p + n*SE^2 = 0`` are
      shown; if ``1 - 4*n*SE^2`` is negative no real root exists and a
      message is printed instead.
    * ``n`` missing: ``n = p(1-p)/SE^2``.
    """
    formula = "$SE=\\sqrt{\\frac{ p(1-p) }{ n } }$"
    intro = f"Given the formula for the standard error of the proportion: {formula}. "
    if se is None and proportion is not None and n is not None:
        print_latex(intro + "We substitute the values as follows:")
        # The denominator is n itself, not sqrt(n).
        value = math.sqrt(proportion*(1-proportion)/n)
        print_latex(f"$SE=\\sqrt{{\\frac{{ {proportion}(1-{proportion}) }}{{ {n} }} }} \\approx {value}$ ")
    elif proportion is None and n is not None and se is not None:
        print_latex(intro + "We can rearrange as follows to isolate the proportion using the quadratic formula:")
        print_latex("$p = \\frac{1 \\pm \\sqrt{1-4\\cdot n\\cdot SE^2}}{2}$")
        discriminant = 1 - 4*n*se**2
        if discriminant < 0:
            # SE can be at most 0.5/sqrt(n); larger values have no real solution.
            print(f"No real proportion gives se={se} with n={n} (1 - 4*n*se^2 is negative).")
            return
        root = math.sqrt(discriminant)
        print("Substituting the values we get:")
        print_latex(f"$p = \\frac{{1 \\pm \\sqrt{{1-4\\cdot {n}\\cdot {se}^2}}}}{{2}} \\approx {(1+root)/2} $ or ${(1-root)/2} $")
    elif n is None and se is not None and proportion is not None:
        print_latex(intro + "We can rearrange as follows to isolate the sample size:")
        print_latex("$ n = \\frac{p(1-p)}{SE^2}$")
        print("Substituting the values we get:")
        print_latex(f"$ n = \\frac{{{proportion}(1-{proportion})}}{{{se}^2}} \\approx {proportion*(1-proportion)/se**2}$")
    else:
        print("Please provide two of the three values: se, proportion, n")
        

# Demo: call standard_error_proportion once for each possible missing quantity.
n=1000
proportion=0.6
# Derive se from SE = sqrt(p(1-p)/n) so the three calls describe the same
# scenario, rather than hard-coding a value.
se=math.sqrt(proportion*(1-proportion)/n)
standard_error_proportion(proportion=proportion, n=n)
standard_error_proportion(se=se, n=n)
standard_error_proportion(se=se, proportion=proportion)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Substituting the values we get:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Substituting the values we get:


<IPython.core.display.Latex object>

$P(\hat p \le upper) = prob$




$P(\hat p \ge lower) = prob$


$P(lower \le \hat p \le upper) = prob$


In [191]:

from scipy.stats import norm
import math

def solution_standardization_proportion(lower=None, upper=None, probability=None):
    """Display the standardization steps for a one-sided sample-proportion probability.

    With only ``upper`` given the steps start from
    ``P(p-hat <= upper) = probability``; with only ``lower`` given they start
    from ``P(p-hat >= lower) = probability`` and first take the complement.
    The sample proportion is standardized with ``sqrt(p*(1-p)/n)``.  If
    neither or both bounds are given, nothing is displayed.  Returns ``None``.
    """
    only_upper = lower is None and upper is not None
    only_lower = lower is not None and upper is None
    if not (only_upper or only_lower):
        return

    if only_upper:
        bound = upper
        rhs = f"{probability}"
        print_latex(f"$P(\\hat p \\leq {bound}) = {rhs}$")
    else:
        bound = lower
        # After taking the complement, every later step works with 1 - probability.
        rhs = f"1 - {probability}"
        print_latex(f"$P(\\hat p \\geq {bound}) = {probability}$")
        print_latex(f"$P(\\hat p \\leq {bound}) = 1 - {probability}$")

    z_expr = "\\frac{{\\hat p} - p}{\\sqrt{p*(1-p)/n}}"
    # Interpolate the bound's value; previously the literal word was printed.
    ratio = f"\\frac{{{{{bound}}} - p}}{{\\sqrt{{p*(1-p)/n}}}}"
    print_latex(f"$P( {z_expr} \\leq {ratio}) = {rhs}$")
    print_latex(f"Let $Z = {z_expr}$, then $Z \\sim N(0, 1)$")
    print_latex(f"$P(Z \\leq {ratio}) = {rhs}$")
    print_latex("Taking inverse of the cumulative distribution function, we get:")
    print_latex(f"${ratio} = $ norm.ppf(${rhs}$)")

def _print_proportion_root(bound, n, d, d_latex):
    """Print the population proportion p solving (bound - p) / sqrt(p(1-p)/n) = d."""
    # Squaring the equation gives a*p^2 + b*p + c = 0 with the coefficients below.
    a = n + d**2
    a_latex = f"{n}+{d_latex}^2"
    b = -d**2 - 2*bound*n
    b_latex = f"-{d_latex}^2-2*({bound}*{n})"
    c = bound**2*n
    c_latex = f"{bound}^2*{n}"
    # With d == 0 the quadratic collapses to a double root at p = bound; force
    # the discriminant to zero so rounding cannot push it below zero.
    discriminant = 0.0 if d == 0 else b**2 - 4*a*c
    root = math.sqrt(discriminant)
    # Squaring introduced a spurious root.  The bound lies between the two
    # roots, and (bound - p) must have the same sign as d, so a positive d
    # selects the smaller root and a negative d the larger one.
    if d > 0:
        sign, p = "-", (-b - root)/(2*a)
    else:
        sign, p = "+", (-b + root)/(2*a)
    print_latex(f"$p = \\frac{{-({b_latex}) {sign} \\sqrt{{({b_latex})^2-4({a_latex})({c_latex})}} }}{{2\\cdot ({a_latex})}} \\approx {p}$")
    print(f"The proportion is {p}")


def _print_sample_size(bound, proportion, d, d_latex):
    """Print the sample size n solving (bound - proportion) / sqrt(proportion(1-proportion)/n) = d."""
    n_value = (d**2*proportion*(1-proportion))/((bound-proportion)**2)
    print_latex(f"$n = \\frac{{({d_latex})^2(p)(1-p)}}{{({bound}-p)^2}} \\approx {n_value}$")
    print(f"The sample size is {n_value :.0f}")


def probability_proportion(probability=None,proportion=None,n=None,lower=None,upper=None):
    """Print a worked solution for the one missing quantity of a sample-proportion problem.

    The sample proportion is treated as normal with mean ``proportion`` and
    standard deviation ``sqrt(proportion * (1 - proportion) / n)``.  The
    quantity to solve for is chosen by which argument is ``None``, checked in
    this order:

    * ``proportion`` is None: solve for the population proportion from
      ``probability``, ``n`` and exactly one bound.
    * ``n`` is None: solve for the sample size from ``probability``,
      ``proportion`` and exactly one bound.
    * ``probability`` is None: compute P(p_hat >= lower), P(p_hat <= upper) or
      P(lower <= p_hat <= upper) from ``proportion`` and ``n``.
    * all three given and exactly one bound given: print the solution for the
      *other* bound (see the review note in the body).

    Args:
        probability: Probability attached to the bound statement.
        proportion: Population proportion p.
        n: Sample size.
        lower: Lower bound, read as P(p_hat >= lower).
        upper: Upper bound, read as P(p_hat <= upper).

    Returns:
        None.  All results and usage messages are printed / displayed.
    """
    if proportion is None:
        if probability is not None and n is not None and (lower is not None or upper is not None):
            if lower is None:
                solution_standardization_proportion(upper=upper,probability=probability)
                print("Solving for p, we get:")
                _print_proportion_root(upper, n, norm.ppf(probability), f"norm.ppf({probability})")
            elif upper is None:
                solution_standardization_proportion(lower=lower,probability=probability)
                print("Solving for p, we get:")
                _print_proportion_root(lower, n, norm.ppf(1-probability), f"norm.ppf(1-{probability})")
            else:
                print(f"The proportion cannot be calculated with both lower and upper bounds")
        else:
            print(f"To calculate the proportion, please provide the probability, sample size, and either upper or lower bound")
    elif n is None:
        if probability is not None and (lower is not None or upper is not None):
            if lower is None:
                solution_standardization_proportion(upper=upper,probability=probability)
                print("Solving for n, we get:")
                _print_sample_size(upper, proportion, norm.ppf(probability), f"norm.ppf({probability})")
            elif upper is None:
                solution_standardization_proportion(lower=lower,probability=probability)
                print("Solving for n, we get:")
                _print_sample_size(lower, proportion, norm.ppf(1-probability), f"norm.ppf(1-{probability})")
            else:
                print(f"Cannot calculate sample size with both lower and upper bounds")
        else:
            print(f"To calculate the sample size, please provide the probability, proportion, and either upper or lower bound")
    elif probability is None:
        # proportion and n are both known here (earlier branches handled None).
        if lower is None and upper is None:
            print(f"To calculate the probability, provide lower bound or upper bound.")
        else:
            # Standard error of the sample proportion and its displayed form.
            se = math.sqrt((proportion*(1-proportion))/n)
            scale_latex = f"\\sqrt{{ \\frac{{ {proportion} \\cdot (1-{proportion})}} {{{n}}} }} \\approx {se}"
            if upper is None:
                result = 1-norm.cdf(lower,loc=proportion,scale=se)
                print_latex(f"$P(\\hat p \\ge {lower}) = 1 - P(\\hat p \\le {lower})$")
                print_latex(f"$P(\\hat p \\ge {lower}) = 1 - $ norm.cdf($ {lower} $ ,loc=${proportion}$,scale=${scale_latex}$)")
                print_latex(f"$P(\\hat p \\ge {lower}) = {result}$")
                print(f"The probability that the sample proportion is greater than {lower} is {result}")
            elif lower is None:
                result = norm.cdf(upper,loc=proportion,scale=se)
                print_latex(f"$P(\\hat p \\le {upper}) = $ norm.cdf($ {upper} $ ,loc=${proportion}$,scale=${scale_latex}$)")
                print_latex(f"$P(\\hat p \\le {upper}) = {result}$")
                print(f"The probability that the sample proportion is less than {upper} is {result}")
            else:
                result = norm.cdf(upper,loc=proportion,scale=se)-norm.cdf(lower,loc=proportion,scale=se)
                print_latex(f"$P({lower} \\le \\hat p \\le {upper}) = P(\\hat p \\le {upper} ) - P(\\hat p \\le {lower}) $")
                print_latex(f"$P({lower} \\le \\hat p \\le {upper}) = $ norm.cdf ($ {upper} $ ,loc=${proportion}$,scale=${scale_latex}$) - norm.cdf($ {lower} $ ,loc=${proportion}$,scale=${scale_latex}$)")
                # The final line reports the two-sided probability, so it is
                # labelled with both bounds.
                print_latex(f"$P({lower} \\le \\hat p \\le {upper}) = {result}$")
                print(f"The probability that the sample proportion is between {lower} and {upper} is {result}")
    elif lower is None and upper is not None:
        # NOTE(review): probability, proportion and n are all known here, and a
        # supplied upper bound selects the *lower*-bound solution (the upper
        # value itself is not used).  Kept as-is for existing callers; confirm
        # whether this selector is intended.
        se = math.sqrt((proportion*(1-proportion))/n)
        lower_bound = proportion + norm.ppf(1 - probability, loc=0, scale=1) * se
        solution_standardization_proportion(lower="lower",probability=probability)
        print("Solving for the lower bound, we get:")
        print_latex(f"$lower= {proportion} + $ norm.ppf( $ 1 - {probability} $) $* \\sqrt{{ \\frac{{ {proportion} \\cdot (1-{proportion})}} {{{n}}} }} \\approx {lower_bound}$")
        print(f"The lower bound is {lower_bound}")
    elif lower is not None and upper is None:
        # NOTE(review): mirror of the branch above -- a supplied lower bound
        # selects the *upper*-bound solution; confirm intent.
        se = math.sqrt((proportion*(1-proportion))/n)
        upper_bound = proportion + norm.ppf(probability, loc=0, scale=1) * se
        solution_standardization_proportion(upper="upper",probability=probability)
        print("Solving for the upper bound, we get:")
        print_latex(f"$upper= {proportion} + $ norm.ppf( $ {probability} $) $* \\sqrt{{ \\frac{{ {proportion} \\cdot (1-{proportion})}} {{{n}}} }} \\approx {upper_bound}$")
        print(f"The upper bound is {upper_bound}")
    else:
        print("Please provide any 3 quantities from (probability, proportion, sample size) and either upper or lower bound.")





# Earlier example calls, kept for reference (disabled).
# One-sided probabilities for a sample proportion:
# probability_proportion(proportion=0.3,n=64,lower=0.4)
# probability_proportion(proportion=0.3,n=100,upper=25/100)
# probability_proportion(proportion=0.7,n=120,upper=100/120)

# Two-sided probability:
# probability_proportion(proportion=0.4,n=80,lower=0.44,upper=0.61)


# Incomplete draft: `proportion=` has no value, so this line is not valid
# Python if uncommented as-is.
# probability_proportion(proportion=,n=80,lower=0.44)

# Step 1: probability that X falls between 0.496 and 0.504 for mean 0.498 and
# sd 0.002 (the recorded output reports 0.839994848036913).
probability_population(mean=0.498,sd=0.002,lower=0.496,upper=0.504)

# Step 2: use the step-1 probability as the population proportion and compute
# the probability that the sample proportion of n=150 is at least 0.9.
probability_proportion(proportion=0.839994848036913,n=150,lower=0.9)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

The probability that the X is between 0.496 and 0.504 is 0.839994848036913


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

The probability that the sample proportion is greater than 0.9 is 0.02250262962632188
