In [2]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import math
from scipy import stats
from scipy.stats import norm
from scipy.stats import chi2
from scipy.stats import t
from scipy.stats import f
from scipy.stats import bernoulli
from scipy.stats import binom
from scipy.stats import nbinom
from scipy.stats import geom
from scipy.stats import poisson
from scipy.stats import uniform
from scipy.stats import randint
from scipy.stats import expon
from scipy.stats import gamma
from scipy.stats import beta
from scipy.stats import weibull_min
from scipy.stats import hypergeom
from scipy.stats import shapiro
from scipy.stats import pearsonr
from scipy.stats import normaltest
from scipy.stats import anderson
from scipy.stats import spearmanr
from scipy.stats import kendalltau
from scipy.stats import chi2_contingency
from scipy.stats import ttest_ind
from scipy.stats import ttest_rel
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
from scipy.stats import kruskal
from scipy.stats import friedmanchisquare
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.stats.weightstats import ztest
from scipy.integrate import quad
from IPython.display import display, Latex

# **Confidence Interval for the Mean of a Normal Population:**

## **1. Known Standard Deviation:**

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\ Z_{\frac{\alpha}{2}}\ \leq\ \frac{\overline{X}-\mu}{\frac{\sigma}{\sqrt{n}}}\ \leq\ Z_{\frac{\alpha}{2}}) = 1-\alpha$

$P(\overline{X}\ -\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}}\ \leq\ \mu\ \leq\ \overline{X}\ +\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}}) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}},\ \overline{X}\ +\ Z_{\frac{\alpha}{2}} \frac{\sigma}{\sqrt{n}}]$

<br>
<br>
<br>
<br>

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\ Z_{\alpha}\ \leq\ \frac{\overline{X}-\mu}{\frac{\sigma}{\sqrt{n}}}\ \leq\ \infty) = 1-\alpha$

$P(-\infty\ \leq\ \mu\ \leq\ \overline{X}\ +\ Z_{\alpha} \frac{\sigma}{\sqrt{n}}) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[-\infty,\ \overline{X}\ +\ Z_{\alpha} \frac{\sigma}{\sqrt{n}}]$

<br>
<br>
<br>
<br>

**C. One-sided Upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and a known variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\infty\ \leq\ \frac{\overline{X}-\mu}{\frac{\sigma}{\sqrt{n}}}\ \leq\ Z_{\alpha}) = 1-\alpha$

$P(\overline{X}\ -\ Z_{\alpha} \frac{\sigma}{\sqrt{n}}\ \leq\ \mu\ \leq\   \infty) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ Z_{\alpha} \frac{\sigma}{\sqrt{n}},\ \infty]$

In [29]:
class confidence_interval_for_mean:
    """
    Confidence interval for the mean of a normal population whose standard
    deviation is known (z-interval).

    The interval is computed on construction, stored on the instance as
    ``lower`` and ``upper``, and rendered as LaTeX through ``display``.

    Parameters
    ----------
    sd : known population standard deviation
    level_c : confidence level as a fraction strictly between 0 and 1 (often 0.95)
    type_c : 'two_sided', 'lower' or 'upper'
        'two_sided' : sample_mean -/+ z(alpha/2) * sd / sqrt(n)
        'lower'     : -inf < mu <= sample_mean + z(alpha) * sd / sqrt(n)
        'upper'     : sample_mean - z(alpha) * sd / sqrt(n) <= mu < +inf
    n : number of samples (overridden when ``data`` is given)
    sample_mean : mean of the sample (overridden when ``data`` is given)
    data : optional iterable of observations; when given, ``n`` and
        ``sample_mean`` are computed from it

    Attributes
    ----------
    lower, upper : bounds of the interval (``-inf`` / ``inf`` on the open side)

    Raises
    ------
    ValueError
        If ``type_c`` is not one of the supported kinds, ``level_c`` is not
        strictly between 0 and 1, or the sample size is not positive.
    """
    _KINDS = ('two_sided', 'lower', 'upper')

    def __init__(self, sd, level_c, type_c, n = 0, sample_mean = 0., data = None):
        if type_c not in self._KINDS:
            raise ValueError(f"type_c must be one of {self._KINDS}, got {type_c!r}")
        if not 0 < level_c < 1:
            raise ValueError(f"level_c must be strictly between 0 and 1, got {level_c!r}")

        self.sample_mean = sample_mean
        self.sd = sd
        self.n = n
        self.level_c = level_c
        self.type_c = type_c
        self.data = data
        if data is not None:
            # Materialise once so one-shot iterables (generators) are not
            # exhausted between computing the mean and the length.
            observations = list(data)
            self.n = len(observations)
            if self.n > 0:
                self.sample_mean = np.mean(observations)

        if self.n <= 0:
            raise ValueError("sample size n must be positive (pass n or non-empty data)")

        self.lower, self.upper = self._bounds()
        self._show()

    def _bounds(self):
        """Return ``(lower, upper)`` for the configured interval type."""
        std_error = self.sd / np.sqrt(self.n)
        alpha = 1 - self.level_c
        if self.type_c == 'two_sided':
            margin = norm.isf(alpha / 2) * std_error
            return self.sample_mean - margin, self.sample_mean + margin
        # norm.isf(alpha) is the positive upper-tail critical value z(alpha);
        # norm.ppf(alpha) would be its negative and would flip the bound to
        # the wrong side of the sample mean.
        margin = norm.isf(alpha) * std_error
        if self.type_c == 'lower':
            return -np.inf, self.sample_mean + margin
        return self.sample_mean - margin, np.inf

    def _show(self):
        """Render the stored interval as LaTeX in the notebook output."""
        # Raw f-strings keep the LaTeX backslashes without invalid-escape warnings.
        if self.type_c == 'two_sided':
            display(Latex(rf'${self.lower} \leq \mu \leq {self.upper}$'))
        elif self.type_c == 'lower':
            display(Latex(rf'$ - \infty < \mu \leq {self.upper}$'))
        else:
            display(Latex(rf'${self.lower} \leq \mu < + \infty $'))
            


In [30]:
# Seeded generator so the sample (and the resulting interval) is reproducible
# on Restart & Run All.
rng = np.random.default_rng(42)
data = rng.normal(loc = 2, scale = 3, size = 1000)
# The constructor renders the interval itself; the trailing ';' suppresses the
# bare object repr that would otherwise be echoed as the cell's value.
confidence_interval_for_mean(sd = 3, level_c = 0.95, type_c = 'two_sided', data = data);

<IPython.core.display.Latex object>

<__main__.confidence_interval_for_mean at 0x7fe8992aa4c0>

## **2. Unknown Standard Deviation:**

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$P(-\ t_{\frac{\alpha}{2},n-1}\ <\ \frac{\overline{X}-\mu}{\frac{S}{\sqrt{n}}} <\ t_{\frac{\alpha}{2},n-1}) = 1-\alpha$

$P(\overline{X}\ -\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}}\ <\ \mu\ <\ \overline{X}\ +\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}}) = 1- \alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}},\ \overline{X}\ +\ t_{\frac{\alpha}{2},n-1} \frac{S}{\sqrt{n}}]$

<br>
<br>
<br>

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-t_{\alpha,n-1}\ \leq\ \frac{\overline{X}-\mu}{\frac{S}{\sqrt{n}}}\ \leq\ \infty) = 1-\alpha$

$P(-\infty\ \leq\ \mu\ \leq\ \overline{X}\ +\ t_{\alpha,n-1} \frac{S}{\sqrt{n}}) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[-\infty,\ \overline{X}\ +\ t_{\alpha,n-1} \frac{S}{\sqrt{n}}]$

<br>
<br>
<br>

**C. One-sided Upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\\ $

$P(-\infty\ \leq\ \frac{\overline{X}-\mu}{\frac{S}{\sqrt{n}}}\ \leq\ t_{\alpha,n-1}) = 1-\alpha$

$P(\overline{X}\ -\ t_{\alpha,n-1} \frac{S}{\sqrt{n}} \leq\ \mu\ \leq\ \infty) = 1-\alpha$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the mean of a normal population is:

$[\overline{X}\ -\ t_{\alpha,n-1} \frac{S}{\sqrt{n}},\ \infty]$

In [33]:
class confidence_interval_for_mean_no_sd:
    """
    Confidence interval for the mean of a normal population whose standard
    deviation is unknown (Student t-interval with n - 1 degrees of freedom).

    The interval is computed on construction, stored on the instance as
    ``lower`` and ``upper``, and rendered as LaTeX through ``display``.

    Parameters
    ----------
    level_c : confidence level as a fraction strictly between 0 and 1 (often 0.95)
    type_c : 'two_sided', 'lower' or 'upper'
        'two_sided' : sample_mean -/+ t(alpha/2, n-1) * sample_sd / sqrt(n)
        'lower'     : -inf < mu <= sample_mean + t(alpha, n-1) * sample_sd / sqrt(n)
        'upper'     : sample_mean - t(alpha, n-1) * sample_sd / sqrt(n) <= mu < +inf
    n : number of samples (overridden when ``data`` is given)
    sample_mean : mean of the sample (overridden when ``data`` is given)
    sample_sd : sample standard deviation, ddof = 1 (overridden when ``data`` is given)
    data : optional iterable of observations; when given, ``n``,
        ``sample_mean`` and ``sample_sd`` are computed from it

    Attributes
    ----------
    lower, upper : bounds of the interval (``-inf`` / ``inf`` on the open side)

    Raises
    ------
    ValueError
        If ``type_c`` is not one of the supported kinds, ``level_c`` is not
        strictly between 0 and 1, or fewer than two samples are available.
    """
    _KINDS = ('two_sided', 'lower', 'upper')

    def __init__(self, level_c, type_c, n = 0, sample_mean = 0., sample_sd = 0., data = None):
        if type_c not in self._KINDS:
            raise ValueError(f"type_c must be one of {self._KINDS}, got {type_c!r}")
        if not 0 < level_c < 1:
            raise ValueError(f"level_c must be strictly between 0 and 1, got {level_c!r}")

        self.sample_mean = sample_mean
        # Store the caller-supplied value; without this, using summary
        # statistics instead of raw data fails with AttributeError.
        self.sample_sd = sample_sd
        self.n = n
        self.level_c = level_c
        self.type_c = type_c
        self.data = data
        if data is not None:
            # Materialise once so one-shot iterables (generators) are not
            # exhausted between the individual statistics.
            observations = list(data)
            self.n = len(observations)
            if self.n >= 2:
                self.sample_mean = np.mean(observations)
                self.sample_sd = np.std(observations, ddof=1)

        # The t distribution needs n - 1 >= 1 degrees of freedom.
        if self.n < 2:
            raise ValueError("at least two samples are required (pass n >= 2 or data with >= 2 values)")

        self.lower, self.upper = self._bounds()
        self._show()

    def _bounds(self):
        """Return ``(lower, upper)`` for the configured interval type."""
        std_error = self.sample_sd / np.sqrt(self.n)
        alpha = 1 - self.level_c
        dof = self.n - 1
        if self.type_c == 'two_sided':
            margin = t.isf(alpha / 2, dof) * std_error
            return self.sample_mean - margin, self.sample_mean + margin
        margin = t.isf(alpha, dof) * std_error
        if self.type_c == 'lower':
            return -np.inf, self.sample_mean + margin
        return self.sample_mean - margin, np.inf

    def _show(self):
        """Render the stored interval as LaTeX in the notebook output."""
        # Raw f-strings keep the LaTeX backslashes without invalid-escape warnings.
        if self.type_c == 'two_sided':
            display(Latex(rf'${self.lower} \leq \mu \leq {self.upper}$'))
        elif self.type_c == 'lower':
            display(Latex(rf'$ - \infty < \mu \leq {self.upper}$'))
        else:
            display(Latex(rf'${self.lower} \leq \mu < + \infty $'))

In [34]:
# Nine observations; their mean and sample standard deviation are estimated
# from the data because the population standard deviation is unknown.
data = [5, 8.5, 12, 15, 7, 9, 7.5, 6.5, 10.5]
# The constructor renders the interval itself; the trailing ';' suppresses the
# bare object repr that would otherwise be echoed as the cell's value.
confidence_interval_for_mean_no_sd(level_c = 0.95, type_c = 'two_sided', data = data);

<IPython.core.display.Latex object>

<__main__.confidence_interval_for_mean_no_sd at 0x7fe8992aaf10>

# **Confidence Interval for the Variance of a Normal Population:**

## **1. Unknown Mean of the Population:**

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ \chi^2_{1-\frac{\alpha}{2}, n-1}\ \leq\ \frac{(n-1)\ S^2}{\sigma^2} \ \leq \chi^2_{\frac{\alpha}{2}, n-1} $

$\ \frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}} \leq\ \sigma^2 \leq\ \frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}$

$\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}}} \leq\ \sigma \leq\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}},\ \frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n-1)\ S^2}{\chi^2_{\frac{\alpha}{2}, n-1}}},\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\frac{\alpha}{2}, n-1}}}]$

<br>
<br>
<br>

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ 0 \leq\ \sigma^2 \leq\ \frac{(n-1)\ S^2}{\chi^2_{1-\alpha, n-1}}$

$\ 0 \leq\ \sigma \leq\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\alpha, n-1}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[0,\ \frac{(n-1)\ S^2}{\chi^2_{1-\alpha, n-1}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[0,\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{1-\alpha, n-1}}}]$
<br>
<br>
<br>

**C. One-sided Upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having an unknown mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ \frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}} \leq\ \sigma^2 \leq\ \infty$

$\ \sqrt{\frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}}} \leq\ \sigma \leq\ \infty$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}},\ \infty]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n-1)\ S^2}{\chi^2_{\alpha, n-1}}},\ \infty]$

## **2. Known Mean of the Population:**

**A. Two-sided Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having a known mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

$S' = \sqrt{\frac{\sum_{i=1}^n\ (x_i\ -\ \mu)^2}{n}}$

$\\ $

Significance level = $\alpha$

$\ \chi^2_{1-\frac{\alpha}{2}, n}\ \leq\ \frac{(n)\ S'^2}{\sigma^2} \ \leq \chi^2_{\frac{\alpha}{2}, n} $

$\ \frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}} \leq\ \sigma^2 \leq\ \frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}$

$\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}}} \leq\ \sigma \leq\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}},\ \frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n)\ S'^2}{\chi^2_{\frac{\alpha}{2}, n}}},\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\frac{\alpha}{2}, n}}}]$

<br>
<br>
<br>

**B. One-sided Lower Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having a known mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ \chi^2_{1-\alpha, n}\ \leq\ \frac{(n)\ S'^2}{\sigma^2} \ \leq \infty $

$\ 0 \leq\ \sigma^2 \leq\ \frac{(n)\ S'^2}{\chi^2_{1-\alpha, n}}$

$\ 0 \leq\ \sigma \leq\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\alpha, n}}}$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[0,\ \frac{(n)\ S'^2}{\chi^2_{1-\alpha, n}}]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[0,\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{1-\alpha, n}}}]$

<br>
<br>
<br>

**C. One-sided Upper Confidence Interval:**

Suppose that $X_1, X_2, ..., X_n$ is a sample of size $n$ from a normal distribution having a known mean $\mu$ and an unknown variance $\sigma^2$.

$X_1, X_2, ..., X_n \sim N( \mu, \sigma^2)$

$\\ $

Significance level = $\alpha$

$\ 0\ \leq\ \frac{(n)\ S'^2}{\sigma^2} \ \leq \chi^2_{\alpha, n} $

$\ \frac{(n)\ S'^2}{\chi^2_{\alpha, n}} \leq\ \sigma^2 \leq\ \infty$

$\ \sqrt{\frac{(n)\ S'^2}{\chi^2_{\alpha, n}}} \leq\ \sigma \leq\ \infty$

$\\ $

Therefore, the $1-\alpha$ confidence interval for the variance of a normal population is:

$[\frac{(n)\ S'^2}{\chi^2_{\alpha, n}},\ \infty]$

and the $1-\alpha$ confidence interval for the standard deviation of a normal population is:

$[\sqrt{\frac{(n)\ S'^2}{\chi^2_{\alpha, n}}},\ \infty]$