# Confidence Interval for a proportion


## Data generation

In [0]:
import numpy as np

In [0]:
# Fix the global NumPy seed so every run draws the same data.
np.random.seed(1)

# Population: 100,000 integers drawn uniformly from {0, 1}.
statistical_population = np.random.randint(low=0, high=2, size=100_000)

# Sample: 1,000 observations drawn from the population
# (np.random.choice samples with replacement by default).
random_sample = np.random.choice(a=statistical_population, size=1_000)

In [31]:
# True proportion of ones in the whole population (the value the intervals below try to cover).
np.mean(statistical_population)

0.49771

## Sample mean

In [32]:
# Point estimate of the proportion: the share of ones in the sample.
np.mean(random_sample)

0.502

## Confidence interval for a proportion

In [0]:
from statsmodels.stats.proportion import proportion_confint

### Confidence interval based on the normal approximation (Wald interval)

$$\hat{p}\pm z_{1-\frac{\alpha}{2}} \sqrt{\frac{\hat{p}\left(1-\hat{p}\right)}{n}}$$

In [0]:
# Normal-approximation interval for the sample proportion.
# `random_sample.sum()` is the vectorized count of ones; the built-in `sum()`
# would iterate over the array element by element in Python.
normal_interval = proportion_confint(
    count=random_sample.sum(),
    nobs=len(random_sample),
    alpha=0.05,  # library default, spelled out: 95% confidence level
    method='normal',
)

In [39]:
# Report the interval bounds and the total width, each rounded to three decimals.
print(
    f'Normal_interval: [{normal_interval[0]:.3f}, {normal_interval[1]:.3f}]',
    f'Width: {(normal_interval[1] - normal_interval[0]):.3f}',
    sep='\n',
)

Normal_interval: [0.471, 0.533]
Width: 0.062


### Wilson score interval

$$\frac1{ 1 + \frac{z^2}{n} } \left( \hat{p} + \frac{z^2}{2n} \pm z \sqrt{ \frac{ \hat{p}\left(1-\hat{p}\right)}{n} + \frac{
z^2}{4n^2} } \right), \;\; z \equiv z_{1-\frac{\alpha}{2}}$$ 

In [0]:
# Wilson score interval for the same sample.
# `random_sample.sum()` is the vectorized count of ones; the built-in `sum()`
# would iterate over the array element by element in Python.
wilson_interval = proportion_confint(
    count=random_sample.sum(),
    nobs=len(random_sample),
    alpha=0.05,  # library default, spelled out: 95% confidence level
    method='wilson',
)

In [40]:
# Report the interval bounds and the total width, each rounded to three decimals.
print(
    f'Wilson_interval: [{wilson_interval[0]:.3f}, {wilson_interval[1]:.3f}]',
    f'Width: {(wilson_interval[1] - wilson_interval[0]):.3f}',
    sep='\n',
)

Wilson_interval: [0.471, 0.533]
Width: 0.062


## How to Find a Sample Size Given a Confidence Level and Interval Half-Width

In [0]:
from statsmodels.stats.proportion import samplesize_confint_proportion

In [44]:
# `samplesize_confint_proportion` takes the interval HALF-length (margin of
# error), not the full width: half_length=0.01 targets an interval of total
# width 0.02 around the sample proportion at the 95% confidence level.
required_size = samplesize_confint_proportion(
    proportion=random_sample.mean(),
    half_length=0.01,
    alpha=0.05,
    method='normal',
)

# Round up: the sample size must be a whole number of observations.
n_samples = int(np.ceil(required_size))

print(f'Requires number of samples: {n_samples}')

Requires number of samples: 9604


In [0]:
# Re-seed so the larger draw is reproducible, then replace the earlier sample
# with `n_samples` observations taken (with replacement) from the same population.
np.random.seed(seed=1)
random_sample = np.random.choice(a=statistical_population, size=n_samples, replace=True)

In [0]:
# Normal-approximation interval recomputed on the current `random_sample`.
# `random_sample.sum()` is the vectorized count of ones; the built-in `sum()`
# would iterate over the array element by element in Python.
normal_interval = proportion_confint(
    count=random_sample.sum(),
    nobs=len(random_sample),
    alpha=0.05,  # library default, spelled out: 95% confidence level
    method='normal',
)

In [48]:
# Report the interval bounds (three decimals) and the width at full precision.
print(
    f'Normal_interval: [{normal_interval[0]:.3f}, {normal_interval[1]:.3f}]',
    f'Width: {(normal_interval[1] - normal_interval[0])}',
    sep='\n',
)

Normal_interval: [0.482, 0.502]
Width: 0.019996925850422953
