In [11]:
import numpy as np
import pandas as pd
import scipy.stats as sts

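1. The mean of a binomial distribution with $n$ trials and success probability $p$ is $\mu = np$.
2. Its standard deviation is $\sigma = \sqrt{npq}$, where $q = 1 - p$.

Using these two moments, we can approximate the binomial distribution with a normal distribution $\mathcal{N}(\mu, \sigma^2)$.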

In [12]:
def normal_approx_binom(samples, p, confidence=0.95):
    '''
    Confidence interval, in percent, from the normal approximation
    to a binomial distribution.

    The number of successes in `samples` trials with success probability
    `p` has mean samples*p and standard deviation sqrt(samples*p*(1-p)).
    A normal distribution with those two moments stands in for the binomial.

    Parameters
    ----------
    samples : number of trials (the sample size); must be positive.
    p : success probability of a single trial; must lie in [0, 1].
    confidence : central probability mass covered by the interval,
        strictly between 0 and 1. Defaults to 0.95.

    Returns
    -------
    list
        [lower, upper] bounds, expressed as percentages of `samples`.

    Raises
    ------
    ValueError
        If `samples`, `p` or `confidence` is outside its valid range.
    '''
    # np.any/np.asarray keep these checks usable for array-like inputs too.
    if np.any(np.asarray(samples) <= 0):
        raise ValueError("samples must be positive")
    if np.any((np.asarray(p) < 0) | (np.asarray(p) > 1)):
        raise ValueError("p must be between 0 and 1")
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")

    mu = samples*p
    sigma = np.sqrt(mu*(1-p))

    # Standard-normal quantiles that cut off an equal tail on each side.
    tail = (1 - confidence)/2
    z_low, z_high = sts.norm.ppf([tail, 1 - tail])

    # Divide by num samples to get proportion, multiply by 100 to get percent.
    normalization = samples/100

    # The quantiles are scaled by hand instead of freezing norm(mu, sigma):
    # a frozen normal with zero scale yields nan, whereas here the interval
    # simply collapses onto the mean when p is exactly 0 or 1.
    return [(mu + z_low*sigma)/normalization, (mu + z_high*sigma)/normalization]

In [13]:
def get_sampling_error(samples, p):
    '''
    Sampling error for `samples` trials at proportion `p`: the distance from
    the midpoint of the normal_approx_binom interval to its lower bound,
    rounded to the nearest whole percentage point.
    '''
    lower, upper = normal_approx_binom(samples, p)
    midpoint = np.mean([lower, upper])

    # Half-width of the interval, rounded to a whole number.
    return np.round(midpoint - lower)

In [14]:
# Q1: sampling error for 1000 samples at a proportion of 10%
get_sampling_error(samples=1000, p=0.1)

2.0

In [15]:
# One row per sample size, one entry per proportion from 10% to 90%.
table = [
    [get_sampling_error(size, prob)
     for prob in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)]
    for size in (1000, 750, 500, 250, 100)
]

In [16]:
# Flip the table so rows are proportions and columns are sample sizes.
np.transpose(np.array(table))

array([[ 2.,  2.,  3.,  4.,  6.],
       [ 2.,  3.,  4.,  5.,  8.],
       [ 3.,  3.,  4.,  6.,  9.],
       [ 3.,  4.,  4.,  6., 10.],
       [ 3.,  4.,  4.,  6., 10.],
       [ 3.,  4.,  4.,  6., 10.],
       [ 3.,  3.,  4.,  6.,  9.],
       [ 2.,  3.,  4.,  5.,  8.],
       [ 2.,  2.,  3.,  4.,  6.]])

In [17]:
# Columns are the sample sizes; each row corresponds to one proportion p
# (0.1 through 0.9), matching the grid used to build `table`.
pd.DataFrame(np.array(table).T, columns=["1000", "750", "500","250", "100"])

Unnamed: 0,1000,750,500,250,100
0,2.0,2.0,3.0,4.0,6.0
1,2.0,3.0,4.0,5.0,8.0
2,3.0,3.0,4.0,6.0,9.0
3,3.0,4.0,4.0,6.0,10.0
4,3.0,4.0,4.0,6.0,10.0
5,3.0,4.0,4.0,6.0,10.0
6,3.0,3.0,4.0,6.0,9.0
7,2.0,3.0,4.0,5.0,8.0
8,2.0,2.0,3.0,4.0,6.0
