In [1]:
# import libraries
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math
import statistics

In [2]:
num_samples = [5, 10, 20, 40, 80, 160, 1000]
desired_mean = 10
desired_std_dev = 2.0
confidence = 95 # in percent


In [3]:
#1. The simple, analytic approach with large n and/or known standard deviation.
for t in num_samples:
    samples = np.random.normal(loc=desired_mean, scale=desired_std_dev, size=t)
    
    n, min_max, actual_mean, var, skew, kurt = stats.describe(samples)
    #print(n, min_max, mean, var, skew, kurt)
    
    actual_std=math.sqrt(var)
    #actual_std = statistics.stdev(samples) # Return the same value
    
    #The location (loc) keyword specifies the mean.
    #The scale (scale) keyword specifies the standard deviation.

    # We will assume a normal distribution
    R = stats.norm.interval(0.05,loc=actual_mean,scale=actual_std)
    
    print("size: {}   actual mean = {:.4f} actual stdv = {:.4f}".format(n, actual_mean, actual_std))
    print("A 95% confidence level interval:   ",list(R))
   

size: 5   actual mean = 10.0514 actual stdv = 2.1061
A 95% confidence level interval:    [9.919370120091392, 10.183506023295744]
size: 10   actual mean = 8.4295 actual stdv = 2.1512
A 95% confidence level interval:    [8.294609041794516, 8.564400921153954]
size: 20   actual mean = 10.1857 actual stdv = 2.7063
A 95% confidence level interval:    [10.015951575136759, 10.35536139605645]
size: 40   actual mean = 10.0336 actual stdv = 1.8500
A 95% confidence level interval:    [9.917617283375801, 10.149637835456016]
size: 80   actual mean = 9.9633 actual stdv = 2.0305
A 95% confidence level interval:    [9.835976722914435, 10.090627491158468]
size: 160   actual mean = 9.9512 actual stdv = 1.9739
A 95% confidence level interval:    [9.82743646408973, 10.074988744842505]
size: 1000   actual mean = 10.0481 actual stdv = 2.0432
A 95% confidence level interval:    [9.919941557346485, 10.176185660454854]


In [4]:

#2. The simple, analytic approach with small n and unknown population standard deviation

for n in num_samples:
    samples = np.random.normal(loc=desired_mean, scale=desired_std_dev, size=n)

    actual_mean = np.mean(samples)
    actual_std = statistics.stdev(samples)
    #actual_std = np.std(samples)   #this function is /n and stdev is /(n-1)
    
    # compute confidence intervals
    citmp = (1-confidence/100)/2
    confint = actual_mean + stats.t.ppf([citmp, 1-citmp],n-1) * actual_std/np.sqrt(n)
    
    
    print("size: {}   actual mean = {:.4f} actual stdv = {:.4f}".format(n, actual_mean, actual_std))
    print("A 95% confidence level interval:   ",confint)

size: 5   actual mean = 10.0549 actual stdv = 1.0983
A 95% confidence level interval:    [ 8.69123656 11.41862113]
size: 10   actual mean = 10.4361 actual stdv = 1.8144
A 95% confidence level interval:    [ 9.13819764 11.7340258 ]
size: 20   actual mean = 10.2997 actual stdv = 2.5657
A 95% confidence level interval:    [ 9.09886892 11.5004408 ]
size: 40   actual mean = 10.1453 actual stdv = 2.2684
A 95% confidence level interval:    [ 9.41978342 10.87074362]
size: 80   actual mean = 9.8356 actual stdv = 2.0382
A 95% confidence level interval:    [ 9.38202952 10.28918967]
size: 160   actual mean = 10.0728 actual stdv = 1.9678
A 95% confidence level interval:    [ 9.76554905 10.38004018]
size: 1000   actual mean = 9.9681 actual stdv = 2.0465
A 95% confidence level interval:    [ 9.84114831 10.09513505]


In [5]:
#3. Bootstrapped confidence intervals

### now for bootstrapping
numBoots  = 1000
bootmeans = np.zeros(numBoots)

for n in num_samples:
    samples = np.random.normal(loc=desired_mean, scale=desired_std_dev, size=n)

    actual_mean = np.mean(samples)
    #actual_std = np.std(samples)
    actual_std = statistics.stdev(samples)

    # resample with replacement
    for booti in range(numBoots):
        bootmeans[booti] = np.mean( np.random.choice(samples,n) )
    
    boot_mean = np.mean(bootmeans)
    #boot_std = np.std(bootmeans)
    boot_std = statistics.stdev(bootmeans)

    # find confidence intervals
    confint = [0,0] # initialize
    confint[0] = np.percentile(bootmeans,(100-confidence)/2)
    confint[1] = np.percentile(bootmeans,100-(100-confidence)/2)
    
    print("size:     {}   actual mean = {:.4f} actual stdv = {:.4f}".format(n, actual_mean, actual_std))
    print("NumBoots: {}    boot mean = {:.4f}   boot stdv = {:.4f}".format(numBoots,  boot_mean,  boot_std))
    print("A 95% confidence level interval:   ",confint)

size:     5   actual mean = 9.6232 actual stdv = 0.8857
NumBoots: 1000    boot mean = 9.6305   boot stdv = 0.3654
A 95% confidence level interval:    [8.958782790448506, 10.366719834209267]
size:     10   actual mean = 10.5169 actual stdv = 2.1081
NumBoots: 1000    boot mean = 10.5091   boot stdv = 0.6250
A 95% confidence level interval:    [9.316260253920557, 11.723124275868104]
size:     20   actual mean = 10.6033 actual stdv = 2.3183
NumBoots: 1000    boot mean = 10.5985   boot stdv = 0.5088
A 95% confidence level interval:    [9.622365187850624, 11.596061945110334]
size:     40   actual mean = 9.8997 actual stdv = 1.5399
NumBoots: 1000    boot mean = 9.8951   boot stdv = 0.2442
A 95% confidence level interval:    [9.415977132828234, 10.342367392052523]
size:     80   actual mean = 10.0281 actual stdv = 1.7002
NumBoots: 1000    boot mean = 10.0314   boot stdv = 0.1882
A 95% confidence level interval:    [9.655434057579493, 10.411524461494599]
size:     160   actual mean = 9.9815 act

In [None]:
#4. Bayesian credible intervals