In [32]:
#Filename: AP_Statistics.ipynb
#Author: Kyle Carlton Larson
#Purpose: Refreshing statistics knowledge with Confidence Interval Practice
import scipy.stats as st
from math import sqrt
#https://stackoverflow.com/questions/20864847/probability-to-z-score-and-vice-versa

alpha = 0.05 #significance level: the interval computed below has (1 - alpha) confidence
sigma = 1 #population standard deviation
n = 1 #test sample size
sample_mean = 0 #centre of the confidence interval computed below
#Remember: always use sigma / sqrt(n) (the standard error of the mean) in CIs,
#because the more samples you take into account, the more tightly the sample mean
#clusters around the population mean.
#**When the population standard deviation is known, the formula for a confidence interval (CI)
#for a population mean is: sample_mean +/- z* * sigma / sqrt(n)
#where n is the sample size, and z* represents the appropriate z*-value from the
#standard normal distribution for your desired confidence level.
def Z_alpha_over_two(_sigma, _n, _alpha=None):
    """Return the margin of error of a z-based confidence interval for a mean.

    Computes (_sigma / sqrt(_n)) * z*, where z* is the standard normal
    quantile at 1 - _alpha/2, so that _alpha/2 of the probability is left in
    each tail of the curve.

    Args:
        _sigma: population standard deviation.
        _n: sample size.
        _alpha: significance level; the resulting interval has (1 - _alpha)
            confidence. When omitted, the notebook-level ``alpha`` is read at
            call time, which is what the original two-argument form did.

    Returns:
        The half-width to subtract from and add to the sample mean.
    """
    if _alpha is None:
        # Backward-compatible fallback to the notebook-wide setting.
        _alpha = alpha
    return (_sigma/sqrt(_n) ) * st.norm.ppf(1-_alpha/2)
#(1-alpha)*100% confidence that the population mean is within the interval
margin_of_error = Z_alpha_over_two(sigma, n) #half-width, computed once and reused for both bounds
lowerbound = sample_mean - margin_of_error #to put alpha/2 in each tail of curve
upperbound = sample_mean + margin_of_error


In [33]:
# z-value with 95% of the standard normal below it, then the probability
# recovered from the rounded z-value 1.64; one value per output line.
print(st.norm.ppf(.95), st.norm.cdf(1.64), sep='\n')

1.6448536269514722
0.9494974165258963


In [34]:
# Show both ends of the interval; print() applies str() to each bound.
print('lower:', lowerbound, sep='')
print('upper:', upperbound, sep='')

lower:-1.959963984540054
upper:1.959963984540054


In [44]:
#the above seems narrow and the upper is above the lower which doesn't make sense
#NOTE(review): upper > lower is the expected ordering; with n = 1 and sigma = 1
#the 95% interval is simply sample_mean +/- 1.96
#**https://www.dummies.com/education/math/statistics/how-to-calculate-a-confidence-interval-for-a-population-mean-when-you-know-its-standard-deviation/
#
#Going to try n>30 as per CLT
n=31
margin_of_error = Z_alpha_over_two(sigma, n) #half-width, computed once and reused for both bounds
lowerbound = sample_mean - margin_of_error #to put alpha/2 in each tail of curve
upperbound = sample_mean + margin_of_error

In [45]:
# Report the interval recomputed for the larger sample size.
print(''.join(['lower:', str(lowerbound)]))
print(''.join(['upper:', str(upperbound)]))

lower:-0.3520199234049176
upper:0.3520199234049176


In [None]:
#This result makes sense because, as the number of samples increases,
#either the confidence that the population mean lies within a fixed-width interval increases
#or, at a fixed confidence level, the interval becomes narrower


In [12]:
#as per the following reference: https://www.statology.org/confidence-intervals-python/
import numpy as np
import scipy.stats as st
from math import sqrt
#in the instance that sigma^2, the population variance, is unknown, estimate the
#standard error from the sample and use the t distribution with n-1 degrees of freedom
#define sample data
data = [12, 12, 13, 13, 15, 16, 17, 22, 23, 25, 26, 27, 28, 28, 29]

#deviation of every observation from the sample mean
deviations = np.asarray(data) - np.mean(data)
print(deviations)

#standard error of the mean by hand: sample standard deviation (n-1 divisor) over sqrt(n)
manual_sem = sqrt(sum(deviations**2) / (len(data) - 1)) / sqrt(len(data))
print(manual_sem)

#the same quantity from SciPy, as a cross-check of the manual formula
print(st.sem(data))

#create 95% confidence interval for population mean weight
#The confidence level is passed positionally: newer SciPy releases renamed the
#keyword from `alpha` to `confidence` and no longer accept `alpha=`.
ci_95 = st.t.interval(0.95, df=len(data) - 1, loc=np.mean(data), scale=st.sem(data))
print(ci_95)




[-8.4 -8.4 -7.4 -7.4 -5.4 -4.4 -3.4  1.6  2.6  4.6  5.6  6.6  7.6  7.6
  8.6]
1.6981782956478757
1.6981782956478755
(16.75776979778498, 24.042230202215016)


In [21]:
#to check this against the Quick Study Reference Guide "Worlds #1 Outline"
xbar=108 # sample mean
s=15 # sample standard deviation; to get the standard error, divide s by sqrt(n)
n=26 # sample size
confidence_level =0.95
# `scale` is the standard error s/sqrt(n) (what st.sem(data) would return for raw data).
# The confidence level is passed positionally: newer SciPy releases renamed the
# keyword from `alpha` to `confidence` and no longer accept `alpha=`.
t_interval = st.t.interval(confidence_level, df=n-1, loc=xbar, scale=(s/sqrt(n)))
# the interval to validate against is 102 to 114
t_interval

(101.94136888298902, 114.05863111701098)

In [None]:
#this checks out

In [41]:
#to check the manual method at the top of this notebook
#(same centre, unit spread and sample size, but using the t distribution instead of the normal)
xbar=0
s=1 # sample standard deviation; to get the standard error, divide s by sqrt(n)
n=31
confidence_level =1-alpha
# The confidence level is passed positionally: newer SciPy releases renamed the
# keyword from `alpha` to `confidence` and no longer accept `alpha=`.
t_interval = st.t.interval(confidence_level, df=n-1, loc=xbar, scale=(s/sqrt(n)))
t_interval

(-0.3668029613349472, 0.3668029613349472)

In [47]:
#what if n is 60
n=60
margin_of_error = Z_alpha_over_two(sigma, n) #half-width, computed once and reused for both bounds
lowerbound = sample_mean - margin_of_error #to put alpha/2 in each tail of curve
upperbound = sample_mean + margin_of_error
print('lower:'+str(lowerbound))
print('upper:'+str(upperbound))
#to check the manual method at the top of this notebook
xbar=0
s=1 # sample standard deviation; to get the standard error, divide s by sqrt(n)
confidence_level =1-alpha
# The confidence level is passed positionally: newer SciPy releases renamed the
# keyword from `alpha` to `confidence` and no longer accept `alpha=`.
t_interval = st.t.interval(confidence_level, df=n-1, loc=xbar, scale=(s/sqrt(n)))
t_interval

lower:-0.25303026237633197
upper:0.25303026237633197


(-0.2583273923715249, 0.2583273923715249)

In [49]:
#what if n is 120
n=120
margin_of_error = Z_alpha_over_two(sigma, n) #half-width, computed once and reused for both bounds
lowerbound = sample_mean - margin_of_error #to put alpha/2 in each tail of curve
upperbound = sample_mean + margin_of_error
print('lower:'+str(lowerbound))
print('upper:'+str(upperbound))
#to check the manual method at the top of this notebook
xbar=0
s=1 # sample standard deviation; to get the standard error, divide s by sqrt(n)
confidence_level =1-alpha
# The confidence level is passed positionally: newer SciPy releases renamed the
# keyword from `alpha` to `confidence` and no longer accept `alpha=`.
t_interval = st.t.interval(confidence_level, df=n-1, loc=xbar, scale=(s/sqrt(n)))
t_interval

lower:-0.17891941437171568
upper:0.17891941437171568


(-0.1807575614052859, 0.1807575614052859)

In [50]:
#what if n is 3000
n=3000
margin_of_error = Z_alpha_over_two(sigma, n) #half-width, computed once and reused for both bounds
lowerbound = sample_mean - margin_of_error #to put alpha/2 in each tail of curve
upperbound = sample_mean + margin_of_error
print('lower:'+str(lowerbound))
print('upper:'+str(upperbound))
#to check the manual method at the top of this notebook
xbar=0
s=1 # sample standard deviation; to get the standard error, divide s by sqrt(n)
confidence_level =1-alpha
# The confidence level is passed positionally: newer SciPy releases renamed the
# keyword from `alpha` to `confidence` and no longer accept `alpha=`.
t_interval = st.t.interval(confidence_level, df=n-1, loc=xbar, scale=(s/sqrt(n)))
t_interval

lower:-0.03578388287434314
upper:0.03578388287434314


(-0.035798330602566444, 0.035798330602566444)