In [1]:
import math
import scipy
from scipy.stats import chisquare
import statistics as stat
import scipy.stats as st
import pandas as pd
import numpy as np
from scipy.stats import t

## Interval Estimation / Population Mean
Provides a range of plausible values, i.e. an interval, for the parameter of interest.

![image.png](attachment:d47bbdfb-35df-4379-9771-957a25ba25a2.png)

![image.png](attachment:26636a75-9ab4-4527-9fd5-a837748376c7.png)

In [14]:
alpha = 0.05
s =  0.2143 #Std deviation
Z = st.norm.ppf(1-(alpha/2))
M = 2.03125
LCL = M - (Z*s)
HCL = M + (Z*s)
LCL,HCL

(1.6112297181130664, 2.4512702818869334)

## CI for true mean
![image.png](attachment:7c5be379-6420-4a14-ad37-72817e4dda4d.png)

In [13]:
alpha = 0.05
v = 22512.94
s = np.sqrt(v) #if vairance is given
# s =   #Std deviation
n = 10  #Sample length
y = 576.6 #Sample mean (y bar)
Z = st.norm.ppf(1-(alpha/2))
b = (Z*(s/np.sqrt(n)))
LCL = y - b
HCL = y + b
Z,s,LCL,HCL

(1.959963984540054, 150.04312713350117, 483.6040151712142, 669.5959848287858)

In [10]:
alpha/2

0.025

### Sample size for margin of error (E)

![image.png](attachment:92b4ed67-385f-4c18-bb05-56311203a0ab.png)

In [11]:
alpha = 0.05
s = 2.5 #std deviation
Z = st.norm.ppf(1-(alpha/2))
E = 0.5
n = ((Z**2)*(s**2))/(E**2) #always round it to nearst whole no.
np.round(n)

96.0

### CI for normal population where std deviation is unkown and n <30

![image.png](attachment:13c60c46-2477-4698-bf36-c90106fca40d.png)

In [None]:
ogi_data = [4500,5100,6199,5400,4800,4900,5225,5600,5877]
data = np.array(ogi_data)
data = np.sort(data)
n = len(data)
y = np.sum(data)/n
v = stat.variance(data)
s = np.sqrt(v)
df = n-1
y,v,s,df

(2.03125, 0.04595535714285713, 0.21437200643474216, 7)

In [33]:
from scipy.stats import t

alpha = 0.1
t = t.ppf(1 - (alpha/2), df)
t

1.8595480375228424

In [34]:
t = 1.86
b = np.sqrt(v/n)
lcl= y - (t*b)
hcl= y + (t*b)
lcl,hcl,t,b,n

(4953.994544522033, 5624.005455477967, 1.86, 180.11045993439075, 9)

## Testing Probabilty of observing Sample mean for a given value
P(Z> or < ![image.png](attachment:89b89d00-813d-4645-8044-a9697baae312.png))

In [9]:
s = 81.1 #population std
n =  10 #sample size
y = 214.7 #sample mean
M = 189 #population mean
Zobs = (y-M)/(s/np.sqrt(n))
Zobs

Z = st.norm.cdf(Zobs)
P= 1 - Z #for P(Z>Zobs)
# P= Z #for P(Z<Zobs)
P

0.1581469764742388

# Hypothesis Testing – 1 mean

![image.png](attachment:063df6b8-8b0b-40ea-95cc-8b0d23a3652f.png)

![image.png](attachment:0af6f7b5-3bfb-4f72-9e4c-e89f86edcf2c.png)

![image.png](attachment:ec52f4a4-6ff7-4089-967d-1d5c5ddd018f.png)

In [35]:
s = 10 #population std
n =  25 #sample size
y = 100 #sample mean
#Ho: y=100 ; Ha y!=100
alpha= 0.05

Z = st.norm.ppf(1-(alpha/2))
b = (Z*(s/np.sqrt(n)))
LCL = y - b
HCL = y + b
LCL,HCL

(96.0800720309199, 103.9199279690801)

![image.png](attachment:05028140-183a-4f43-9278-43ced74da29f.png)

In [5]:
s = 2.03 #population std
n =  10 #sample size
y = 214.7 #sample mean
M = 189 #population mean
Zobs = (y-M)/(s/np.sqrt(n))
Zobs

1.0021027850348623

In [6]:
alpha = 0.05
Z = st.norm.ppf(1-(alpha)) #upper tail
# Z = st.norm.ppf(1-(1-alpha)) #lower tail
# Z = st.norm.ppf(1-(alpha/2)) #2 tail

Z

1.6448536269514722

### Computing 𝛽 ( and power = 1- 𝛽)
For 1 tail:
![image.png](attachment:a67ffad6-0957-44d5-801b-f546e1830075.png)

For 2 tail:
![image.png](attachment:ca781540-c8d7-43b9-9a65-fc9312a8a71d.png)

In [45]:
s = 10 #population std
n =  25 #sample size
M0 = 104 #observed mean/daily mean yeild
MA = 100 #population mean/actual mean
Zobs = np.abs(M0-M)/(s/np.sqrt(n))
Zobs
alpha = 0.05
Z = st.norm.ppf(1-(alpha)) #upper tail
# Z = st.norm.ppf(1-(alpha/2)) #2 tail
beta = st.norm.cdf(Z-Zobs)
power = 1-beta
beta,power #Power is the probabilty of rejecting H0 when H0 is false

(0.3612399686876647, 0.6387600313123353)

### Sample size determination

To determine what sample size is required to detect a difference between a hypothesized and true mean (delta) with a given power (1-b) and specified significance level α and known standard deviation s.

1-Tailed:
![image.png](attachment:a63053e2-a083-436e-9a4d-528649f6bc64.png)

2-tailed:
![image.png](attachment:e9e688d6-938e-44c9-945a-dd2c836a129a.png)

![image.png](attachment:8c786851-15b6-464d-bf8b-99526f2f1eec.png)

In [50]:
s = 10 #population std
M0 = 104 #observed mean/daily mean yeild
MA = 100 #population mean/actual mean
beta = 0.8
alpha = 0.05
delta = np.abs(M0-MA)
Zalpha = st.norm.ppf(1-(alpha))
Zbeta = st.norm.ppf((beta))

n = (s**2)*((Zalpha+Zbeta)**2/(delta**2))
Zalpha,Zbeta,delta,n

(1.6448536269514722, 0.8416212335729143, 4, 38.64098270012354)

## P-Values
if p-value < α, reject Ho and p-value > α, fail to reject Ho.

![image.png](attachment:6baec55e-0760-4caa-b2fe-6ea130c1d46c.png)

In [60]:
alpha = 0.05
Z = st.norm.ppf(1-(alpha))
P = 1-st.norm.cdf(Z)
P,Z

(0.050000000000000044, 1.6448536269514722)

![image.png](attachment:48d89215-b6f9-447d-8856-4b9844a75382.png)

sigma = normal std deviation

In [18]:
# ogi_data = [7.23,7.25,7.28,7.29,7.32,7.26,7.27,7.24]
# data = np.array(ogi_data)
# data = np.sort(data)
# n = len(data) #if cal from data
# y = np.mean(data) #sample mean ,if cal from data
# v_sample = stat.variance(data)  #sample variance ,if cal from data
# s_sample = np.sqrt(v) #Sample Std deviation, cal from v_sample

y = 0.14 #sample mean, if given in question
n = 14 #if given in question
M = 0.1 #pop mean
v_sample = 0.06464615384615385  #sample variance ,given in question
s_sample = np.sqrt(v_sample) #Sample Std deviation, given in question
# s = 7.5 #Pop Std deviation, given in question

n,y,s_sample,M#s,data

(14, 0.14, 0.25425607927078925, 0.1)

In [17]:
# if population std deviation (s) is known
import scipy.stats as st
alpha = 0.01
# s= 0.5
#1T Test
Zobs = (y-M)/(s/np.sqrt(n)) 
# Z = st.norm.ppf(1-(alpha)) #UT
Z = st.norm.ppf(1-(1-alpha)) #LT

#2T Test
# Zobs = np.abs((y-M)/(s/np.sqrt(n)))
# Z = st.norm.ppf(1-(alpha/2)) #2T

Zobs,Z

(0.0, -2.3263478740408408)

![image.png](attachment:b74cb90f-9e68-47d0-b729-c3030c82ab0c.png)

In [4]:
# if population std deviation is un-known
from scipy.stats import t
alpha = 0.05
df = n-1
# tobs = (y-M)/(s_sample/np.sqrt(n)) #1T
tobs = np.abs((y-M)/(s_sample/np.sqrt(n))) #2T
# t = t.ppf(1 - (alpha), df) #UT
# t = t.ppf(1 - (1-alpha), df) #LT
t = t.ppf(1 - (alpha/2), df) #2T

tobs,t

(0.5886439211215839, 2.1603686564610127)