# Hypothesis Testing <hr style="border:2.5px solid #126782">

Data Analyst: Gyro A. Madrona<br>
Department: Electrical Engineering

In [27]:
# One-time setup: uncomment to install/upgrade statsmodels (imported below for TTestPower).
#%pip install statsmodels --upgrade

In [28]:
# imports and packages
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.power import TTestPower

In [29]:
# load dataset
# The path uses a forward slash so it resolves on Windows, macOS and Linux;
# a backslash separator is only understood as a directory separator on Windows.
df = pd.read_csv("raw/defects-30-sample.csv",
                 delimiter=",")
df

Unnamed: 0,Sample,Defects
0,1,13
1,2,12
2,3,10
3,4,11
4,5,10
5,6,6
6,7,12
7,8,12
8,9,9
9,10,15


In [30]:
# summary of dataframe: index range, column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Sample   30 non-null     int64
 1   Defects  30 non-null     int64
dtypes: int64(2)
memory usage: 612.0 bytes


In [31]:
# summary of statistics: count, mean, std, min, quartiles and max for each numeric column
df.describe()

Unnamed: 0,Sample,Defects
count,30.0,30.0
mean,15.5,11.033333
std,8.803408,2.566137
min,1.0,5.0
25%,8.25,9.25
50%,15.5,11.5
75%,22.75,13.0
max,30.0,15.0


In [32]:
# parameters
# reference values the sample is tested against
# (presumably the known population mean / standard deviation — source not shown here)
pop_mean, pop_std = 10.32, 3.17

# sample statistics taken from the Defects column
sample_mean = df['Defects'].mean()
n = df['Defects'].count()  # number of non-null observations

In [33]:
# z-score: how many standard errors the sample mean lies from the population mean
std_error = pop_std / np.sqrt(n)  # standard error of the mean
Z_score = (sample_mean - pop_mean) / std_error
Z_score

np.float64(1.2325197403375971)

In [34]:
# two-tailed test
# Use |Z| so the result is a valid probability when the sample mean falls
# below the population mean (1 - cdf(Z) would exceed 0.5 and p would exceed 1).
# norm.sf is the survival function (1 - cdf), evaluated without the
# cancellation error of subtracting a value close to 1.
p_value = 2 * stats.norm.sf(abs(Z_score))
p_value

np.float64(0.21775499725951342)

In [35]:
# verdict
alpha = 0.05  # significance level

if p_value < alpha:
    # reject null hypothesis: report the observed mean and the direction of
    # the shift from the data rather than from hard-coded text
    direction = "increases" if sample_mean > pop_mean else "decreases"
    print(f"The average defect rate {direction} to {sample_mean:.2f} units.")
else:
    # fail to reject null hypothesis
    print("The average defect rate is the same.")

The average defect rate is the same.


In [36]:
# one-tailed test (upper tail: probability of a Z at least this large)
# norm.sf(z) equals 1 - norm.cdf(z) but stays accurate for large z, where
# the subtraction would round to 0.
# Note: this reuses the name p_value, replacing the two-tailed result.
p_value = stats.norm.sf(Z_score)
p_value

np.float64(0.10887749862975671)

# Power

In [37]:
# cohen's d: standardized effect size, |sample mean - population mean| / population std
# NOTE(review): sample_mean is re-entered here as the rounded literal 11.03,
# replacing the value computed from the data earlier — confirm this is intended.
pop_mean, pop_std = 10.32, 3.17
sample_mean = 11.03

mean_shift = sample_mean - pop_mean
cohen_d = abs(mean_shift) / pop_std
cohen_d

0.22397476340693978

In [38]:
# power of a test
# Probability that a t-test at alpha = 0.05 with 30 observations detects an
# effect of size cohen_d (TTestPower's default alternative is two-sided).
power = TTestPower().power(
    effect_size = cohen_d,  # the notebook defines cohen_d; the name `d` is never assigned
    nobs = 30,
    alpha = 0.05,
)
power

np.float64(0.22041216027828034)

In [39]:
# 80% power sample size
# solve_power solves for whichever argument is None; here that is nobs,
# the number of observations needed to reach the target power.
sample_80p = TTestPower().solve_power(
    effect_size = cohen_d,  # the notebook defines cohen_d; the name `d` is never assigned
    nobs = None,  # the unknown being solved for
    power = 0.8,  # target power
    alpha = 0.05,
)
sample_80p

158.3933039889269