# HYPOTHESIS TESTING

## 1. State the Hypotheses statement:

In [1]:
import pandas as pd

In [2]:
# H0: μ_cost = 1000 + 5 * μ_units = 1000 + 5 * 600 = 4000

# The alternative hypothesis (H1) is that the true mean weekly cost is not equal to the theoretical mean cost.
# H1: μ_cost ≠ 4000


df = print("Null Hypothesis (H0): The true mean weekly operating cost is equal to $4000.")
df = print("Alternative Hypothesis (H1): The true mean weekly operating cost is not equal to $4000.")

Null Hypothesis (H0): The true mean weekly operating cost is equal to $4000.
Alternative Hypothesis (H1): The true mean weekly operating cost is not equal to $4000.


## 2. Calculate the Test Statistic:

In [3]:
# Given values:
sample_mean_cost = 3050  # Sample mean weekly cost
theoretical_mean_cost = 4000  # Theoretical mean weekly cost (calculated in previous step)
sigma_units = 25  # Standard deviation of units produced
sigma_cost = 5 * sigma_units # Standard deviation of the cost
sample_size = 25  # Sample size (number of restaurants)

# Calculate the standard error of the mean (σ / sqrt(n))
standard_error = sigma_cost / (sample_size**0.5)

# Calculate the test statistic (t)
test_statistic = (sample_mean_cost - theoretical_mean_cost) / standard_error

print(f"Sample Mean Weekly Cost (ˉxˉ): {sample_mean_cost}")
print(f"Theoretical Mean Weekly Cost (μ): {theoretical_mean_cost}")
print(f"Standard Deviation of Cost (σ): {sigma_cost}")
print(f"Sample Size (n): {sample_size}")
print(f"Standard Error of the Mean: {standard_error}")
print(f"Test Statistic (t): {test_statistic}")

Sample Mean Weekly Cost (ˉxˉ): 3050
Theoretical Mean Weekly Cost (μ): 4000
Standard Deviation of Cost (σ): 125
Sample Size (n): 25
Standard Error of the Mean: 25.0
Test Statistic (t): -38.0


## 3. Determine the Critical Value:

In [7]:
# Given alpha level
alpha = 0.05

# For a two-tailed test, the alpha is split equally in both tails.
# We need to find the z-score that leaves alpha/2 in the upper tail.
# We can use the percent point function (inverse of the CDF) of the standard normal distribution.
from scipy.stats import norm

alpha = 0.05
# For a two-tailed test, we divide alpha by 2
alpha_two_tailed = alpha / 2

# Find the critical value for the two-tailed test
critical_value = norm.ppf(1 - alpha_two_tailed)

print(f"The critical value for a two-tailed test with alpha = {alpha} is approximately: {critical_value:.4f}")

The critical value for a two-tailed test with alpha = 0.05 is approximately: 1.9600


## 4. Make a Decision:

In [8]:
from statsmodels.stats.weightstats import ztest

In [9]:
# Compare the test statistic with the critical value to decide whether to reject the null hypothesis.
# We reject the null hypothesis if the absolute value of the test statistic is greater than the critical value.

print(f"Test Statistic: {test_statistic}")
print(f"Critical Z-Value: {critical_value}")

if abs(test_statistic) > critical_value:
    print("Decision: Reject the null hypothesis (H0).")
else:
    print("Decision: Fail to reject the null hypothesis (H0).")

Test Statistic: -38.0
Critical Z-Value: 1.959963984540054
Decision: Reject the null hypothesis (H0).


In [10]:
if p_value < alpha:
    print("H1 is rejected and Ho is accepted ")
else: 
    print("Ho is rejected and H1 is accepted ")

Ho is rejected and H1 is accepted 


## 5. Conclusion:

# CHI-SQUARE TEST

In [11]:
import pandas as pd 
import numpy as np

In [12]:
pip install researchpy 

Note: you may need to restart the kernel to use updated packages.


### # Create a dictionary representing the contingency table data

In [13]:
contingency_data = {
    'Very Satisfied':[50,70],
    'Satisfied': [80,100],
    'Neutral': [60, 90],
    'Unsatisfied': [30 , 50],
    'Very Unsatisfied':[20,50]

}

# Create a pandas DataFrame from the dictionary
contingingency_table = pd.DataFrame(contingency_data)

# Set the index of the DataFrame
contingingency_table.index = ['Smart Thermostat', 'Smart Light']

# Print the created DataFrame
print("Contingency Table:")
display(contingingency_table)

Contingency Table:


Unnamed: 0,Very Satisfied,Satisfied,Neutral,Unsatisfied,Very Unsatisfied
Smart Thermostat,50,80,60,30,20
Smart Light,70,100,90,50,50


## Based on the p-value and the chosen significance level, determine whether to reject or fail to reject the null hypothesis and state the conclusion in the context of the problem.

## Reasoning: Compare the p-value to alpha and state the conclusion based on the comparison.

In [14]:
# Compare the p-value to alpha (significance level)
alpha = 0.05

print(f"P-Value: {p_value}")
print(f"Significance Level (alpha): {alpha}")

if p_value < alpha:
    print("\nDecision: Reject the null hypothesis.")
    print("Conclusion: There is a significant association between the type of smart home device and customer satisfaction level.")
else:
    print("\nDecision: Fail to reject the null hypothesis.")
    print("Conclusion: There is no significant association between the type of smart home device and customer satisfaction level.")

P-Value: 1.959963984540054
Significance Level (alpha): 0.05

Decision: Fail to reject the null hypothesis.
Conclusion: There is no significant association between the type of smart home device and customer satisfaction level.


## 1. State the Hypotheses:

In [15]:
if p_value < alpha:
    print("Ho is rejected and H1 is accepted ")
else: 
    print("H1 is rejected and Ho is accepted ")

H1 is rejected and Ho is accepted 


## 2. Compute the Chi-Square Statistic:

In [16]:
from scipy.stats import chi2_contingency

# Perform the Chi-Square test
chi2_statistic, p_value, dof, expected_frequencies = chi2_contingency(contingingency_table)

# Print the results
print(f"Chi-Square Statistic: {chi2_statistic}")
print(f"P-Value: {p_value}")

Chi-Square Statistic: 5.638227513227513
P-Value: 0.22784371130697179


## 3. Determine the Critical Value:

In [17]:
from scipy.stats import chi2

# Significance level
alpha = 0.05

# Degrees of freedom (number of rows - 1) * (number of columns - 1)
# From the contingency table: 2 rows (Smart Thermostat, Smart Light) and 3 columns (Satisfied, Neutral, Unsatisfied)
dof = (2 - 1) * (3 - 1)
print(f"Degrees of Freedom: {dof}")

# Determine the critical value using the percent point function (inverse of CDF)
critical_value = chi2.ppf(1 - alpha, dof)

print(f"Critical Value: {critical_value}")

Degrees of Freedom: 2
Critical Value: 5.991464547107979


## 4. Make a Decision:

In [18]:
# Compare the Chi-Square statistic with the critical value
chi2_statistic = 27.067669172932327  # From previous calculation
critical_value = 5.991464547107979 # From previous calculation

print(f"Chi-Square Statistic: {chi2_statistic}")
print(f"Critical Value: {critical_value}")

if chi2_statistic > critical_value:
    print("\nDecision: Reject the null hypothesis.")
    print("Reason: The Chi-Square statistic is greater than the critical value.")
else:
    print("\nDecision: Fail to reject the null hypothesis.")
    print("Reason: The Chi-Square statistic is less than or equal to the critical value.")

Chi-Square Statistic: 27.067669172932327
Critical Value: 5.991464547107979

Decision: Reject the null hypothesis.
Reason: The Chi-Square statistic is greater than the critical value.


## Summary: ##