In [4]:
import numpy as np

# HYPOTHESIS TESTING

In [2]:
# 1. State the Hypotheses statement:

'''
To test if actual costs are higher than predicted.

Null Hypothesis(Ho):
    actual mean weekly cost = model predicted cost
    x_bar = μ 
Alternative Hypothesis(H1):
    actual mean weekly cost > model predicted cost
    x_bar > μ

This is a Right-tailed 
'''

In [7]:
# 2. Calculate the Test Statistic:

sample_mean = 3050
theoretical_mean = 1000 + (5*600)
sd = 5 * 25
n = 25

z = (sample_mean - theoretical_mean) / (sd / np.sqrt(n))
print(z)

-38.0


In [15]:
# 3. Determine the Probability and compare: Using the alpha level of 5% (α = 0.05)

from scipy.stats import norm

alpha = 0.05
z_critical = norm.ppf(1 - alpha)
print(z_critical) # z_critical is higher than z

1.6448536269514722


In [18]:
# 4. Make a Decision:

if (z > z_critical):
    print("Ho is Rejected and H1 is Accepted. It means actual mean weekly cost > model predicted cost")
else:
    print("Ho is Accepted and H1 is Rejected. It means actual mean weekly cost = model predicted cost")

Ho is Accepted and H1 is Rejected. It means actual mean weekly cost = model predicted cost


In [None]:
# 5. Conclusion:

'''
There is no evidence that the weekly operating costs are higher than model predicted.
We observed that sample mean (₹3050) is lower than the model’s predicted ₹4000.
So, the restaurant owners claim that “costs have increased” is not supported by this data.
'''

# CONFIDENCE INTERVALS

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import t, norm

In [2]:
# confidence interval

data = np.array([1.13, 1.55, 1.43, 0.92, 1.25, 1.36, 1.32, 0.85, 1.07, 1.48, 1.20, 1.33, 1.18, 1.22, 1.29])
n = len(data)
sample_mean = data.mean()
sample_std = data.std()
print("Sample Mean:", sample_mean)
print("Sample_Standard Deviation:", sample_std)

Sample Mean: 1.2386666666666666
Sample_Standard Deviation: 0.18661427836285438


In [3]:
# a. Build 99% Confidence Interval Using Sample Standard Deviation

alpha = 0.01
df = n - 1
t_critical = t.ppf(1 - alpha/2, df)

# Margin of error
margin = t_critical * (sample_std / np.sqrt(n))

lower = sample_mean - margin
upper = sample_mean + margin

print('t_critical value:', t_critical)
print("99% Confidence Interval using Sample SD:", '[',np.round(lower,2), ',', np.round(upper, 2), ']')

t_critical value: 2.976842734370834
99% Confidence Interval using Sample SD: [ 1.1 , 1.38 ]


In [4]:
# Interpretation:
'''
We are 99% confident that the true mean durability lies between 1.10 and 1.38 million characters.
'''

'\nWe are 99% confident that the true mean durability lies between 1.10 and 1.38 million characters.\n'

In [5]:
# b. Build 99% Confidence Interval Using Known Population Standard Deviation

sigma = 0.2
z_critical = norm.ppf(1 - alpha/2)

margin_z = z_critical * (sigma / np.sqrt(n))
lower_z = sample_mean - margin_z
upper_z = sample_mean + margin_z

print('z_critical value:', z_critical)
print("99% Confidence Interval using Population SD:", '[',np.round(lower_z,2), ',', np.round(upper_z, 2), ']')

z_critical value: 2.5758293035489004
99% Confidence Interval using Population SD: [ 1.11 , 1.37 ]


In [6]:
# Interpretation:
'''
We are 99% confident that the true mean durability lies between 1.115 and 1.381 million characters when population SD is known.
'''

'\nWe are 99% confident that the true mean durability lies between 1.115 and 1.381 million characters when population SD is known.\n'