In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import scipy.stats as stats
import math

###ESTIMATION AND CONFIDENCE INTERVALS

Data
A total of 15 print-heads were randomly selected and tested until failure. The durability of each print-head (in millions of characters) was recorded as follows:
1.13, 1.55, 1.43, 0.92, 1.25, 1.36, 1.32, 0.85, 1.07, 1.48, 1.20, 1.33, 1.18, 1.22, 1.29


a. Build 99% Confidence Interval Using Sample Standard Deviation
Assuming the sample is representative of the population, construct a 99% confidence interval for the mean number of characters printed before the print-head fails using the sample standard deviation. Explain the steps you take and the rationale behind using the t-distribution for this task.


In [30]:
data = np.array([1.13, 1.55, 1.43, 0.92, 1.25, 1.36, 1.32, 0.85,
                 1.07, 1.48, 1.20, 1.33, 1.18, 1.22, 1.29])


In [31]:
len(data)

15

In [32]:
n=15


In [33]:
x_bar = np.mean(data)
x_bar

np.float64(1.2386666666666666)

In [34]:
s = np.std(data, ddof=1)   # Sample std dev (ddof=1 for sample)
s

np.float64(0.19316412956959936)

In [35]:
CI=0.99,
alpha=0.01,
df=14   # Degrees of freedom = n-1=15-1=14

99% CI using t-distribution (σ unknown)

In [36]:
0.01/2

0.005

In [37]:
0.005*2

0.01

In [39]:
t_critical=stats.t.ppf(0.005,df)
t_critical


np.float64(-2.9768427343708344)

In [56]:
std_error=s/np.sqrt(n)  #Standard error using sample std
std_error

np.float64(0.04987476379384733)

In [40]:
### Margin of Error
Margin_error= t_critical*(s/np.sqrt(n))
Margin_error

np.float64(-0.148469328228176)

In [57]:
ci_lower_t = x_bar - Margin_error
ci_upper_t = x_bar + Margin_error
print(ci_lower_t, ci_upper_t)

1.3871359948948425 1.0901973384384906


In [58]:
print(f'99% CI: ({ci_lower_t:.4f}, {ci_upper_t:.4f}) million characters')
print(f'Interpretation: 99% confident true mean durability is between {ci_lower_t:.3f} and {ci_upper_t:.3f}M chars')

99% CI: (1.3871, 1.0902) million characters
Interpretation: 99% confident true mean durability is between 1.387 and 1.090M chars


b. Build 99% Confidence Interval Using Known Population Standard Deviation
If it were known that the population standard deviation is 0.2 million characters, construct a 99% confidence interval for the mean number of characters printed before failure.



In [59]:
sigma=0.2 #Population standard deviation

In [69]:
z_critical=stats.norm.ppf(0.005)
z_critical

np.float64(-2.575829303548901)

In [61]:
std_err=sigma/np.sqrt(n) #Standard error using population sigma
std_err

np.float64(0.051639777949432225)

In [66]:
margin_error_z=z_critical*(sigma/np.sqrt(n))
margin_error_z

np.float64(-0.13301525327090588)

In [64]:
Margin_error_z = z_critical*std_err
Margin_error_z

np.float64(-0.13301525327090588)

In [67]:
ci_lower_z = x_bar - margin_error_z
ci_upper_z = x_bar + margin_error_z
print(ci_lower_z, ci_upper_z)

1.3716819199375725 1.1056514133957607


In [68]:
print(f'99% CI: ({ci_lower_z:.4f}, {ci_upper_z:.4f}) million characters')
print(f'Interpretation: 99% confident true mean durability is between {ci_lower_z:.3f} and {ci_upper_z:.3f}M chars')

99% CI: (1.3717, 1.1057) million characters
Interpretation: 99% confident true mean durability is between 1.372 and 1.106M chars


###HYPOTHESIS TESTING

Data Provided:
●	The theoretical weekly operating cost model: W = $1,000 + $5X
●	Sample of 25 restaurants with a mean weekly cost of Rs. 3,050
●	Number of units produced in a week (X) follows a normal distribution with a mean (μ) of 600 units and a standard deviation (σ) of 25 units


In [88]:
# GIVEN DATA
n = 25                   # sample size
x_bar = 3050             # sample mean weekly cost
mean_x = 600             # mean units produced
sigma_x = 25             # std dev units
fixed_cost = 1000        # fixed cost
variable_cost = 5        # cost per unit
alpha = 0.05

In [89]:
# Theoretical mean weekly cost
mean_w = fixed_cost + variable_cost * mean_x
mean_w

4000

In [90]:
print(f'Expected units (Mean_X): {mean_x}')
print(f'Theoretical mean cost (Mean_W): {mean_w}')

Expected units (Mean_X): 600
Theoretical mean cost (Mean_W): 4000


In [87]:
sigma_w = variable_cost * sigma_x
print(f'Std dev of cost (σ_W): {sigma_w}')

Std dev of cost (σ_W): 125


1. State the Hypotheses statement:

H0: Mean = 4000 (cost model is correct) (Null Hypothesis)

Ha: Mean > 4000 (cost model is incorrect) (Alternate Hypothesis)

Right-tailed z-test

2. Calculate the Test Statistic:

In [91]:
std_err= sigma_w/np.sqrt(n)
std_err

np.float64(25.0)

In [96]:
z_stats = (x_bar - mean_w)/std_err
z_stats

np.float64(-38.0)

In [98]:
z_critical = stats.norm.ppf(1-alpha)  ## Right-tailed critical value
z_critical

np.float64(1.6448536269514722)

In [100]:
p_value = 1 - stats.norm.cdf(z_stats)    # Right-tailed p-value
p_value

np.float64(1.0)

3. Determine the Probability and compare:
Using the alpha level of 5% (α = 0.05),


decision = "REJECT H0" if z_stat > z_critical else "FAIL TO REJECT H0"

In [105]:
print(f'Reason: z = {z_stats:.2f} {'>' if z_stats > z_critical else '<='} {z_critical:.3f}')

Reason: z = -38.00 <= 1.645


In [104]:
print(f'{'p=' if p_value > alpha else ''}p-value {p_value:.4f} {'>' if p_value > alpha else '<='} α=0.05')

p=p-value 1.0000 > α=0.05


4. Make a Decision:
Compare the test statistic with the critical value to decide whether to reject the null hypothesis.


In [107]:
print(f"z-critical (α=0.05, right-tailed): {z_critical:.3f}")
print(f"p-value: {p_value:.4f}")

decision = "REJECT H0" if z_stats > z_critical else "FAIL TO REJECT H0"
print(f"\nDecision: {decision}")
print(f"Reason: z = {z_stats:.2f} {'>' if z_stats > z_critical else '<='} {z_critical:.3f}")
print(f"{'p=' if p_value > alpha else ''}p-value {p_value:.4f} {'>' if p_value > alpha else '<='} α=0.05")

z-critical (α=0.05, right-tailed): 1.645
p-value: 1.0000

Decision: FAIL TO REJECT H0
Reason: z = -38.00 <= 1.645
p=p-value 1.0000 > α=0.05


5. Conclusion:

Here,

z_critical > z_stats

" Fail to reject H0 "

" No evidence that weekly costs > model"

" Sample mean (3050) is actually LOWER than model (4000)"

" Owner's claim NOT supported by this data"

" The sample mean (3050) being lower than theoretical (4000) means owners' "higher cost" claim has zero statistical support "