# Confidence Interval

In [None]:
import numpy as np
from scipy import stats

In [None]:
# Print-head durability measurements, in millions of characters (n = 15).
data = np.array([
    1.13, 1.55, 1.43, 0.92, 1.25,
    1.36, 1.32, 0.85, 1.07, 1.48,
    1.20, 1.33, 1.18, 1.22, 1.29,
])

In [None]:
# Sample size: number of observations along the first axis of `data`.
n = data.shape[0]
n

15

In [None]:
# Confidence level for the interval estimates (0.99 corresponds to 99%).
confidence_level = 0.99

In [None]:
# Point estimate of the population mean (x̄): arithmetic mean of the sample.
sample_mean = data.mean()
sample_mean

np.float64(1.2386666666666666)

In [None]:
# Sample standard deviation (s); ddof=1 divides by n - 1 instead of n.
sample_std = data.std(ddof=1)
sample_std

np.float64(0.19316412956959936)

In [None]:
# Report both summary statistics to four decimals, then a 50-character divider.
print(
    f"Sample Mean (x̄): {sample_mean:.4f} million chars",
    f"Sample Std Dev (s): {sample_std:.4f} million chars",
    "-" * 50,
    sep="\n",
)

Sample Mean (x̄): 1.2387 million chars
Sample Std Dev (s): 0.1932 million chars
--------------------------------------------------


## Task (a): CI using Sample Standard Deviation (t-distribution)

In [None]:
# Rationale: Population standard deviation is UNKNOWN, and n < 30.
# We use the t-distribution with df = n - 1.
# The interval is centred on the sample mean and scaled by the standard
# error of the mean, s / sqrt(n).
#
# The level comes from the notebook's `confidence_level` constant rather than
# a repeated 0.99 literal, so the computed interval and its printed label
# always agree when the constant is changed.
ci_t = stats.t.interval(
    confidence_level,
    df=n - 1,
    loc=sample_mean,
    scale=sample_std / np.sqrt(n),
)

print("Task (a) - T-Distribution CI (Sample Std Dev):")
# `:g` renders 99.0 as "99" (and e.g. 99.5 as "99.5") for the percentage label.
print(f"{confidence_level * 100:g}% CI: ({ci_t[0]:.3f}, {ci_t[1]:.3f}) million characters")
print("Rationale: Used t-distribution because sample size is small and population standard deviation is unknown.")
print("-" * 50)



Task (a) - T-Distribution CI (Sample Std Dev):
99% CI: (1.090, 1.387) million characters
Rationale: Used t-distribution because sample size is small and population standard deviation is unknown.
--------------------------------------------------


## Task (b): CI using Known Population Standard Deviation (Z-distribution)

In [None]:
# Population standard deviation (σ) treated as known for Task (b);
# presumably in the same units as `data` (million characters) — confirm against the problem statement.
known_pop_std = 0.2

In [33]:

# Rationale: Population standard deviation (σ) is KNOWN.
# We use the Z-distribution (norm.interval) regardless of sample size.
# NOTE: with a sample this small, the Z interval additionally assumes the
# underlying population is (approximately) normally distributed.
#
# The level comes from the notebook's `confidence_level` constant rather than
# a repeated 0.99 literal, so the computed interval and its printed label
# always agree when the constant is changed.
ci_z = stats.norm.interval(
    confidence_level,
    loc=sample_mean,
    scale=known_pop_std / np.sqrt(n),
)

print("Task (b) - Z-Distribution CI (Known Pop Std Dev):")
# `:g` renders 99.0 as "99" (and e.g. 99.5 as "99.5") for the percentage label.
print(f"{confidence_level * 100:g}% CI: ({ci_z[0]:.3f}, {ci_z[1]:.3f}) million characters")
print(f"Rationale: Used Z-distribution because the population standard deviation (σ = {known_pop_std}) is known.")
print("-" * 50)

Task (b) - Z-Distribution CI (Known Pop Std Dev):
99% CI: (1.106, 1.372) million characters
Rationale: Used Z-distribution because the population standard deviation (σ = 0.2) is known.
--------------------------------------------------


## Final Interpretation


T-Distribution CI (1.090, 1.387): We are 99% confident that the true mean durability of all print-heads lies between 1.090 and 1.387 million characters.



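Z-Distribution CI (1.106, 1.372): This interval is narrower than the t-distribution interval. This is because having a known population standard deviation (as opposed to estimating it from the small sample) reduces the uncertainty in our estimation, resulting in a smaller margin of error and a more precise estimate. Concretely, the known σ (0.2) is slightly larger than the sample estimate s (≈ 0.193), so the narrowing comes entirely from the critical value: z ≈ 2.576 for the normal distribution versus t ≈ 2.977 for the t-distribution with 14 degrees of freedom.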