In [1]:
import numpy as np
from scipy.stats import t

# Point and Interval Estimation

In [2]:
def mean_confidence_interval(data, confidence=0.40):
    """Return the sample mean together with a two-sided t-based confidence interval.

    Parameters
    ----------
    data : sequence of numbers
        Observations; must support ``len()`` and be accepted by NumPy.
    confidence : float, default 0.40
        Confidence level of the interval, expressed as a fraction.

    Returns
    -------
    tuple
        ``(mean, lower_bound, upper_bound)``.
    """
    sample_size = len(data)
    center = np.mean(data)
    # Standard error of the mean, built from the sample (ddof=1) standard deviation.
    sem = np.std(data, ddof=1) / np.sqrt(sample_size)
    # Quantile leaving (1 - confidence) / 2 in the upper tail, with n - 1 degrees of freedom.
    t_crit = t.ppf((1 + confidence) / 2, sample_size - 1)
    half_width = sem * t_crit
    return center, center - half_width, center + half_width

# Example: confidence interval for the mean of a small sample.
data = [2, 3, 4, 10, 12, 23, 22, 33, 44, 55]
confidence_level = 0.40
mean, lower_bound, upper_bound = mean_confidence_interval(data, confidence_level)
print(f"Mean: {mean}")
# Format the percentage with :g instead of int(): int() truncates, so float
# noise such as 0.57 * 100 == 56.99999999999999 would be labelled "56%".
print(f"Confidence Interval ({confidence_level * 100:g}%): [{lower_bound}, {upper_bound}]")

Mean: 20.8
Confidence Interval (40%): [17.65719891750294, 23.942801082497063]


# Confidence Interval for the Difference Between Two Means

In [None]:
def compare_means_and_confidence_interval(data1, data2, confidence=0.40):
    """Compare two sample means with a pooled-variance (Student) t procedure.

    Parameters
    ----------
    data1, data2 : sequence of numbers
        The two independent samples; each needs at least two observations.
    confidence : float, default 0.40
        Confidence level of the interval, as a fraction. The two-sided test
        uses the matching critical value, i.e. significance ``1 - confidence``.

    Returns
    -------
    tuple
        ``(reject_null, (lower_bound, upper_bound))`` where ``reject_null`` is
        truthy when ``|t|`` exceeds the critical value and the bounds enclose
        ``mean(data1) - mean(data2)``.

    Raises
    ------
    ValueError
        If either sample has fewer than two observations (the sample variance
        with ``ddof=1`` is undefined in that case).
    """
    n1 = len(data1)
    n2 = len(data2)
    if n1 < 2 or n2 < 2:
        raise ValueError("Each sample must contain at least two observations.")

    mean_diff = np.mean(data1) - np.mean(data2)
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)

    # The n1 + n2 - 2 degrees of freedom belong to the pooled-variance test, so
    # the standard error must be built from the pooled variance as well. For
    # equal sample sizes this equals sqrt(var1 / n1 + var2 / n2).
    degrees_of_freedom = n1 + n2 - 2
    pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / degrees_of_freedom
    std_err = np.sqrt(pooled_var * (1 / n1 + 1 / n2))
    t_critical = t.ppf((1 + confidence) / 2, degrees_of_freedom)

    t_statistic = mean_diff / std_err
    reject_null = np.abs(t_statistic) > t_critical

    margin_of_error = t_critical * std_err
    lower_bound = mean_diff - margin_of_error
    upper_bound = mean_diff + margin_of_error

    return reject_null, (lower_bound, upper_bound)

# Example: compare the means of two independent samples.
data1 = [55, 44, 51, 61, 70]
data2 = [95, 72, 64, 57, 65]
confidence_level = 0.40
reject_null, confidence_interval = compare_means_and_confidence_interval(data1, data2, confidence_level)

if reject_null:
    print("Null hypothesis rejected: There is a significant difference between the means.")
else:
    print("Null hypothesis not rejected: There is no significant difference between the means.")

# Format the percentage with :g instead of int(): int() truncates, so float
# noise such as 0.57 * 100 == 56.99999999999999 would be labelled "56%".
print(f"Confidence Interval ({confidence_level * 100:g}%): {confidence_interval}")

# Hypothesis Test

In [4]:
def student_t_test(sample1, sample2, a=0.09):
    """Two-sided, pooled-variance (Student) t-test for equality of two means.

    Parameters
    ----------
    sample1, sample2 : sequence of numbers
        The two independent samples; each needs at least two observations.
    a : float, default 0.09
        Significance level; the null hypothesis is rejected when ``p_value < a``.

    Returns
    -------
    tuple
        ``(reject_null, t_statistic, p_value)``.

    Raises
    ------
    ValueError
        If either sample has fewer than two observations (the sample variance
        with ``ddof=1`` is undefined in that case).
    """
    n1 = len(sample1)
    n2 = len(sample2)
    if n1 < 2 or n2 < 2:
        raise ValueError("Each sample must contain at least two observations.")

    mean1 = np.mean(sample1)
    mean2 = np.mean(sample2)
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)

    # Student's test pools the two variances, weighted by their degrees of
    # freedom. For equal sample sizes this equals sqrt(var1 / n1 + var2 / n2);
    # for unequal sizes only the pooled form matches the n1 + n2 - 2 df below.
    degrees_of_freedom = n1 + n2 - 2
    pooled_var = ((n1 - 1) * var1 + (n2 - 1) * var2) / degrees_of_freedom
    std_err = np.sqrt(pooled_var * (1 / n1 + 1 / n2))

    t_statistic = (mean1 - mean2) / std_err
    # sf (survival function) avoids the cancellation in 1 - cdf, which
    # collapses to exactly 0 for large |t|.
    p_value = 2 * t.sf(abs(t_statistic), df=degrees_of_freedom)

    reject_null = p_value < a

    return reject_null, t_statistic, p_value


# Two independent samples, compared at significance level `a`.
sample1 = [55, 44, 51, 61, 70]
sample2 = [95, 72, 64, 57, 65]
a = 0.09

reject_null, t_statistic, p_value = student_t_test(sample1, sample2, a)

# Report the decision first, then the numbers behind it.
print(
    "Reject the null hypothesis: There is a significant difference between the means."
    if reject_null
    else "Fail to reject the null hypothesis: There is no significant difference between the means."
)
print(f"t-statistic: {t_statistic}")
print(f"p-value: {p_value}")

Fail to reject the null hypothesis: There is no significant difference between the means.
t-statistic: -1.822930860750599
p-value: 0.10577632717102903


# One Tailed T-Test

In [5]:
def one_tailed_t_test(sample, null_mean, alternative='Greater', a=0.09):
    """One-sample, one-tailed t-test of the sample mean against ``null_mean``.

    Parameters
    ----------
    sample : sequence of numbers
        Observations.
    null_mean : float
        Mean under the null hypothesis.
    alternative : str, default 'Greater'
        Direction of the alternative hypothesis: 'greater' or 'less',
        matched case-insensitively.
    a : float, default 0.09
        Significance level; the null hypothesis is rejected when ``p_value < a``.

    Returns
    -------
    tuple
        ``(reject_null, t_statistic, p_value)``.

    Raises
    ------
    ValueError
        If ``alternative`` is neither 'greater' nor 'less'.
    """
    n = len(sample)
    sample_mean = np.mean(sample)
    sample_std = np.std(sample, ddof=1)
    t_statistic = (sample_mean - null_mean) / (sample_std / np.sqrt(n))
    degrees_of_freedom = n - 1

    # Compare case-insensitively: the error message below advertises the
    # lowercase spellings, so those must be accepted too.
    direction = alternative.lower() if isinstance(alternative, str) else alternative

    if direction == 'greater':
        # Upper-tail probability; sf avoids the cancellation in 1 - cdf.
        p_value = t.sf(t_statistic, df=degrees_of_freedom)
    elif direction == 'less':
        p_value = t.cdf(t_statistic, df=degrees_of_freedom)
    else:
        raise ValueError("Invalid alternative hypothesis. Choose either 'greater' or 'less'.")

    reject_null = p_value < a

    return reject_null, t_statistic, p_value


# One-sample, upper-tailed test of the sample mean against `null_mean`.
sample = [2, 3, 4, 10, 12, 23, 22, 33, 44, 55]
null_mean = 11
alternative = 'Greater'
a = 0.09

reject_null, t_statistic, p_value = one_tailed_t_test(sample, null_mean, alternative, a)

# Report the decision first, then the numbers behind it.
print(
    "Reject the null hypothesis: The sample mean is significantly greater than the null mean"
    if reject_null
    else "Fail to reject the null hypothesis: The sample mean is not significantly greater than the null mean"
)
print(f"t-statistic: {t_statistic}")
print(f"p-value: {p_value}")

Reject the null hypothesis: The sample mean is significantly greater than the null mean
t-statistic: 1.6947004364718832
p-value: 0.062186762953828745


# Two Tailed T-Test

In [8]:
def two_tailed_t_test(sample, null_mean, a=0.09):
    """One-sample, two-tailed t-test of the sample mean against ``null_mean``.

    Parameters
    ----------
    sample : sequence of numbers
        Observations.
    null_mean : float
        Mean under the null hypothesis.
    a : float, default 0.09
        Significance level; the null hypothesis is rejected when ``p_value < a``.

    Returns
    -------
    tuple
        ``(reject_null, t_statistic, p_value)``.
    """
    n = len(sample)
    sample_mean = np.mean(sample)
    sample_std = np.std(sample, ddof=1)
    t_statistic = (sample_mean - null_mean) / (sample_std / np.sqrt(n))
    degrees_of_freedom = n - 1

    # sf (survival function) avoids the cancellation in 1 - cdf, which
    # collapses to exactly 0 once cdf rounds to 1.0 for large |t|.
    p_value = 2 * t.sf(abs(t_statistic), df=degrees_of_freedom)

    reject_null = p_value < a

    return reject_null, t_statistic, p_value

# One-sample, two-sided test of the sample mean against `null_mean`.
sample = [2, 3, 4, 10, 12, 23, 22, 33, 44, 55]
null_mean = 11
a = 0.09

reject_null, t_statistic, p_value = two_tailed_t_test(sample, null_mean, a)

# Report the decision first, then the numbers behind it.
print(
    "Reject the null hypothesis: The sample mean is significantly different from the null mean."
    if reject_null
    else "Fail to reject the null hypothesis: The sample mean is not significantly different from the null mean."
)
print(f"t-statistic: {t_statistic}")
print(f"p-value: {p_value}")

Fail to reject the null hypothesis: The sample mean is not significantly different from the null mean.
t-statistic: 1.6947004364718832
p-value: 0.12437352590765749
