# Hypothesis Testing

In [None]:
import numpy as np
from scipy import stats

# Paired-sample t-test: compare blood-pressure readings taken before and
# after treatment, using both SciPy and a hand-computed statistic.

# Paired measurements (ten readings each)
before_treatment = np.array([120, 122, 118, 130, 125, 128, 115, 121, 123, 119])
after_treatment = np.array([115, 120, 112, 128, 122, 125, 110, 117, 119, 114])

# Step 1: hypotheses (two-sided)
null_hypothesis = "The new drug has no effect on blood pressure."
alternate_hypothesis = "The new drug has an effect on blood pressure."

# Step 2: significance level
alpha = 0.05

# Step 3: paired t-test via SciPy (after - before, so a drop gives t < 0)
t_statistic, p_value = stats.ttest_rel(after_treatment, before_treatment)

# Step 4: the same statistic by hand, t = mean(d) / (sd(d) / sqrt(n)),
# where d holds the per-pair differences
paired_differences = after_treatment - before_treatment
n = len(before_treatment)
m = paired_differences.mean()
s = paired_differences.std(ddof=1)  # ddof=1 -> sample standard deviation
t_statistic_manual = m / (s / np.sqrt(n))

# Step 5: decision at the chosen significance level
decision = "Reject" if p_value <= alpha else "Fail to reject"

# Conclusion text keyed by the decision
conclusions = {
    "Reject": "There is statistically significant evidence that the average blood pressure before and after treatment with the new drug is different.",
    "Fail to reject": "There is insufficient evidence to claim a significant difference in average blood pressure before and after treatment with the new drug.",
}
conclusion = conclusions[decision]

# Display results
for label, value in (
    ("T-statistic (from scipy):", t_statistic),
    ("P-value (from scipy):", p_value),
    ("T-statistic (calculated manually):", t_statistic_manual),
):
    print(label, value)
print(f"Decision: {decision} the null hypothesis at alpha={alpha}.")
print("Conclusion:", conclusion)


T-statistic (from scipy): -9.0
P-value (from scipy): 8.538051223166285e-06
T-statistic (calculated manually): -9.0
Decision: Reject the null hypothesis at alpha=0.05.
Conclusion: There is statistically significant evidence that the average blood pressure before and after treatment with the new drug is different.


In [None]:
import scipy.stats as stats
import math
import numpy as np

# One-sample, two-tailed Z-test on cholesterol readings, with the
# population standard deviation treated as known.

# Observed sample and the population parameters assumed under H0
sample_data = np.array([
    205, 198, 210, 190, 215, 205, 200, 192, 198, 205, 198, 202, 208,
    200, 205, 198, 205, 210, 192, 205, 198, 205, 210, 192, 205,
])
population_mean = 200
population_std_dev = 5
sample_size = len(sample_data)

# Step 1: hypotheses
# H0: the average cholesterol level in the population is 200 mg/dL.
# H1: the average cholesterol level in the population differs from 200 mg/dL.

# Step 2: significance level (split across both tails)
alpha = 0.05

# Critical values: the lower one comes from the standard normal quantile,
# the upper one is its mirror image
critical_value_left = stats.norm.ppf(alpha / 2)
critical_value_right = -critical_value_left

# Step 3: test statistic, z = (x_bar - mu) / (sigma / sqrt(n))
sample_mean = np.mean(sample_data)
standard_error = population_std_dev / math.sqrt(sample_size)
z_score = (sample_mean - population_mean) / standard_error

# Step 4: reject H0 when |z| lies beyond the larger critical magnitude
rejection_threshold = max(abs(critical_value_left), abs(critical_value_right))
rejects_null = abs(z_score) > rejection_threshold

reject_lines = (
    "Reject the null hypothesis.",
    "There is statistically significant evidence that the average cholesterol level in the population is different from 200 mg/dL.",
)
fail_lines = (
    "Fail to reject the null hypothesis.",
    "There is not enough evidence to conclude that the average cholesterol level in the population is different from 200 mg/dL.",
)
for line in (reject_lines if rejects_null else fail_lines):
    print(line)


Reject the null hypothesis.
There is statistically significant evidence that the average cholesterol level in the population is different from 200 mg/dL.


Z test

In [None]:
# Import the necessary libraries
import numpy as np
import scipy.stats as stats

# One-sample, right-tailed Z-test with a known population standard deviation.
# Both decision rules below look only at the upper tail, i.e. they test
# H1: mean > population_mean.

# Given information
sample_mean = 110
population_mean = 100
population_std = 15
sample_size = 50
alpha = 0.05

# Compute the z-score: z = (x_bar - mu) / (sigma / sqrt(n)).
# Use sample_size (not a repeated literal) so the inputs above stay the
# single source of truth.
standard_error = population_std / np.sqrt(sample_size)
z_score = (sample_mean - population_mean) / standard_error
print('Z-Score :', z_score)

# Approach 1: Using Critical Z-Score

# Critical Z-Score: the value that leaves probability alpha in the upper tail
z_critical = stats.norm.ppf(1 - alpha)
print('Critical Z-Score :', z_critical)

# Hypothesis
if z_score > z_critical:
    print("Reject Null Hypothesis")
else:
    print("Fail to Reject Null Hypothesis")

# Approach 2: Using P-value

# P-Value: probability, under H0, of a Z-score at least as large as the one
# observed (upper tail). The survival function avoids the cancellation
# error of 1 - cdf when the tail is very small.
p_value = stats.norm.sf(z_score)

print('p-value :', p_value)

# Hypothesis
if p_value < alpha:
    print("Reject Null Hypothesis")
else:
    print("Fail to Reject Null Hypothesis")


Z-Score : 4.714045207910317
Critical Z-Score : 1.6448536269514722
Reject Null Hypothesis
p-value : 1.2142337364462463e-06
Reject Null Hypothesis


T test


In [None]:
import scipy.stats as stats
import numpy as np

# One-sample, right-tailed t-test from summary statistics.
# The critical-value rule and the p-value rule are two views of the same
# test and must always reach the same decision.

# Define the population mean weight
population_mean = 45

# Define the sample mean weight and standard deviation
sample_mean = 75
sample_std = 25

# Define the sample size
sample_size = 25

# Calculate the t-statistic: t = (x_bar - mu) / (s / sqrt(n))
t_statistic = (sample_mean - population_mean) / (sample_std / np.sqrt(sample_size))

# Define the degrees of freedom
df = sample_size - 1

# Set the significance level (alpha)
alpha = 0.05

# Calculate the critical t-value (upper-tail cut-off)
critical_t = stats.t.ppf(1 - alpha, df)

# Calculate the p-value: upper-tail probability of the observed statistic.
# The survival function is more accurate than 1 - cdf for tiny tails.
p_value = stats.t.sf(t_statistic, df)

# Print the results
print("T-Statistic:", t_statistic)
print("Critical t-value:", critical_t)
print("P-Value:", p_value)

# Messages shared by both decision rules
significant_message = (
    "There is a significant difference in weight before and after the camp.\n"
    "    The fitness camp had an effect."
)
not_significant_message = (
    "There is no significant difference in weight before and after the camp.\n"
    "    The fitness camp did not have a significant effect."
)

# Decision
# Reject H0 when the statistic exceeds the critical value ...
significant_by_t = t_statistic > critical_t
# ... or, equivalently, when the p-value falls BELOW alpha. (The comparison
# was previously inverted, which made the two rules contradict each other.)
significant_by_p = p_value < alpha

print('With T-value :')
print(significant_message if significant_by_t else not_significant_message)

print('With P-value :')
print(significant_message if significant_by_p else not_significant_message)


T-Statistic: 6.0
Critical t-value: 1.7108820799094275
P-Value: 1.703654035845048e-06
With T-value :
There is a significant difference in weight before and after the camp. 
    The fitness camp had an effect.
With P-value :
There is no significant difference in weight before and after the camp. 
    The fitness camp did not have a significant effect.


# Confidence interval


In [None]:
import math  # standard-library module; there is no module named "maths"

import scipy.stats as stats

# Two-sided t-based confidence interval for a mean, from summary statistics.

# Given values
sample_mean = 240
sample_std_dev = 25
sample_size = 10
confidence_level = 0.95

# Degrees of freedom
df = sample_size - 1

# Significance level (alpha) is the total probability outside the interval;
# a two-sided interval places half of it in each tail.
alpha = 1 - confidence_level
tail_probability = alpha / 2

# t-value that leaves tail_probability in the upper tail
t_value = stats.t.ppf(1 - tail_probability, df)

# Margin of error: t * s / sqrt(n)
margin_of_error = t_value * (sample_std_dev / math.sqrt(sample_size))

lower_limit = sample_mean - margin_of_error
upper_limit = sample_mean + margin_of_error

print(f"Confidence Interval: ({lower_limit}, {upper_limit})")


Confidence Interval: (222.1160773502915, 257.8839226497085)
