<a href="https://colab.research.google.com/github/AbhiNahi1/ISPA-Case-Study/blob/main/Untitled14.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
from scipy import stats

# Read the heart dataset from disk into a DataFrame.
# NOTE(review): absolute path; '/content' looks like a Colab session
# directory -- adjust when running this cell elsewhere.
csv_path = '/content/heart.csv'
df = pd.read_csv(csv_path)

# Quick look at the schema and the leading rows before running any tests.
print(f"Dataset columns: {df.columns}")
print("\nFirst 5 rows of the dataset:", df.head(), sep="\n")

# ==============================================================================
# 1. ONE-SAMPLE HYPOTHESIS TEST
# ==============================================================================
# Question: does the sample's mean cholesterol differ from a fixed reference?
#   H0: mean cholesterol == reference value
#   H1: mean cholesterol != reference value
# Reference: 200 mg/dL, taken from the commonly quoted "< 200 mg/dL" guideline.
# Data cleaning: the dataset contains 0s in 'Cholesterol', which are likely
# placeholders for missing measurements, so only strictly positive readings
# take part in the test.

section_rule = "=" * 70
print("\n" + section_rule)
print("1. ONE-SAMPLE HYPOTHESIS TEST")
print(section_rule)

reference_value = 200

# Boolean mask first, then a single .loc lookup (rows by mask, one column).
has_cholesterol_reading = df['Cholesterol'] > 0
cholesterol_data = df.loc[has_cholesterol_reading, 'Cholesterol']

# One-sample t-test of the filtered readings against the reference mean.
t_stat, p_value = stats.ttest_1samp(cholesterol_data, popmean=reference_value)

print(
    f"Sample Mean Cholesterol: {cholesterol_data.mean():.2f}",
    f"Reference Value: {reference_value}",
    f"T-statistic: {t_stat:.4f}",
    f"P-value: {p_value:.4f}",
    sep="\n",
)

# Decision at the 5% significance level.
alpha = 0.05
one_sample_verdict = (
    "Conclusion: We reject the null hypothesis. The sample mean cholesterol is significantly different from the reference value."
    if p_value < alpha
    else "Conclusion: We fail to reject the null hypothesis. The sample mean cholesterol is not significantly different from the reference value."
)
print(one_sample_verdict)

# ==============================================================================
# 2. TWO-SAMPLE HYPOTHESIS TEST
# ==============================================================================
# Objective: Test if the mean maximum heart rate ('MaxHR') is significantly
# different between male and female patients.
# Null Hypothesis (H₀): The mean MaxHR for males is equal to the mean MaxHR for females.
# Alternative Hypothesis (H₁): The mean MaxHR for males is not equal to the mean MaxHR for females.
# Method: independent two-sample t-test with equal_var=False (Welch's variant),
# so the two groups are not assumed to share a common variance.

print("\n" + "="*70)
print("2. TWO-SAMPLE HYPOTHESIS TEST")
print("="*70)

# Separate the data into two groups: male and female.
# Missing readings are dropped up front: pandas' mean() skips NaN, but
# ttest_ind propagates NaN by default, so a single missing MaxHR would give
# p_value = nan -- and `nan < alpha` is False, which would print a
# "fail to reject" conclusion next to perfectly valid-looking group means.
male_max_hr = df.loc[df['Sex'] == 'M', 'MaxHR'].dropna()
female_max_hr = df.loc[df['Sex'] == 'F', 'MaxHR'].dropna()

# Perform the independent two-sample t-test
t_stat, p_value = stats.ttest_ind(a=male_max_hr, b=female_max_hr, equal_var=False)

print(f"Mean MaxHR for Males: {male_max_hr.mean():.2f}")
print(f"Mean MaxHR for Females: {female_max_hr.mean():.2f}")
print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Decision at the 5% significance level.
alpha = 0.05
if p_value < alpha:
    print("Conclusion: We reject the null hypothesis. The mean MaxHR is significantly different between males and females.")
else:
    print("Conclusion: We fail to reject the null hypothesis. The mean MaxHR is not significantly different between males and females.")

# ==============================================================================
# 3. ONE-WAY ANOVA
# ==============================================================================
# Objective: Test if the mean resting blood pressure ('RestingBP') is significantly
# different across the chest pain types ('ChestPainType'; four are expected --
# the printed group list below shows the labels actually present).
# Null Hypothesis (H₀): The mean RestingBP is the same across all ChestPainType groups.
# Alternative Hypothesis (H₁): At least one ChestPainType group has a different mean RestingBP.

print("\n" + "="*70)
print("3. ONE-WAY ANOVA")
print("="*70)

# Single significance level shared by the ANOVA decision and the post-hoc test.
alpha = 0.05

# Keep only rows where both the response and the group label are present.
# A NaN label would otherwise show up in unique() and produce an empty group
# (NaN never compares equal), and a NaN response would propagate through
# f_oneway; either way the p-value becomes nan and `nan < alpha` would
# silently print "fail to reject".
anova_data = df[['RestingBP', 'ChestPainType']].dropna()
# NOTE(review): the one-sample section treats 0 in 'Cholesterol' as a likely
# missing-value placeholder; confirm 'RestingBP' has no such placeholder zeros.

# Separate the data into groups based on the 'ChestPainType'
cp_types = anova_data['ChestPainType'].unique()
print(f"Chest Pain Types (Groups): {cp_types}")
groups = [
    anova_data.loc[anova_data['ChestPainType'] == cp, 'RestingBP']
    for cp in cp_types
]

# Perform the one-way ANOVA test
f_stat, p_value = stats.f_oneway(*groups)

print(f"F-statistic: {f_stat:.4f}")
print(f"P-value: {p_value:.4f}")

if p_value < alpha:
    print("Conclusion: We reject the null hypothesis. There is a significant difference in mean RestingBP among the ChestPainType groups.")
    # A significant ANOVA only says "some group differs"; a post-hoc test is
    # needed to find which pairs. statsmodels provides Tukey's HSD.
    # The try body holds only the optional import, so an unrelated failure in
    # the Tukey computation is not misreported as "statsmodels missing".
    try:
        from statsmodels.stats.multicomp import pairwise_tukeyhsd
    except ImportError:
        print("\nTo perform a post-hoc test like Tukey's HSD, please install statsmodels: `pip install statsmodels`")
    else:
        print("\nPerforming Post-Hoc Test (Tukey HSD)...")
        # Same cleaned rows and same alpha as the ANOVA above.
        tukey_result = pairwise_tukeyhsd(
            endog=anova_data['RestingBP'],
            groups=anova_data['ChestPainType'],
            alpha=alpha,
        )
        print(tukey_result)
        print("\nInterpretation of Tukey HSD:")
        print("Each row in the table compares two groups. A 'True' value in the 'reject' column indicates a significant difference between those two groups.")
else:
    print("Conclusion: We fail to reject the null hypothesis. There is no significant difference in mean RestingBP among the ChestPainType groups.")

Dataset columns: Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

First 5 rows of the dataset:
   Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  MaxHR  \
0   40   M           ATA        140          289          0     Normal    172   
1   49   F           NAP        160          180          0     Normal    156   
2   37   M           ATA        130          283          0         ST     98   
3   48   F           ASY        138          214          0     Normal    108   
4   54   M           NAP        150          195          0     Normal    122   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              N      0.0       Up             0  
1              N      1.0     Flat             1  
2              N      0.0       Up             0  
3              Y      1.5     Flat             1  
4              