In [None]:
# Import necessary libraries for data analysis and statistics
import codecademylib3  # Codecademy library for running code
import pandas as pd  # For handling data in tabular format
import numpy as np  # For numerical computations
from scipy.stats import ttest_1samp  # For one-sample t-tests

In [None]:
# Read the heart disease records from the CSV file into a DataFrame
heart = pd.read_csv('heart_disease.csv')

# Partition the records by the value of the 'heart_disease' column
yes_hd = heart.loc[heart['heart_disease'] == 'presence']  # rows labelled 'presence'
no_hd = heart.loc[heart['heart_disease'] == 'absence']  # rows labelled 'absence'

In [None]:
# Preview the first few rows of data for patients with heart disease
print(yes_hd.head())  # head() returns the first 5 rows by default

# Extract cholesterol levels of patients with heart disease
chol_hd = yes_hd.chol

# Calculate and print the mean cholesterol level of patients with heart disease.
# A bare expression in the middle of a cell is evaluated and then discarded, so
# the mean has to be printed explicitly to show up in the cell output.
print(np.mean(chol_hd))

# Perform a one-sample t-test on cholesterol levels against a mean value of 240
# (ttest_1samp is two-sided unless an alternative is specified)
tstat, pval = ttest_1samp(chol_hd, 240)

# Print the p-value from the t-test to determine statistical significance
print(pval)

In [None]:


# Extract cholesterol levels of patients without heart disease
chol_no_hd = no_hd.chol

# Calculate and print the mean cholesterol level of patients without heart disease.
# A bare expression in the middle of a cell is evaluated and then discarded, so
# the mean has to be printed explicitly to show up in the cell output.
print(np.mean(chol_no_hd))

# Perform a one-sample t-test on cholesterol levels against a mean value of 240
# (ttest_1samp is two-sided unless an alternative is specified)
tstat, pval = ttest_1samp(chol_no_hd, 240)

# Print the p-value from the t-test to determine statistical significance
print(pval)

# Report how many patient records the full dataset holds
print(len(heart))

# Count the patients flagged in the 'fbs' column by summing it
# (presumably a 0/1 indicator of fasting blood sugar > 120 mg/dL - confirm
# against the dataset's data dictionary)
num_highfbs_patients = heart['fbs'].sum()
print(num_highfbs_patients)

# Report what 8% of the total patient count amounts to
print(len(heart) * 0.08)

In [None]:

# Perform a binomial test to check if the number of high fasting blood sugar
# patients is significantly higher than the 8% expected under the null.
# scipy.stats.binom_test was deprecated in SciPy 1.10 and removed in 1.12;
# binomtest (available since SciPy 1.7) is its replacement. Prefer it and fall
# back to the legacy function only on older installations.
try:
    from scipy.stats import binomtest
except ImportError:
    from scipy.stats import binom_test
    pval = binom_test(num_highfbs_patients, len(heart), 0.08, alternative='greater')
else:
    # binomtest only accepts an integer success count; the cast guards against
    # the column sum being a float (e.g. if the column is stored as 0.0/1.0).
    pval = binomtest(
        int(num_highfbs_patients), len(heart), 0.08, alternative='greater'
    ).pvalue

# Print the p-value from the binomial test
print(pval)
