# **Importing Data**

In [None]:
import pandas as pd

# Load the cleaned heart-attack dataset.
# The first column of the file has no header (it shows up as "Unnamed: 0" when
# read as data) and appears to be a row counter left by an earlier export --
# TODO confirm against the file. Reading it as the index keeps it out of the
# feature columns used later for modelling.
ds = pd.read_csv("heart_attack_data_Cleaned.csv", index_col=0)

# Preview the first rows to sanity-check column names and value formats.
ds.head()

Unnamed: 0,age,sex,total_cholesterol,ldl,hdl,systolic_bp,diastolic_bp,smoking,diabetes,heart_attack
0,57,Male,229.463642,175.879129,39.225687,124.070127,91.37878,Non Smoker,No,No
1,58,Male,186.46412,128.984916,34.950968,95.492552,64.35504,Smoker,No,No
2,37,Male,251.300719,152.347592,45.913288,99.519335,64.953147,Non Smoker,Yes,No
3,55,Male,192.058908,116.803684,67.208925,122.460002,73.821382,Non Smoker,No,No
4,53,Male,151.203449,107.017396,60.693838,123.022257,81.121946,Non Smoker,Yes,Yes


# **Checking for Missing Values**

In [None]:
# Per-column count of missing entries; a column of zeros means nothing needs imputing.
ds.isnull().sum(axis=0)

Unnamed: 0,0
age,0
sex,0
total_cholesterol,0
ldl,0
hdl,0
systolic_bp,0
diastolic_bp,0
smoking,0
diabetes,0
heart_attack,0


# **Data Profiling**

In [None]:
# Optional dependency, only needed for the profiling report in the next cell.
# Kept commented out so running the whole notebook does not reinstall packages.
# The captured install log below resolved ydata-profiling 4.12.2.
#!pip install ydata-profiling

Collecting ydata-profiling
  Downloading ydata_profiling-4.12.2-py2.py3-none-any.whl.metadata (20 kB)
Collecting visions<0.8.0,>=0.7.5 (from visions[type_image_path]<0.8.0,>=0.7.5->ydata-profiling)
  Downloading visions-0.7.6-py3-none-any.whl.metadata (11 kB)
Collecting htmlmin==0.1.12 (from ydata-profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... done
Collecting phik<0.13,>=0.11.1 (from ydata-profiling)
  Downloading phik-0.12.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata-profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting imagehash==4.3.1 (from ydata-profiling)
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting dacite>=1.8 (from ydata-profiling)
  Downloading dacite-1.9.1-py3-none-any.whl.metadata (17 kB)
Collecting PyWavelets (from imagehash==4.3.1->ydata-profiling)
  Downloading pywavelets-1.

In [None]:
# Profiling is disabled: every statement in this cell is commented out.
# Uncomment the lines below (and install ydata-profiling) to regenerate the report.
#import ydata_profiling as yp

# Uses the `ds` DataFrame loaded in the "Importing Data" section.

#profile = yp.ProfileReport(ds)
#profile.to_file("heart_attack_data_profile.html") # Save the report to an HTML file
# Display the report in the notebook
#profile

# Lighter-weight alternatives using pandas only:
#ds.describe() # Statistical summary
#ds.corr(numeric_only=True) # Correlations between numerical columns
#ds.groupby('heart_attack').mean(numeric_only=True) # Mean of numerical columns per target class
# ... (Other data analysis using Pandas and relevant libraries)


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]



# **Model Training and Accuracy Evaluation**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train a random-forest classifier to predict `heart_attack` and report its
# accuracy on a 20% hold-out set. Defines X, y, the train/test splits, `model`,
# `y_pred` and `accuracy` for use by later cells.

# Features are every column except the target. "Unnamed: 0" is a row counter,
# not a patient attribute, so it is dropped when present; errors='ignore' keeps
# the cell working if the loader already removed it.
feature_frame = (
    ds.drop(columns='heart_attack')
      .drop(columns='Unnamed: 0', errors='ignore')
)

# scikit-learn estimators need numeric input. One-hot encode any text/category
# columns (e.g. sex, smoking, diabetes when stored as labels). Columns that are
# already numeric pass through unchanged, so this is a no-op on encoded data.
X = pd.get_dummies(feature_frame, drop_first=True)
y = ds['heart_attack']  # Target variable

# Hold out 20% for testing. The target is imbalanced, so stratify on it to keep
# the class ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fixed random_state makes the fitted forest reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out rows only.
y_pred = model.predict(X_test)

# Overall accuracy plus per-class precision/recall; with imbalanced classes the
# per-class numbers matter more than the headline accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))


Accuracy: 0.825
              precision    recall  f1-score   support

           0       0.84      0.98      0.90       164
           1       0.56      0.14      0.22        36

    accuracy                           0.82       200
   macro avg       0.70      0.56      0.56       200
weighted avg       0.79      0.82      0.78       200



# **Hypothesis Testing**

In [None]:
import pandas as pd
from scipy import stats
from scipy.stats import binom

# Relies on `ds`, `y_test`, `y_pred` and `accuracy` defined by the earlier cells.

# Test 1: one-sample t-test.
# H0: the mean age in the dataset equals 50.
t_statistic, p_value = stats.ttest_1samp(ds['age'], 50)
print(f"One-sample t-test:\nT-statistic: {t_statistic:.3f}, P-value: {p_value:.3f}")


# Test 2: two-sample t-test.
# H0: mean age is the same in both sex groups.
# The two groups are taken from the values actually present in the column, so
# the test works whether `sex` is stored as 0/1 codes or as text labels. With
# 0/1 codes this gives group1 = 1 and group2 = 0.
sex_levels = sorted(ds['sex'].dropna().unique())
if len(sex_levels) != 2:
    raise ValueError(f"Expected exactly two groups in 'sex', found {sex_levels}")
lower_level, upper_level = sex_levels
group1 = ds.loc[ds['sex'] == upper_level, 'age']
group2 = ds.loc[ds['sex'] == lower_level, 'age']

t_statistic, p_value = stats.ttest_ind(group1, group2)
print(f"\nTwo-sample t-test:\nT-statistic: {t_statistic:.3f}, P-value: {p_value:.3f}")

# Test 3: chi-squared test of independence.
# H0: sex and heart_attack are independent.
contingency_table = pd.crosstab(ds['sex'], ds['heart_attack'])
chi2, p_value, dof, expected = stats.chi2_contingency(contingency_table)
print(f"\nChi-squared test:\nChi2: {chi2:.3f}, P-value: {p_value:.3f}, Degrees of freedom: {dof}")


# Test 4: one-sided exact binomial test on the model's hold-out accuracy.
# H0: the true accuracy is less than or equal to 0.7 (70%).
# H1: the true accuracy is greater than 0.7.

n = len(y_test)  # number of test samples
k = int((y_pred == y_test).sum())  # number of correct predictions
p = 0.7  # hypothesized accuracy under H0
# The p-value is P(X >= k) for X ~ Binomial(n, p). binom.sf(x) returns P(X > x),
# so it is evaluated at k - 1 to include the observed count itself.
p_value_accuracy = binom.sf(k - 1, n, p)

print(f"The p-value for the accuracy test of {accuracy} against null hypothesis {p} is: {p_value_accuracy}")

# Interpretation:
# If the p-value is less than your significance level (alpha, typically 0.05),
# you reject the null hypothesis. Otherwise, you fail to reject it.


One-sample t-test:
T-statistic: -0.254, P-value: 0.800

Two-sample t-test:
T-statistic: -2.561, P-value: 0.011

Chi-squared test:
Chi2: 11.420, P-value: 0.001, Degrees of freedom: 1
The p-value for the accuracy test of 0.825 against null hypothesis 0.7 is: 1.776389130438671e-05
