In [18]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv('heart_attack_prediction_dataset.csv')

# Display the first few rows to check data
print("Initial Data Preview:")
print(df.head())

# List of categorical columns to encode (in addition to 'Sex')
categorical_columns = ['Country', 'Continent', 'Hemisphere', 'Diet', 'Medication Use']

# Label-encode 'Sex' and the categorical columns above.
# A fresh LabelEncoder is fitted per column and kept in `encoders`, so every
# mapping stays available afterwards (encoders[col].classes_ /
# encoders[col].inverse_transform(...)). Refitting one shared encoder would
# silently discard all mappings except the last one.
# LabelEncoder assigns codes in sorted class order, so for the 'Female'/'Male'
# values shown in the preview the result is Female -> 0, Male -> 1.
encoders = {}
for col in ['Sex', *categorical_columns]:
    # `encoder` stays bound to the most recently fitted encoder after the loop.
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    encoders[col] = encoder

# Split "Blood Pressure" (strings such as "158/88") into separate
# "Systolic" and "Diastolic" columns
df[['Systolic', 'Diastolic']] = df['Blood Pressure'].str.split('/', expand=True)

# The split produces strings; convert the new columns to numeric values
df['Systolic'] = pd.to_numeric(df['Systolic'])
df['Diastolic'] = pd.to_numeric(df['Diastolic'])

# Drop the original "Blood Pressure" column now that it has been split
df.drop(columns=['Blood Pressure'], inplace=True)

# Define X (features) and Y (target)
X = df.drop(columns=['Heart Attack Risk', 'Patient ID'])  # Drop the target column and non-relevant ID
Y = df['Heart Attack Risk']  # Target variable

# Split the dataset into 80% training and 20% testing (fixed seed for reproducibility)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Initialize the Gaussian Naïve Bayes model
# NOTE(review): GaussianNB models every feature as a continuous Gaussian, so the
# integer codes of nominal columns (e.g. 'Country') are treated as magnitudes.
# Consider one-hot encoding or CategoricalNB for those columns - TODO confirm.
model = GaussianNB()

# Train the model using the training data
model.fit(X_train, Y_train)

# Make predictions using the test data
Y_pred = model.predict(X_test)

# Calculate and display the accuracy of the model
# NOTE(review): accuracy alone can be misleading if the target classes are
# imbalanced - check Y.value_counts() before relying on this number.
accuracy = accuracy_score(Y_test, Y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')



Initial Data Preview:
  Patient ID  Age     Sex  Cholesterol Blood Pressure  Heart Rate  Diabetes  \
0    BMW7812   67    Male          208         158/88          72         0   
1    CZE1114   21    Male          389         165/93          98         1   
2    BNI9906   21  Female          324         174/99          72         1   
3    JLN3497   84    Male          383        163/100          73         1   
4    GFO8847   66    Male          318          91/88          93         1   

   Family History  Smoking  Obesity  ...  Sedentary Hours Per Day  Income  \
0               0        1        0  ...                 6.615001  261404   
1               1        1        1  ...                 4.963459  285768   
2               0        0        0  ...                 9.463426  235282   
3               1        1        0  ...                 7.648981  125640   
4               1        1        1  ...                 1.514821  160555   

         BMI  Triglycerides  Physical Ac