In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Random-forest baseline for predicting HadHeartAttack.
# Pipeline: load the cleaned spreadsheet -> keep numeric columns -> stratified
# train/test split -> fit the forest -> report metrics and feature importances.

# --- Configuration: every value a reader might want to tune lives here ---
DATA_PATH = "Cleaned_Dataset_v2.xlsx"
TARGET_COLUMN = "HadHeartAttack"
TEST_SIZE = 0.3
RANDOM_STATE = 42  # shared by the split and the forest so the run is repeatable
N_ESTIMATORS = 100
TOP_N_FEATURES = 10

# Read cleaned dataset. It is kept under its own name so the unfiltered frame
# is still available after the numeric filter below.
raw_df = pd.read_excel(DATA_PATH)

# Remove non-numeric columns. np.number does not match bool, object or
# category dtypes, so columns stored that way are dropped here without warning.
df = raw_df.select_dtypes(include=[np.number])

# Fail early with an explicit message if the target did not survive the filter;
# otherwise the drop() below raises a KeyError that does not explain the cause.
if TARGET_COLUMN not in df.columns:
    if TARGET_COLUMN in raw_df.columns:
        reason = (
            f"it has non-numeric dtype {raw_df[TARGET_COLUMN].dtype} "
            "and was removed by the numeric filter"
        )
    else:
        reason = "it is not present in the dataset"
    raise KeyError(f"Target column {TARGET_COLUMN!r} is unavailable: {reason}")

# Define target and features
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Split data into training and testing set.
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)

# Train the model
# NOTE(review): in the recorded run the positive class is about 5% of the test
# set (4123 of 75309) and its recall is 0.17, so the ~0.946 accuracy is close to
# what always predicting class 0 would score. Consider class_weight="balanced"
# or a tuned decision threshold; not changed here so the recorded output stays valid.
rf_model = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE)
rf_model.fit(X_train, y_train)

# Predict
y_pred_rf_model = rf_model.predict(X_test)

# Calculate Performance
acc = accuracy_score(y_test, y_pred_rf_model)
cm = confusion_matrix(y_test, y_pred_rf_model)  # rows = true class, columns = predicted class
report = classification_report(y_test, y_pred_rf_model)  # plain-text report (the default)

print(f"Accuracy: {acc}")
print(f"Confusion Matrix:\n{cm}")
print(f"Classification Report:\n{report}")

# Optional: Feature importance, ranked from most to least important
feature_importance = rf_model.feature_importances_
feature_names = X.columns
importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': feature_importance
}).sort_values('Importance', ascending=False)

# The header and the slice share TOP_N_FEATURES so they cannot drift apart.
print(f"\nTop {TOP_N_FEATURES} Most Important Features:")
print(importance_df.head(TOP_N_FEATURES))

Accuracy: 0.9459427160100387
Confusion Matrix:
[[70553   633]
 [ 3438   685]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.99      0.97     71186
           1       0.52      0.17      0.25      4123

    accuracy                           0.95     75309
   macro avg       0.74      0.58      0.61     75309
weighted avg       0.93      0.95      0.93     75309


Top 10 Most Important Features:
               Feature  Importance
4            HadAngina    0.163353
22                 BMI    0.133337
21              Weight    0.115247
20              Height    0.088344
3           SleepHours    0.068705
0   PhysicalHealthDays    0.054098
1     MentalHealthDays    0.041000
27   TetanusLast10Tdap    0.021707
25        FluVaxLast12    0.020127
29            CovidPos    0.019525
