In [13]:
import pandas as pd
# Read the raw dataset, pull the label column out as the target vector, and
# one-hot encode the remaining predictors. drop_first=True drops one dummy
# level per categorical column so the encoded levels are not redundant.
df = pd.read_csv('heart_attack_prediction_indonesia.csv')
y = df['heart_attack']
X = pd.get_dummies(
    df.drop(columns=['heart_attack']),
    drop_first=True,
)

In [14]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# Baseline model: standardise the features, then fit a logistic regression.
# Keeping the scaler inside the pipeline means it is fitted on the training
# rows only.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression()),
])

pipe.fit(X_train, y_train)

In [15]:
# Score the fitted baseline pipeline on the held-out rows.
y_pred = pipe.predict(X_test)

print(
    f"Accuracy: {accuracy_score(y_test, y_pred)}",
    f"F1 Score: {f1_score(y_test, y_pred)}",
    sep="\n",
)

Accuracy: 0.7308263079789081
F1 Score: 0.6388477017581021


In [16]:
# Per-class precision/recall/F1 followed by the raw confusion matrix
# (rows = true class, columns = predicted class) for the hold-out set.
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

              precision    recall  f1-score   support

           0       0.75      0.83      0.79     18906
           1       0.70      0.59      0.64     12765

    accuracy                           0.73     31671
   macro avg       0.72      0.71      0.71     31671
weighted avg       0.73      0.73      0.73     31671

[[15606  3300]
 [ 5225  7540]]


In [17]:
from sklearn.model_selection import cross_validate

# 5-fold cross-validation of the baseline pipeline on the full dataset.
# cross_validate refits a fresh clone of `pipe` on every fold, so the scaler
# is always fitted on that fold's training portion only.
cv_results = cross_validate(
    pipe, X, y,
    cv=5,
    scoring=['accuracy', 'f1', 'roc_auc'],
    return_train_score=True,
)

# Report the fold-averaged test metrics.
print(
    f"Mean Accuracy: {cv_results['test_accuracy'].mean()}",
    f"Mean F1 Score: {cv_results['test_f1'].mean()}",
    f"Mean AUC: {cv_results['test_roc_auc'].mean()}",
    sep="\n",
)


Mean Accuracy: 0.7306179154431499
Mean F1 Score: 0.6354857577318758
Mean AUC: 0.8033184947963699


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_validate

# Define models you want to test
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

# Candidate classifiers, keyed by the label printed in the comparison loop.
# Every estimator that accepts a seed gets the same fixed one, so the
# cross-validated scores are reproducible after a kernel restart.
RANDOM_STATE = 42  # same value as the train/test split earlier in the notebook

models = {
    'LogisticRegression': LogisticRegression(max_iter=1000),

    # Bootstrap samples and per-split feature subsets are drawn at random.
    'RandomForest': RandomForestClassifier(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),

    'GradientBoosting': GradientBoostingClassifier(
        n_estimators=100, max_depth=3, random_state=RANDOM_STATE
    ),

    # The seed only matters when the dual solver is used; setting it is
    # harmless otherwise.
    'LinearSVC': LinearSVC(
        max_iter=2000, random_state=RANDOM_STATE
    ),

    'KNN': KNeighborsClassifier(
        n_neighbors=5, n_jobs=-1  # parallelise the neighbour search across all cores
    ),

    'NaiveBayes': GaussianNB()
}


# Evaluate every candidate under the same protocol: standardise the features,
# then run 5-fold cross-validation and report the fold-averaged test metrics.
for name, model in models.items():
    pipe = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('clf', model),
    ])

    cv_results = cross_validate(
        pipe, X, y,
        cv=5,
        scoring=['accuracy', 'f1', 'roc_auc'],
        return_train_score=False,
    )

    print(
        f"🧠 {name}",
        f"  Mean Accuracy : {cv_results['test_accuracy'].mean():.3f}",
        f"  Mean F1 Score : {cv_results['test_f1'].mean():.3f}",
        f"  Mean ROC AUC  : {cv_results['test_roc_auc'].mean():.3f}",
        "-" * 40,
        sep="\n",
    )


🧠 LogisticRegression
  Mean Accuracy : 0.731
  Mean F1 Score : 0.635
  Mean ROC AUC  : 0.803
----------------------------------------
🧠 RandomForest
  Mean Accuracy : 0.734
  Mean F1 Score : 0.623
  Mean ROC AUC  : 0.813
----------------------------------------
🧠 GradientBoosting
  Mean Accuracy : 0.738
  Mean F1 Score : 0.651
  Mean ROC AUC  : 0.817
----------------------------------------
🧠 LinearSVC
  Mean Accuracy : 0.731
  Mean F1 Score : 0.634
  Mean ROC AUC  : 0.803
----------------------------------------
🧠 KNN
  Mean Accuracy : 0.678
  Mean F1 Score : 0.533
  Mean ROC AUC  : 0.713
----------------------------------------
🧠 NaiveBayes
  Mean Accuracy : 0.717
  Mean F1 Score : 0.632
  Mean ROC AUC  : 0.793
----------------------------------------
