In [5]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

heart_data = pd.read_csv('datasets/heart.csv')

X = heart_data.drop(columns='target', axis=1)
Y = heart_data['target']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=42)

models = {
    "Logistic Regression": LogisticRegression(max_iter=120000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Support Vector Machine": SVC(probability=True, random_state=42)
}

best_model = None
best_accuracy = 0
best_model_name = ""

for model_name, model in models.items():
    model.fit(X_train, Y_train)
    
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)
    
    train_accuracy = accuracy_score(Y_train, train_predictions)
    test_accuracy = accuracy_score(Y_test, test_predictions)
    
    print(f"{model_name} Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}")
    
    if test_accuracy > best_accuracy:
        best_model = model
        best_accuracy = test_accuracy
        best_model_name = model_name

print(f"\nBest Model: {best_model_name} with Test Accuracy: {best_accuracy:.4f}")

model_filename = f"best_model_{best_model_name.replace(' ', '_').lower()}.pkl"
with open(model_filename, 'wb') as file:
    pickle.dump(best_model, file)
print(f"Best model saved as {model_filename}")

input_data_str = ['62', '0', '0', '140', '268', '0', '0', '160', '0', '3.6', '0', '2', '2']
input_data = np.array(input_data_str, dtype=float).reshape(1, -1)

prediction = best_model.predict(input_data)

if prediction[0] == 0:
    print("The person does not have heart disease.")
else:
    print("The person has heart disease.")


Logistic Regression Train Accuracy: 0.8512, Test Accuracy: 0.8033
Random Forest Train Accuracy: 1.0000, Test Accuracy: 0.8361
Support Vector Machine Train Accuracy: 0.6529, Test Accuracy: 0.6557

Best Model: Random Forest with Test Accuracy: 0.8361
Best model saved as best_model_random_forest.pkl
The person does not have heart disease.


