In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
import pickle

# Fetch the heart-disease CSV from GitHub into a DataFrame
url = 'https://raw.githubusercontent.com/MicoT/heart-disease-dataset/main/data.csv'
df = pd.read_csv(url)

# Separate the predictor columns from the 'target' label column
features = df.drop(columns=['target'])
target = df['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Train four classifiers on the training split and predict the held-out rows.
# scikit-learn's fit() returns the estimator itself, so construction and
# training are chained into a single expression per model.
# NOTE(review): the features are passed in unscaled; SVC, MLP and logistic
# regression are typically scale-sensitive — consider a StandardScaler
# pipeline (TODO confirm against the dataset's feature ranges).

# Random Forest: 100 trees, fixed seed
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

# Logistic Regression: iteration cap raised to 2000
lr_model = LogisticRegression(max_iter=2000, random_state=42).fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)

# Neural Network: multi-layer perceptron with hidden layers of 64 and 32 units
nn_model = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)
nn_predictions = nn_model.predict(X_test)

# Support Vector Classifier with library-default settings
svc_model = SVC(random_state=42).fit(X_train, y_train)
svc_predictions = svc_model.predict(X_test)

# Registry of the fitted estimators, keyed by the display name used in reports
models = {
    'Random Forest': rf_model,
    'Logistic Regression': lr_model,
    'Neural Network': nn_model,
    'SVC': svc_model
}

# For every model print two views of its performance:
#   1. 5-fold cross-validated accuracy over the full dataset (cross_val_score
#      fits clones, so the already-fitted estimators in `models` are untouched);
#   2. a classification report on the held-out test split.
for model_name, model in models.items():
    print(f"Model: {model_name}")
    print("Cross-Validation Performance:")
    cv_scores = cross_val_score(model, features, target, cv=5)
    print("Mean Accuracy:", np.mean(cv_scores))
    print("Standard Deviation:", np.std(cv_scores))
    print("")

    print("Classification Report:")
    # Predict straight from the fitted estimator instead of dispatching on the
    # display name: a name-based if/elif chain with no else branch would leave
    # `predictions` unbound (or stale from the previous iteration) for any
    # model added to `models` without a matching branch.
    predictions = model.predict(X_test)

    print(classification_report(y_test, predictions))
    print()


Model: Random Forest
Cross-Validation Performance:
Mean Accuracy: 0.8185185185185183
Standard Deviation: 0.055431961285539864

Classification Report:
              precision    recall  f1-score   support

           1       0.81      0.88      0.84        33
           2       0.78      0.67      0.72        21

    accuracy                           0.80        54
   macro avg       0.79      0.77      0.78        54
weighted avg       0.79      0.80      0.79        54


Model: Logistic Regression
Cross-Validation Performance:
Mean Accuracy: 0.8444444444444444
Standard Deviation: 0.032288140322523494

Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.97      0.94        33
           2       0.95      0.86      0.90        21

    accuracy                           0.93        54
   macro avg       0.93      0.91      0.92        54
weighted avg       0.93      0.93      0.93        54


Model: Neural Network
Cross-Validation

In [None]:
# Persist every fitted model to its own pickle file (relative path, so it
# lands in the current working directory). The file name is the display name
# lower-cased, with spaces turned into underscores, plus a "_model.pkl" suffix.
for model_name, model in models.items():
    filename = f"{'_'.join(model_name.lower().split(' '))}_model.pkl"
    with open(filename, mode='wb') as pickle_out:
        pickle.dump(model, pickle_out)
    print(f"Saved {model_name} model as {filename}")