In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import timeit

# Load the dataset.
# The path is a raw string: it contains Windows backslashes that a normal
# string literal would try to parse as escape sequences ("\P", "\h", ...),
# which are invalid and trigger a warning on recent Python versions.
dataset = pd.read_csv(r'D:\Programming\Datasets\Classification\Heart_Failure\heart.csv')

# Re-encode the categorical columns as integer codes.
# One encoder is refitted per column, so after the loop it only holds the
# classes of the last column ('ST_Slope').
label_encoder = LabelEncoder()
for column in ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']:
    dataset[column] = label_encoder.fit_transform(dataset[column])

# Feature matrix and target as NumPy arrays.
X = dataset[['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol',
             'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope']].values
y = dataset[['HeartDisease']].values

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The target was selected with a list of columns, so it is 2-D; flatten it to
# the 1-D form passed to the estimators' fit() calls.
y_train = y_train.ravel()
y_test = y_test.ravel()

# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

# Result collectors, one entry appended per model by the cells that follow:
# accuracies, training times and testing (prediction) times.
acc_all, train_all, test_all = [], [], []

In [13]:
# Logistic Regression

logistic_regression = LogisticRegression()

# Time one fit and one prediction pass (a single run each).
training_time_lr = timeit.Timer(lambda: logistic_regression.fit(X_train, y_train)).timeit(number=1)
testing_time_lr = timeit.Timer(lambda: logistic_regression.predict(X_test)).timeit(number=1)

# The timed prediction's result is discarded, so predict again to score the model.
y_pred_lr = logistic_regression.predict(X_test)
accuracy_lr = accuracy_score(y_test, y_pred_lr)

# Record this model's results in the shared lists.
acc_all.append(accuracy_lr)
train_all.append(training_time_lr)
test_all.append(testing_time_lr)

print(
    f"Logistic Regression Training Time: {training_time_lr:.4f} seconds",
    f"Logistic Regression Testing Time: {testing_time_lr:.4f} seconds",
    f"Logistic Regression Accuracy: {accuracy_lr:.2f}",
    sep="\n",
)

Logistic Regression Training Time: 0.0033 seconds
Logistic Regression Testing Time: 0.0002 seconds
Logistic Regression Accuracy: 0.85


In [14]:
# K-Nearest Neighbors (KNN)

knn_classifier = KNeighborsClassifier(n_neighbors=3)

# Time one fit and one prediction pass (a single run each).
training_time_knn = timeit.Timer(lambda: knn_classifier.fit(X_train, y_train)).timeit(number=1)
testing_time_knn = timeit.Timer(lambda: knn_classifier.predict(X_test)).timeit(number=1)

# The timed prediction's result is discarded, so predict again to score the model.
y_pred_knn = knn_classifier.predict(X_test)
accuracy_knn = accuracy_score(y_test, y_pred_knn)

# Record this model's results in the shared lists.
acc_all.append(accuracy_knn)
train_all.append(training_time_knn)
test_all.append(testing_time_knn)

print(
    f"KNN Training Time: {training_time_knn:.4f} seconds",
    f"KNN Testing Time: {testing_time_knn:.4f} seconds",
    f"KNN Accuracy: {accuracy_knn:.2f}",
    sep="\n",
)

KNN Training Time: 0.0026 seconds
KNN Testing Time: 0.0126 seconds
KNN Accuracy: 0.83


In [15]:
# Decision Trees

# Tree construction is randomised in scikit-learn, so fix the seed (same value
# as the train/test split) to make the reported accuracy reproducible.
decision_tree = DecisionTreeClassifier(random_state=42)

# Time one fit and one prediction pass (a single run each).
training_time_dt = timeit.timeit(lambda: decision_tree.fit(X_train, y_train), number=1)
testing_time_dt = timeit.timeit(lambda: decision_tree.predict(X_test), number=1)

# The timed prediction's result is discarded, so predict again to score the model.
y_pred_dt = decision_tree.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)

print(f"Decision Tree Training Time: {training_time_dt:.4f} seconds")
print(f"Decision Tree Testing Time: {testing_time_dt:.4f} seconds")
print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")

# Record this model's results in the shared lists.
acc_all.append(accuracy_dt)
train_all.append(training_time_dt)
test_all.append(testing_time_dt)

Decision Tree Training Time: 0.0041 seconds
Decision Tree Testing Time: 0.0002 seconds
Decision Tree Accuracy: 0.80


In [16]:
# Random Forest

# The forest is built from random bootstrap samples and feature subsets, so
# fix the seed (same value as the train/test split) to make the reported
# accuracy reproducible.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)

# Time one fit and one prediction pass (a single run each).
training_time_rf = timeit.timeit(lambda: random_forest.fit(X_train, y_train), number=1)
testing_time_rf = timeit.timeit(lambda: random_forest.predict(X_test), number=1)

# The timed prediction's result is discarded, so predict again to score the model.
y_pred_rf = random_forest.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)

print(f"Random Forest Training Time: {training_time_rf:.4f} seconds")
print(f"Random Forest Testing Time: {testing_time_rf:.4f} seconds")
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")

# Record this model's results in the shared lists.
acc_all.append(accuracy_rf)
train_all.append(training_time_rf)
test_all.append(testing_time_rf)

Random Forest Training Time: 0.1760 seconds
Random Forest Testing Time: 0.0095 seconds
Random Forest Accuracy: 0.89


In [17]:
# Naive Bayes

naive_bayes = GaussianNB()

# Time one fit and one prediction pass (a single run each).
training_time_nb = timeit.Timer(lambda: naive_bayes.fit(X_train, y_train)).timeit(number=1)
testing_time_nb = timeit.Timer(lambda: naive_bayes.predict(X_test)).timeit(number=1)

# The timed prediction's result is discarded, so predict again to score the model.
y_pred_nb = naive_bayes.predict(X_test)
accuracy_nb = accuracy_score(y_test, y_pred_nb)

# Record this model's results in the shared lists.
acc_all.append(accuracy_nb)
train_all.append(training_time_nb)
test_all.append(testing_time_nb)

print(
    f"Naive Bayes Training Time: {training_time_nb:.4f} seconds",
    f"Naive Bayes Testing Time: {testing_time_nb:.4f} seconds",
    f"Naive Bayes Accuracy: {accuracy_nb:.2f}",
    sep="\n",
)

Naive Bayes Training Time: 0.0013 seconds
Naive Bayes Testing Time: 0.0002 seconds
Naive Bayes Accuracy: 0.84


In [18]:
# Support Vector Machine (SVM)

svm_classifier = SVC()

# Time one fit and one prediction pass (a single run each).
training_time_svm = timeit.Timer(lambda: svm_classifier.fit(X_train, y_train)).timeit(number=1)
testing_time_svm = timeit.Timer(lambda: svm_classifier.predict(X_test)).timeit(number=1)

# The timed prediction's result is discarded, so predict again to score the model.
y_pred_svm = svm_classifier.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

# Record this model's results in the shared lists.
acc_all.append(accuracy_svm)
train_all.append(training_time_svm)
test_all.append(testing_time_svm)

print(
    f"SVM Training Time: {training_time_svm:.4f} seconds",
    f"SVM Testing Time: {testing_time_svm:.4f} seconds",
    f"SVM Accuracy: {accuracy_svm:.2f}",
    sep="\n",
)

SVM Training Time: 0.0130 seconds
SVM Testing Time: 0.0059 seconds
SVM Accuracy: 0.86


In [19]:
# Multi-Layer Perceptron Classifier (MLPC)

# Weight initialisation and batch shuffling are random, so fix the seed (same
# value as the train/test split) to make the reported accuracy reproducible.
mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1400, random_state=42)

# Time one fit and one prediction pass (a single run each).
training_time_mlp = timeit.timeit(lambda: mlp_classifier.fit(X_train, y_train), number=1)
testing_time_mlp = timeit.timeit(lambda: mlp_classifier.predict(X_test), number=1)

# The timed prediction's result is discarded, so predict again to score the model.
y_pred_mlp = mlp_classifier.predict(X_test)
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)

print(f"MLP Classifier Training Time: {training_time_mlp:.4f} seconds")
print(f"MLP Classifier Testing Time: {testing_time_mlp:.4f} seconds")
print(f"MLP Classifier Accuracy: {accuracy_mlp:.2f}")

# Record this model's own accuracy: the original appended accuracy_svm here,
# so the MLP result never entered acc_all and the SVM value was stored twice.
acc_all.append(accuracy_mlp)
train_all.append(training_time_mlp)
test_all.append(testing_time_mlp)

MLP Classifier Training Time: 2.9413 seconds
MLP Classifier Testing Time: 0.0002 seconds
MLP Classifier Accuracy: 0.87


In [20]:
# Relative weights of the accuracy term and the training-time term.
weight_accuracy = 0.5
weight_time = 0.5

# Extremes used for min-max normalisation, computed once instead of per model.
# Assumes acc_all / train_all hold the values of every model scored below.
acc_min, acc_max = min(acc_all), max(acc_all)
train_min, train_max = min(train_all), max(train_all)


def _bati(accuracy, training_time):
    """Return the BATI score for one model.

    The score is the weighted sum of the min-max normalised accuracy and of
    one minus the min-max normalised training time, so higher accuracy and
    shorter training both increase it.

    Raises ZeroDivisionError if all accuracies or all training times are equal.
    """
    accuracy_term = (accuracy - acc_min) / (acc_max - acc_min)
    time_term = 1 - (training_time - train_min) / (train_max - train_min)
    return weight_accuracy * accuracy_term + weight_time * time_term


lr_bati = _bati(accuracy_lr, training_time_lr)
print(f"Logistic Regression BATI = {lr_bati:.2f}")

knn_bati = _bati(accuracy_knn, training_time_knn)
print(f"K-Nearest Neighbors BATI = {knn_bati:.2f}")

dt_bati = _bati(accuracy_dt, training_time_dt)
print(f"Decision Trees BATI = {dt_bati:.2f}")

rf_bati = _bati(accuracy_rf, training_time_rf)
print(f"Random Forest BATI = {rf_bati:.2f}")

nb_bati = _bati(accuracy_nb, training_time_nb)
print(f"Naive Bayes BATI = {nb_bati:.2f}")

svm_bati = _bati(accuracy_svm, training_time_svm)
print(f"Support Vector Machine BATI = {svm_bati:.2f}")

mlp_bati = _bati(accuracy_mlp, training_time_mlp)
print(f"Multi-Layer Perceptron BATI = {mlp_bati:.2f}")

Logistic Regression BATI = 0.76
K-Nearest Neighbors BATI = 0.68
Decision Trees BATI = 0.50
Random Forest BATI = 0.97
Naive Bayes BATI = 0.74
Support Vector Machine BATI = 0.85
Multi-Layer Perceptron BATI = 0.38
