In [1]:
import pandas as pd
from warnings import simplefilter
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


# ignore all warnings
simplefilter(action='ignore', category=UserWarning)

In [2]:
from google.colab import files
  
  
uploaded = files.upload()

Saving heart.csv to heart.csv


In [3]:
import io
data = pd.read_csv(io.BytesIO(uploaded['heart.csv']))

In [4]:
# define the features columns
x_columns = data.columns.drop("target")

In [5]:
# create training and testing data
X = data[x_columns]
y = data["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(y_test)

271    0
6      1
159    1
259    0
197    0
      ..
148    1
141    1
118    1
294    0
55     1
Name: target, Length: 61, dtype: int64


In [6]:
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

nb = GaussianNB()
nb.fit(X_train, y_train)

models = {"DTC": dtc, "NB": nb}


In [7]:
def test_models(X_test, y_test):
    
    for name, model in models.items():
        # predict y vals
        y_pred = model.predict(X_test)
        # evaluate the model
        accuracy = accuracy_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        print("{} results".format(name))
        print("Accuracy: {}".format(round(accuracy, 2)))
        print("Recall: {}".format(round(recall, 2)))
        print("Precision: {}".format(round(precision, 2)))
        print("F1: {}".format(round(f1, 2)))
        print()

test_models(X_test, y_test)


DTC results
Accuracy: 0.75
Recall: 0.74
Precision: 0.85
F1: 0.79

NB results
Accuracy: 0.82
Recall: 0.84
Precision: 0.86
F1: 0.85



In [8]:
log = LogisticRegression()
log.fit(X_train, y_train)

log_y_pred = log.predict(X_test)

print("Logistic Regression results")
print("Accuracy: {}".format(round(accuracy_score(y_test, log_y_pred), 2)))
print("Recall: {}".format(round(recall_score(y_test, log_y_pred), 2)))
print("Precision: {}".format(round(precision_score(y_test, log_y_pred), 2)))
print("F1: {}".format(round(f1_score(y_test, log_y_pred), 2)))
print()

Logistic Regression results
Accuracy: 0.8
Recall: 0.84
Precision: 0.84
F1: 0.84



In [9]:
svc = SVC()
svc.fit(X_train, y_train)

svc_y_pred = svc.predict(X_test)

print("SVM results")
print("Accuracy: {}".format(round(accuracy_score(y_test, svc_y_pred), 2)))
print("Recall: {}".format(round(recall_score(y_test, svc_y_pred), 2)))
print("Precision: {}".format(round(precision_score(y_test, svc_y_pred), 2)))
print("F1: {}".format(round(f1_score(y_test, svc_y_pred), 2)))
print()

SVM results
Accuracy: 0.52
Recall: 0.66
Precision: 0.61
F1: 0.63

