In [24]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
data = pd.read_csv('./heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
data.shape

(303, 14)

In [4]:
X = data.drop(columns='target', axis=1)
y = data['target']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=3)

In [25]:
models = [LogisticRegression(max_iter=1000), DecisionTreeClassifier(), KNeighborsClassifier(n_neighbors=5), RandomForestClassifier()]

In [13]:
def compareModel():
    for model in models:
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, pred)
        print(f'accuray score of model {model}: {accuracy}')

In [16]:
compareModel()

accuray score of model LogisticRegression(max_iter=1000): 0.7868852459016393
accuray score of model SVC(kernel='linear', max_iter=1000): 0.639344262295082
accuray score of model KNeighborsClassifier(): 0.6557377049180327
accuray score of model RandomForestClassifier(): 0.7704918032786885




## Cross-validation

In [18]:
cv_score_lr = cross_val_score(LogisticRegression(max_iter=1000), X, y, cv=5)
print(cv_score_lr.mean())

0.8282513661202187


In [27]:
def crossValidation():
    for model in models:
        cv_score = cross_val_score(model, X, y, cv=5)
        accuracy = cv_score.mean()*100
        print(f"Mean Accuracy of {model} : {accuracy}%.2f")

In [28]:
crossValidation()

Mean Accuracy of LogisticRegression(max_iter=1000) : 82.82513661202186%.2f
Mean Accuracy of DecisionTreeClassifier() : 74.23497267759564%.2f
Mean Accuracy of KNeighborsClassifier() : 64.38797814207649%.2f
Mean Accuracy of RandomForestClassifier() : 82.48633879781421%.2f
