In [86]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

In [87]:
df=pd.read_csv('dataset_heart.csv')

In [88]:
df.head()

Unnamed: 0,age,sex,chest pain type,resting blood pressure,serum cholestoral,fasting blood sugar,resting electrocardiographic results,max heart rate,exercise induced angina,oldpeak,ST segment,major vessels,thal,heart disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,2
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,1
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,2
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,1
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,1


In [89]:
x=df.drop(columns='heart disease')
y=df['heart disease']

In [90]:
y.head()

0    2
1    1
2    2
3    1
4    1
Name: heart disease, dtype: int64

# splitting data

In [91]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)

# logistic regression

In [92]:
model=LogisticRegression(max_iter=1000)
model.fit(x,y)

In [93]:
probabilities=model.predict_proba(x)

In [94]:
probabilities[:5]

array([[0.0131376 , 0.9868624 ],
       [0.35841278, 0.64158722],
       [0.80177806, 0.19822194],
       [0.06119412, 0.93880588],
       [0.83613062, 0.16386938]])

In [95]:
predicted_classes = model.predict(x)
print(predicted_classes[:5])

[2 2 1 2 1]


# k-Nearest Neighbors (k-NN)

In [96]:
k=5
model=KNeighborsClassifier(n_neighbors=7)
model.fit(x_train,y_train)

In [97]:
print(model.predict(x_test)) 

[1 2 1 1 2 2 1 1 1 2 2 2 2 2 2 1 2 1 2 2 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 2 1
 2 2 2 2 1 2 1 1 1 1 2 2 1 2 1 1 2 1 2 1 1 1 2 2 1 1 1 1 1 2 2 1 2 1 2 1 1
 1 2 1 2 2 1 1]


# Support Vector Machines (SVM)

In [98]:
model=SVC(kernel='linear')

In [99]:
model.fit(x_train,y_train)

In [100]:
model.predict(x_test)

array([2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1,
       2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1], dtype=int64)

In [101]:
model.predict(x_test[:5])

array([2, 1, 1, 1, 1], dtype=int64)

# Decision Trees

In [102]:
tree=DecisionTreeClassifier(criterion='gini',)


In [103]:
tree.fit(x_train,y_train)

In [104]:
tree.predict(x_test)

array([1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2,
       1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2,
       1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1,
       2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1], dtype=int64)

# Random Forests

In [105]:
rf_random=RandomForestClassifier(n_estimators=100,random_state=42)

In [106]:
rf_random.fit(x_train,y_train)

In [107]:
y_pred=rf_random.predict(x_test)

In [108]:
y_pred

array([1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1,
       1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2,
       1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1], dtype=int64)

# Gradient Boosting

In [109]:
boost_model=GradientBoostingClassifier(n_estimators=100,random_state=42)

In [110]:
boost_model.fit(x_train,y_train)

In [111]:
y_predi=boost_model.predict(x_test)

In [112]:
y_predi

array([1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1,
       1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1,
       1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1], dtype=int64)

# Naive Bayes

In [113]:
n_model=GaussianNB()
n_model.fit(x_train,y_train)

In [114]:
y_predict=n_model.predict(x_test)

In [115]:
y_predict

array([2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1,
       2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1,
       1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)