### Load Data From CSV File

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import category_encoders as ce
from sklearn import preprocessing
from matplotlib.ticker import NullFormatter
from fast_ml.model_development import train_valid_test_split
%matplotlib inline

In [6]:
df = pd.read_csv('Dataset.csv')
print("Veri seti örneği:")
df.head()

Unnamed: 0,age,sex,smoke,years,ldl,chp,height,weight,fh,active,...,ihd,hr,dm,bpsys,bpdias,htn,ivsd,ecgpatt,qwave,target
0,65.0,0,0,0,69.0,4,168,111.0,1,0,...,1,98,1,120,80,1,0,4,0,0
1,54.0,1,0,0,117.0,2,145,81.0,0,0,...,0,85,0,130,80,0,0,4,0,0
2,61.0,0,1,45,86.2,2,160,72.0,0,0,...,0,63,1,150,70,1,0,2,0,1
3,57.0,0,0,0,76.0,2,176,78.0,1,0,...,1,74,1,120,70,0,0,3,0,1
4,62.0,1,0,0,160.0,3,154,61.0,0,0,...,0,89,1,110,70,0,0,1,0,1


### Check for missing data

In [3]:
df.isnull().values.any()

True

In [5]:
df.dropna()

Unnamed: 0,age,sex,smoke,years,ldl,chp,height,weight,fh,active,...,ihd,hr,dm,bpsys,bpdias,htn,ivsd,ecgpatt,qwave,target
0,65.0,0,0,0,69.0,4,168,111.0,1,0,...,1,98,1,120,80,1,0,4,0,0
1,54.0,1,0,0,117.0,2,145,81.0,0,0,...,0,85,0,130,80,0,0,4,0,0
2,61.0,0,1,45,86.2,2,160,72.0,0,0,...,0,63,1,150,70,1,0,2,0,1
3,57.0,0,0,0,76.0,2,176,78.0,1,0,...,1,74,1,120,70,0,0,3,0,1
4,62.0,1,0,0,160.0,3,154,61.0,0,0,...,0,89,1,110,70,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346,46.0,1,0,0,111.0,4,143,66.5,0,0,...,0,107,0,150,90,1,1,1,0,0
347,42.0,0,1,17,164.0,2,164,79.0,1,1,...,0,112,0,110,70,0,0,1,0,0
348,51.0,0,0,0,116.0,4,180,92.0,0,1,...,0,81,0,105,80,1,0,4,0,0
349,45.0,1,0,0,102.0,3,166,79.0,0,0,...,0,96,0,110,70,0,0,4,0,0


### Division of samples

In [7]:
X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(df , target = 'target', train_size=0.8, valid_size=0.1, test_size=0.1)

In [13]:
print(X_train.shape) 
print(y_train.shape)
print(X_valid.shape)
print(y_valid.shape)
print(X_test.shape)
print(y_test.shape)

(280, 20)
(280,)
(35, 20)
(35,)
(36, 20)
(36,)


### K-Nearest Neighbors (KNN) with validation data

In [None]:
from sklearn.datasets import make_blobs
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

knn3 = KNeighborsClassifier(n_neighbors = 3)
knn5 = KNeighborsClassifier(n_neighbors = 5)

In [None]:
knn3.fit(X_train, y_train)
knn5.fit(X_train, y_train)

y_pred_3 = knn3.predict(X_valid)
y_pred_5 = knn5.predict(X_valid)

In [None]:
from sklearn.metrics import accuracy_score
print("Accuracy with k=3", accuracy_score(y_valid, y_pred_3)*100)
print("Accuracy with k=5", accuracy_score(y_valid, y_pred_5)*100)

### K-Nearest Neighbors (KNN) with test data

In [None]:
y_pred_3 = knn3.predict(X_test)
y_pred_5 = knn5.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
print("Accuracy with k=3 :", accuracy_score(y_test, y_pred_3)*100)
print("Accuracy with k=5 :", accuracy_score(y_test, y_pred_5)*100)

### Naive Bayes with validation data

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_valid)
print("Number of mislabeled points out of a total %d points : %d" % (X_test.shape[0], (y_valid != y_pred).sum()))
print("Accuracy : ", (X_test.shape[0] - (y_valid != y_pred).sum())/(X_test.shape[0])*100)

### Naive Bayes with test data

In [None]:
y_pred = gnb.fit(X_train, y_train).predict(X_test)
print("Number of mislabeled points out of a total %d points : %d" % (X_test.shape[0], (y_test != y_pred).sum()))
print("Accuracy : ", (X_test.shape[0] - (y_test != y_pred).sum())/(X_test.shape[0])*100)

### Decision trees with validation data

In [None]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.model_selection import train_test_split 
from sklearn import metrics

clf = DecisionTreeClassifier()
clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_valid)

print("Accuracy: ",metrics.accuracy_score(y_valid, y_pred)*100)

### decision trees with test data

In [None]:
y_pred = clf.predict(X_test)

print("Accuracy: ",metrics.accuracy_score(y_test, y_pred)*100)

### Backpropagation with validation data