In [2]:
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

In [6]:
df = pd.read_csv("../dataset/jantung.csv")
df = df.sample(frac = 1)
df.head()

Unnamed: 0,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,KidneyDisease,HeartDisease
8368,No,No,0,1,Male,18-24,No,Yes,Very good,7,No,No
6590,No,No,0,2,Female,50-54,No,Yes,Very good,6,No,No
4991,No,No,0,1,Female,60-64,Yes (during pregnancy),Yes,Good,6,No,No
492,No,No,0,1,Female,75-79,No,No,Good,10,No,No
729,No,No,0,0,Male,70-74,No,Yes,Good,7,No,No


In [7]:
df["GenHealth"].unique()

array(['Very good', 'Good', 'Fair', 'Excellent', 'Poor'], dtype=object)

In [18]:
df

Unnamed: 0,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,KidneyDisease,HeartDisease
8368,0,0,0,1,1,0,0,1,3,7,0,0
6590,0,0,0,2,0,6,0,1,3,6,0,0
4991,0,0,0,1,0,8,0,1,2,6,0,0
492,0,0,0,1,0,11,0,0,2,10,0,0
729,0,0,0,0,1,10,0,1,2,7,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
6386,0,0,0,0,0,8,0,1,2,7,0,0
5599,0,0,0,0,0,10,0,0,2,6,0,0
5109,1,0,0,0,1,12,1,1,3,8,0,0
354,0,0,0,0,0,7,0,0,3,6,0,0


In [8]:
df["AlcoholDrinking"] = df['AlcoholDrinking'].replace(['Yes', 'No'], ["1", "0"])
df["Stroke"] = df['Stroke'].replace(['Yes', 'No'], ["1", "0"])
df["Sex"] = df['Sex'].replace(['Male', 'Female'], ["1", "0"])
arrAgeCategory = list(map(str, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]))
df["AgeCategory"] = df['AgeCategory'].replace(['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64','65-69','70-74', '75-79', '80 or older'], arrAgeCategory)
df["Diabetic"] = df['AlcoholDrinking'].replace(['Yes', 'No'], ["1", "0"])
df["PhysicalActivity"] = df['PhysicalActivity'].replace(['Yes', 'No'], ["1", "0"])
df["GenHealth"] = df['GenHealth'].replace(['Very good', 'Fair', 'Good', 'Poor', 'Excellent'], list(map(str, [3, 1, 2, 0, 4])))
df["KidneyDisease"] = df['KidneyDisease'].replace(['Yes', 'No'], ["1", "0"])
df["HeartDisease"] = df['HeartDisease'].replace(['Yes', 'No'], ["1", "0"])
arrAgeCategory

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']

In [9]:
train, test = train_test_split(df, train_size=0.8, random_state=42, shuffle=False)
train

Unnamed: 0,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,KidneyDisease,HeartDisease
8368,0,0,0,1,1,0,0,1,3,7,0,0
6590,0,0,0,2,0,6,0,1,3,6,0,0
4991,0,0,0,1,0,8,0,1,2,6,0,0
492,0,0,0,1,0,11,0,0,2,10,0,0
729,0,0,0,0,1,10,0,1,2,7,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
6145,0,0,0,0,1,2,0,1,4,7,0,0
2085,1,0,0,0,1,9,1,0,2,6,0,0
3349,0,0,3,3,0,7,0,0,0,6,0,0
6777,0,0,0,0,0,8,0,1,1,7,0,0


In [10]:
train_feature = train.loc[:, train.columns != "HeartDisease"]
train_target = train["HeartDisease"]

test_feature = test.loc[:, test.columns != "HeartDisease"]
test_target = test["HeartDisease"]

In [14]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

model = DecisionTreeClassifier(criterion='entropy')
history = model.fit(train_feature,train_target)
y_pred = model.predict(test_feature)

print("classification_report :\n" + str(classification_report(test_target,y_pred)))
print("confusion_matrix :\n" + str(confusion_matrix(test_target,y_pred)))
print("accuracy_score :\n" + str(accuracy_score(test_target,y_pred)))

classification_report :
              precision    recall  f1-score   support

           0       0.91      0.94      0.93      1883
           1       0.23      0.16      0.19       198

    accuracy                           0.87      2081
   macro avg       0.57      0.55      0.56      2081
weighted avg       0.85      0.87      0.86      2081

confusion_matrix :
[[1776  107]
 [ 166   32]]
accuracy_score :
0.8688130706391158


In [15]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=7)
model.fit(train_feature,train_target)
y_pred = model.predict(test_feature)

print("classification_report :\n" + str(classification_report(test_target,y_pred)))
print("confusion_matrix :\n" + str(confusion_matrix(test_target,y_pred)))
print("accuracy_score :\n" + str(accuracy_score(test_target,y_pred)))

classification_report :
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      1883
           1       0.33      0.05      0.08       198

    accuracy                           0.90      2081
   macro avg       0.62      0.52      0.51      2081
weighted avg       0.85      0.90      0.86      2081

confusion_matrix :
[[1865   18]
 [ 189    9]]
accuracy_score :
0.9005285920230658


In [16]:
from sklearn.svm import SVC

model = SVC()
model.fit(train_feature,train_target)
y_pred = model.predict(test_feature)

print("classification_report :\n" + str(classification_report(test_target,y_pred)))
print("confusion_matrix :\n" + str(confusion_matrix(test_target,y_pred)))
print("accuracy_score :\n" + str(accuracy_score(test_target,y_pred)))

classification_report :
              precision    recall  f1-score   support

           0       0.90      1.00      0.95      1883
           1       0.00      0.00      0.00       198

    accuracy                           0.90      2081
   macro avg       0.45      0.50      0.48      2081
weighted avg       0.82      0.90      0.86      2081

confusion_matrix :
[[1883    0]
 [ 198    0]]
accuracy_score :
0.9048534358481499


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(train_feature,train_target)
y_pred = model.predict(test_feature)

print("classification_report :\n" + str(classification_report(test_target,y_pred)))
print("confusion_matrix :\n" + str(confusion_matrix(test_target,y_pred)))
print("accuracy_score :\n" + str(accuracy_score(test_target,y_pred)))

classification_report :
              precision    recall  f1-score   support

           0       0.92      0.99      0.95      1883
           1       0.56      0.13      0.21       198

    accuracy                           0.91      2081
   macro avg       0.74      0.56      0.58      2081
weighted avg       0.88      0.91      0.88      2081

confusion_matrix :
[[1863   20]
 [ 173   25]]
accuracy_score :
0.9072561268620856
