In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Relative path of the source CSV file.
DATA_PATH = "../dataset/diabetes.csv"

# Read the whole file into a DataFrame and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,polyphagia,delayed_healing,obesity,class
0,40,Male,0,1,0,0,1,1,1
1,58,Male,0,0,0,0,0,0,1
2,41,Male,1,0,0,1,1,0,1
3,45,Male,0,0,1,1,1,0,1
4,60,Male,1,1,1,1,1,1,1


In [3]:
# List the distinct gender labels present in the loaded data.
df["gender"].unique()

array(['Male', 'Female'], dtype=object)

In [4]:
# Encode gender as integers (Male -> 1, Female -> 0) so the column is numeric
# like the other features, instead of holding the strings "1" / "0".
gender_codes = {"Male": 1, "Female": 0}

# Values that are not a known label (e.g. already-encoded 1/0 when this cell is
# re-run) pass through unchanged; astype(int) then raises on anything that
# still is not an integer, so unexpected or missing labels fail loudly rather
# than silently reaching the model.
df["gender"] = df["gender"].map(lambda label: gender_codes.get(label, label)).astype(int)
df

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,polyphagia,delayed_healing,obesity,class
0,40,1,0,1,0,0,1,1,1
1,58,1,0,0,0,0,0,0,1
2,41,1,1,0,0,1,1,0,1
3,45,1,0,0,1,1,1,0,1
4,60,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...
515,39,0,1,1,1,1,1,0,1
516,48,0,1,1,1,1,1,0,1
517,58,0,1,1,1,1,0,1,1
518,32,0,0,0,0,0,1,0,0


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the shuffled
# split identical from run to run.
split_options = dict(train_size=0.8, random_state=42, shuffle=True)
train, test = train_test_split(df, **split_options)
train

Unnamed: 0,age,gender,polyuria,polydipsia,sudden_weight_loss,polyphagia,delayed_healing,obesity,class
434,53,1,1,1,1,1,0,0,1
436,41,1,1,1,1,1,0,1,1
208,54,1,0,0,1,0,1,0,0
332,50,0,0,0,0,0,1,0,0
220,38,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
71,35,0,0,1,1,0,1,0,1
106,58,1,0,1,1,1,0,1,1
270,40,0,1,1,1,0,1,0,1
435,57,1,1,1,1,1,0,0,1


In [6]:
# Name of the label column; every other column is used as a model input.
label_column = "class"

# Training split: labels first, then all remaining columns as features.
train_target = train[label_column]
train_feature = train.loc[:, train.columns != label_column]

# Test split, separated the same way.
test_target = test[label_column]
test_feature = test.loc[:, test.columns != label_column]

test_target

275    0
93     1
6      1
167    1
90     1
      ..
69     1
357    0
423    1
334    0
227    0
Name: class, Length: 104, dtype: int64

In [7]:
# Fit a decision tree on the training split and evaluate it on the test split.
# A fixed random_state makes the fitted tree (and therefore the metrics printed
# below) reproducible: scikit-learn randomly permutes the candidate features at
# each split, so equally good splits can otherwise be chosen differently on
# every run.
model = DecisionTreeClassifier(random_state=42)

# fit() returns the fitted estimator itself, so `history` is simply another
# reference to `model` (it is not a per-epoch training history).
history = model.fit(train_feature, train_target)
y_pred = model.predict(test_feature)

# Report per-class precision/recall, the confusion matrix and overall accuracy.
print("classification_report :\n" + str(classification_report(test_target, y_pred)))
print("confusion_matrix :\n" + str(confusion_matrix(test_target, y_pred)))
print("accuracy_score :\n" + str(accuracy_score(test_target, y_pred)))

classification_report :
              precision    recall  f1-score   support

           0       0.92      1.00      0.96        33
           1       1.00      0.96      0.98        71

    accuracy                           0.97       104
   macro avg       0.96      0.98      0.97       104
weighted avg       0.97      0.97      0.97       104

confusion_matrix :
[[33  0]
 [ 3 68]]
accuracy_score :
0.9711538461538461


In [8]:
# Display the test-split labels again (the same Series shown after the
# feature/target separation above).
test_target

275    0
93     1
6      1
167    1
90     1
      ..
69     1
357    0
423    1
334    0
227    0
Name: class, Length: 104, dtype: int64

In [12]:
# Persist the fitted model with pickle, load it back, and confirm the reloaded
# copy scores the same on the test split.
pth = ""
filename = pth + 'DT.sav'

# save the model to disk; the context manager closes (and flushes) the file
# before it is read back below
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# load the model from disk
# NOTE: pickle.load can execute arbitrary code while unpickling -- only load
# files that come from a trusted source (here, the file written just above).
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Mean accuracy as reported by the estimator, plus explicit predictions for
# the accuracy_score check below.
model_score = loaded_model.score(test_feature, test_target)
y_pred2 = loaded_model.predict(test_feature)

print("accuracy_score :\n" + str(accuracy_score(test_target, y_pred2)))

accuracy_score :
0.9711538461538461


In [13]:
# A single hand-written sample; values are listed in the same order as
# `feature_names` below.
arr = [67, 1, 1, 0, 0, 1, 1, 1]
feature_names = [
    'age', 'gender', 'polyuria', 'polydipsia',
    'sudden_weight_loss', 'polyphagia', 'delayed_healing', 'obesity',
]

# Wrap each value in a one-element list so every key becomes a one-row column.
new_data_dict = {name: [arr[position]] for position, name in enumerate(feature_names)}

# Build the one-row feature frame and run it through the reloaded model.
features = pd.DataFrame(new_data_dict, index=[0])
prediction = loaded_model.predict(features)
probability = loaded_model.predict_proba(features)

# First row of predict_proba: the two entries are reported below as the
# confidence of 0 and the confidence of 1 respectively.
first_row = probability[0]
confidenceN = first_row[0]
confidenceP = first_row[1]

print(f"confidence of 0: {confidenceN}")
print(f"confidence of 1: {confidenceP}")
print(prediction)

confidence of 0: 1.0
confidence of 1: 0.0
[0]
