In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the CKD table from a CSV located in the current working directory.
dataset = pd.read_csv(filepath_or_buffer="CKD.csv")

In [3]:
# One-hot encode the categorical columns. drop_first=True drops the first
# level of each, so a column with k categories yields k-1 indicator columns.
dataset = pd.get_dummies(data=dataset, drop_first=True)

In [4]:
# Class balance of the encoded target indicator.
dataset.loc[:, "classification_yes"].value_counts()

classification_yes
True     249
False    150
Name: count, dtype: int64

In [5]:
# Predictor columns, listed in the order the model will see them.
feature_columns = [
    "age", "bp", "al", "su", "bgr", "bu", "sc", "sod", "pot", "hrmo", "pcv",
    "wc", "rc",
    "sg_b", "sg_c", "sg_d", "sg_e",
    "rbc_normal", "pc_normal", "pcc_present", "ba_present",
    "htn_yes", "dm_yes", "cad_yes", "appet_yes", "pe_yes", "ane_yes",
]
indep = dataset[feature_columns]

# Target: the indicator column derived from the original classification label.
dep = dataset["classification_yes"]

In [6]:
from sklearn.model_selection import train_test_split

# Hold out one third of the rows for testing; the fixed random_state keeps
# the split repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    indep,
    dep,
    test_size=1/3,
    random_state=0,
)

In [7]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both splits.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its
# own would refit the scaler on already-scaled data; re-run from the split cell.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Hyper-parameter combinations to evaluate (2 x 3 x 2 = 12 candidates).
param_grid = dict(
    criterion=['gini', 'entropy'],
    max_features=[None, 'sqrt', 'log2'],
    splitter=['best', 'random'],
)

# Cross-validated search scored by weighted F1; refit=True retrains the best
# candidate on the whole training set so `grid` can be used to predict directly.
grid = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=0),
    param_grid=param_grid,
    scoring='f1_weighted',
    refit=True,
    verbose=3,
    n_jobs=-1,
)
grid.fit(X_train, Y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [9]:
# Predict the held-out rows with the refitted best estimator.
grid_predictions = grid.predict(X_test)

# Per-candidate cross-validation results, tabulated in a later cell.
# NOTE: `re` is also the name of the standard-library regex module; this
# binding would hide that module if it were imported in this notebook.
re = grid.cv_results_

In [10]:
from sklearn.metrics import confusion_matrix

# Rows index the true class, columns the predicted class.
cm = confusion_matrix(y_true=Y_test, y_pred=grid_predictions)

In [11]:
from sklearn.metrics import classification_report

# Text summary of per-class precision / recall / F1 on the held-out rows.
clf_report = classification_report(y_true=Y_test, y_pred=grid_predictions)

In [12]:
from sklearn.metrics import f1_score

# The score is support-weighted (average='weighted'), the same metric as the
# 'f1_weighted' scoring used by the grid search. It is NOT a macro average,
# so the printed label says "weighted".
f1_weighted = f1_score(Y_test, grid_predictions, average='weighted')
# Legacy name kept so any cell that still reads `f1_macro` keeps working.
f1_macro = f1_weighted

print("The weighted F1 value for the best parameter {}:\n".format(grid.best_params_), f1_weighted)
print("The Confusion matrix:\n", cm)
print("The report:\n", clf_report)

The F1 macro value for the best parameter{'criterion': 'gini', 'max_features': None, 'splitter': 'random'}:
 0.9775556904684072
The Confusion matrix:
 [[51  0]
 [ 3 79]]
The report:
               precision    recall  f1-score   support

       False       0.94      1.00      0.97        51
        True       1.00      0.96      0.98        82

    accuracy                           0.98       133
   macro avg       0.97      0.98      0.98       133
weighted avg       0.98      0.98      0.98       133



In [13]:
from sklearn.metrics import roc_auc_score

# Column 1 of predict_proba is the probability of the second entry in the
# fitted classes_ (presumably the True class -- confirm with grid.classes_).
positive_scores = grid.predict_proba(X_test)[:, 1]
roc_auc_score(Y_test, positive_scores)

0.9817073170731707

In [14]:
# Tabulate the grid-search results: each key of the results dict becomes a
# column and each candidate parameter combination a row.
table = pd.DataFrame(re)

In [15]:
# Display the cross-validation results table built from the grid search
# (timings, parameters, per-split scores, mean/std score and rank).
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004715,0.001108,0.004953,0.002058,gini,,best,"{'criterion': 'gini', 'max_features': None, 's...",0.944707,0.981014,0.962573,0.942332,0.981031,0.962331,0.016789,5
1,0.004226,0.001195,0.004207,0.001345,gini,,random,"{'criterion': 'gini', 'max_features': None, 's...",0.981569,0.962264,1.0,0.962573,0.981217,0.977525,0.014082,1
2,0.003778,0.001912,0.002959,0.001806,gini,sqrt,best,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.9451,0.84702,0.925524,0.943093,0.962264,0.9246,0.040498,12
3,0.003222,0.000827,0.004571,0.00135,gini,sqrt,random,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.962963,0.943041,0.981217,0.981031,1.0,0.97365,0.019273,3
4,0.003684,0.000816,0.004413,0.000968,gini,log2,best,"{'criterion': 'gini', 'max_features': 'log2', ...",0.981569,0.92351,0.981217,0.981031,0.961826,0.965831,0.022461,4
5,0.00339,0.002084,0.004936,0.002161,gini,log2,random,"{'criterion': 'gini', 'max_features': 'log2', ...",0.981569,0.90361,0.981217,0.943093,0.944023,0.950703,0.028997,8
6,0.003677,0.000936,0.004388,0.000969,entropy,,best,"{'criterion': 'entropy', 'max_features': None,...",0.890467,0.961755,0.944023,0.922492,0.981031,0.939954,0.03141,10
7,0.002464,0.000843,0.003673,0.000832,entropy,,random,"{'criterion': 'entropy', 'max_features': None,...",0.981569,0.961755,1.0,0.962573,0.981217,0.977423,0.014194,2
8,0.003275,0.00124,0.003839,0.001932,entropy,sqrt,best,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.925926,0.883278,0.907035,0.943093,1.0,0.931866,0.039449,11
9,0.002512,0.001064,0.004469,0.001258,entropy,sqrt,random,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.890467,0.943041,0.981217,0.962264,1.0,0.955398,0.037608,6


In [16]:
def _read_float(prompt):
    """Prompt the user with ``prompt`` and return the reply parsed as a float.

    Raises ValueError if the reply cannot be parsed as a number.
    """
    return float(input(prompt))


# Continuous measurements, asked in the same order as the model's feature columns.
age = _read_float("Age: ")
bp = _read_float("Blood Pressure: ")
al = _read_float("Albumin: ")
su = _read_float("Sugar: ")
bgr = _read_float("Blood Glucose: ")
bu = _read_float("Blood Urea: ")
sc_val = _read_float("Serum Creatinine: ")
sod = _read_float("Sodium: ")
pot = _read_float("Potassium: ")
hrmo = _read_float("Hemoglobin: ")
pcv = _read_float("Packed Cell Volume: ")
wc = _read_float("White Blood Cell Count: ")
rc = _read_float("Red Blood Cell Count: ")

# Indicator features for the sg_* columns; the prompts ask for 0 or 1.
sg_b = _read_float("SG_b (0/1): ")
sg_c = _read_float("SG_c (0/1): ")
sg_d = _read_float("SG_d (0/1): ")
sg_e = _read_float("SG_e (0/1): ")

# Remaining indicator features; the prompts ask for 0 or 1.
rbc_normal = _read_float("RBC_normal (0/1): ")
pc_normal = _read_float("PC_normal (0/1): ")
pcc_present = _read_float("PCC_present (0/1): ")
ba_present = _read_float("BA_present (0/1): ")
htn_yes = _read_float("Hypertension (0/1): ")
dm_yes = _read_float("Diabetes Mellitus (0/1): ")
cad_yes = _read_float("Coronary Artery Disease (0/1): ")
appet_yes = _read_float("Appetite Normal (0/1): ")
pe_yes = _read_float("Pedal Edema (0/1): ")
ane_yes = _read_float("Anemia (0/1): ")


Age:  45
Blood Pressure:  235
Albumin:  3
Sugar:  200
Blood Glucose:  5
Blood Urea:  321
Serum Creatinine:  56
Sodium:  32
Potassium:  5
Hemoglobin:  10
Packed Cell Volume:  12
White Blood Cell Count:  2563
Red Blood Cell Count:  2589
SG_b (0/1):  1
SG_c (0/1):  1
SG_d (0/1):  0
SG_e (0/1):  0
RBC_normal (0/1):  1
PC_normal (0/1):  0
PCC_present (0/1):  1
BA_present (0/1):  0
Hypertension (0/1):  1
Diabetes Mellitus (0/1):  0
Coronary Artery Disease (0/1):  1
Appetite Normal (0/1):  1
Pedal Edema (0/1):  1
Anemia (0/1):  1


In [19]:
# Assemble the answers as a single-row frame whose columns are in the same
# order as the training features.
new_patient = pd.DataFrame(
    [[age, bp, al, su, bgr, bu, sc_val, sod, pot, hrmo, pcv, wc, rc,
      sg_b, sg_c, sg_d, sg_e,
      rbc_normal, pc_normal, pcc_present, ba_present,
      htn_yes, dm_yes, cad_yes, appet_yes, pe_yes, ane_yes]],
    columns=indep.columns,
)

# The model was fitted on StandardScaler-transformed features, so the raw
# answers must go through the same fitted scaler before prediction; feeding
# unscaled values would compare them against thresholds in scaled units.
Future_Prediction = grid.predict(scaler.transform(new_patient))
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[ True]
