In [1]:
import pandas as pd

In [2]:
Dataset=pd.read_csv('CKD.csv')

In [3]:
Dataset.columns

Index(['age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr', 'bu',
       'sc', 'sod', 'pot', 'hrmo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad',
       'appet', 'pe', 'ane', 'classification'],
      dtype='object')

In [4]:
Dataset.head()

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,2.0,76.459948,c,3,0,normal,abnormal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,yes,no,yes
1,3.0,76.459948,c,2,0,normal,normal,notpresent,notpresent,148.112676,...,34.0,12300.0,4.705597,no,no,no,yes,poor,no,yes
2,4.0,76.459948,a,1,0,normal,normal,notpresent,notpresent,99.0,...,34.0,8408.191126,4.705597,no,no,no,yes,poor,no,yes
3,5.0,76.459948,d,1,0,normal,normal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,poor,yes,yes
4,5.0,50.0,c,0,0,normal,normal,notpresent,notpresent,148.112676,...,36.0,12400.0,4.705597,no,no,no,yes,poor,no,yes


In [5]:
Dataset=pd.get_dummies(Dataset,dtype=int,drop_first=True)

In [6]:
Dataset.head()

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.0,76.459948,3,0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.0,76.459948,2,0,148.112676,22.0,0.7,137.528754,4.627244,10.7,...,1,0,0,0,0,0,1,0,0,1
2,4.0,76.459948,1,0,99.0,23.0,0.6,138.0,4.4,12.0,...,1,0,0,0,0,0,1,0,0,1
3,5.0,76.459948,1,0,148.112676,16.0,0.7,138.0,3.2,8.1,...,1,0,0,0,0,0,1,0,1,1
4,5.0,50.0,0,0,148.112676,25.0,0.6,137.528754,4.627244,11.8,...,1,0,0,0,0,0,1,0,0,1


In [7]:
Dataset.columns

Index(['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes'],
      dtype='object')

In [8]:
Independent=Dataset[['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes',]]
Dependent=Dataset[['classification_yes']]

In [9]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(Independent,Dependent,test_size=0.30,random_state=0)

In [10]:
from sklearn.preprocessing import StandardScaler
scx=StandardScaler()
X_train=scx.fit_transform(X_train)
X_test=scx.fit_transform(X_test)

In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier


Param_grid={'criterion':['gini', 'entropy', 'log_loss'],
            'max_features': ['sqrt','log2',None],
            'splitter':['best','random']
           }

grid = GridSearchCV(DecisionTreeClassifier(), Param_grid, refit = True, verbose = 3,n_jobs=-1,scoring='f1_weighted') 

grid.fit(X_train, Y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [13]:
re=grid.cv_results_

In [14]:
grid_Pred=grid.predict(X_test)

In [15]:
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(grid_Pred,Y_test)
print(cm)

[[43  7]
 [ 2 68]]


In [16]:
from sklearn.metrics import classification_report
clf_report=classification_report(grid_Pred,Y_test)
print(clf_report)

              precision    recall  f1-score   support

           0       0.96      0.86      0.91        50
           1       0.91      0.97      0.94        70

    accuracy                           0.93       120
   macro avg       0.93      0.92      0.92       120
weighted avg       0.93      0.93      0.92       120



In [17]:
from sklearn.metrics import f1_score
f1_macro=f1_score(grid_Pred,Y_test)
print("The f1 sore of the best parameter{}:".format(grid.best_params_),f1_score)

The f1 sore of the best parameter{'criterion': 'entropy', 'max_features': 'log2', 'splitter': 'random'}: <function f1_score at 0x000001CF231FD260>


In [18]:
from sklearn.metrics import roc_auc_score
roc_auc_score(Y_test,grid.predict_proba(X_test)[:,1])

0.9311111111111112

In [19]:
table=pd.DataFrame.from_dict(re)
print(table)

    mean_fit_time  std_fit_time  mean_score_time  std_score_time  \
0        0.004985      0.000631         0.010454        0.001428   
1        0.004457      0.001967         0.011954        0.003871   
2        0.005469      0.003517         0.013962        0.006039   
3        0.003198      0.000414         0.010060        0.001221   
4        0.004119      0.000656         0.010204        0.002150   
5        0.003438      0.000473         0.012157        0.004118   
6        0.003239      0.000383         0.012132        0.006838   
7        0.002740      0.000464         0.009072        0.000201   
8        0.003389      0.000488         0.010963        0.002090   
9        0.002990      0.000631         0.008574        0.000489   
10       0.004385      0.000798         0.009540        0.000469   
11       0.002990      0.000630         0.010881        0.002810   
12       0.002821      0.000419         0.009369        0.000798   
13       0.002990      0.000630         0.010365