In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
dataset = pd.read_csv('CKD.csv')

In [3]:
dataset = pd.get_dummies(dataset, dtype  = int, drop_first = True)

In [5]:
independent = dataset.drop(columns = ['classification_yes'])

In [6]:
dependent = dataset['classification_yes']

In [11]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(independent, dependent, test_size = 0.3, random_state = 0)

In [13]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

In [17]:
from sklearn.tree import DecisionTreeClassifier

In [31]:
from sklearn.model_selection import GridSearchCV

paramgrid = [{'criterion': ['gini', 'entropy', 'log_loss'],
              'max_features': ['log2', 'sqrt', None],
              'splitter': ['best', 'random']}]


grid = GridSearchCV(DecisionTreeClassifier(),paramgrid, refit = True, verbose = 3, n_jobs = -1, scoring = 'f1_weighted')
grid.fit(xtrain, ytrain)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [32]:
re = grid.cv_results_

ypred = grid.predict(xtest)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(ytest,ypred)

from sklearn.metrics import classification_report
clfreport = classification_report(ytest,ypred)

from sklearn.metrics import f1_score
f1score = f1_score(ytest,ypred,average = 'weighted')
print ("The best grid parameter {}" .format(grid.best_params_), f1score)

The best grid parameter {'criterion': 'log_loss', 'max_features': 'log2', 'splitter': 'random'} 0.9916474440062505


In [33]:
print("The best confusion matrix: \n", cm)

The best confusion matrix: 
 [[44  1]
 [ 0 75]]


In [34]:
print("The best classification report: \n", clfreport)

The best classification report: 
               precision    recall  f1-score   support

           0       1.00      0.98      0.99        45
           1       0.99      1.00      0.99        75

    accuracy                           0.99       120
   macro avg       0.99      0.99      0.99       120
weighted avg       0.99      0.99      0.99       120



In [35]:
from sklearn.metrics import roc_auc_score
ras = roc_auc_score(ytest,grid.predict_proba(xtest)[:,1])
print(ras)

0.9888888888888888


In [36]:
re

{'mean_fit_time': array([0.00610747, 0.00597663, 0.00486302, 0.00394254, 0.00450134,
        0.00467706, 0.00390873, 0.00428233, 0.00437961, 0.00381274,
        0.00525789, 0.00476389, 0.00278721, 0.0032764 , 0.00350652,
        0.00262189, 0.00392036, 0.00334225]),
 'std_fit_time': array([0.00119904, 0.00047659, 0.00087409, 0.00034545, 0.00030617,
        0.00028014, 0.00056021, 0.00053803, 0.00053145, 0.00054109,
        0.0005079 , 0.00046333, 0.00049496, 0.00063108, 0.00111042,
        0.00022168, 0.0002914 , 0.00075486]),
 'mean_score_time': array([0.00933723, 0.01061573, 0.00942388, 0.00904136, 0.00737767,
        0.00863171, 0.00834045, 0.00919247, 0.00948296, 0.00778413,
        0.00937924, 0.00802927, 0.00761857, 0.00777416, 0.00594454,
        0.00620885, 0.00593696, 0.0079247 ]),
 'std_score_time': array([0.00057312, 0.00146157, 0.00042766, 0.00144063, 0.00069024,
        0.00155888, 0.00112056, 0.00066386, 0.00089908, 0.00074894,
        0.00154136, 0.00102055, 0.0013285 , 