In [1]:
import numpy as np
import matplotlib as plt
import pandas as pd

In [2]:
dataset=pd.read_csv("CKD.csv")
dataset=pd.get_dummies(dataset,drop_first=True)

In [4]:
rows, columns = dataset.shape

In [5]:
dataset.shape

(399, 28)

In [6]:
print(dataset.columns.tolist())

['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv', 'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal', 'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes', 'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes']


In [7]:
indep=dataset[["age", "bp", "al", "su", "bgr", "bu", "sc", "sod", "pot", "hrmo", "pcv", "wc", "rc", "sg_b", "sg_c", "sg_d", "sg_e", "rbc_normal", "pc_normal", "pcc_present", "ba_present", "htn_yes", "dm_yes", "cad_yes", "appet_yes", "pe_yes", "ane_yes"]]
dep=dataset["classification_yes"]

In [8]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(indep, dep, test_size=1/3, random_state=0)

In [9]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train= sc.fit_transform(x_train)
x_test= sc.fit_transform(x_test)

In [10]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],'gamma': ['scale', 'auto'],'C':[1.0,10,100,1000,2000,3000]}
grid = GridSearchCV(SVC(probability=True),param_grid,refit=True,verbose=3,n_jobs=-1,scoring='f1_weighted')
grid.fit(x_train,y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [11]:
re=grid.cv_results_
grid_predictions=grid.predict(x_test)

from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,grid_predictions)

from sklearn.metrics import classification_report
clf_report=classification_report(y_test,grid_predictions)

In [12]:
from sklearn.metrics import f1_score
f1_macro= f1_score(y_test,grid_predictions,average='weighted')
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)

The f1_macro value for best parameter {'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}: 1.0


In [13]:
print ("The confusion Matrix:\n",cm)

The confusion Matrix:
 [[51  0]
 [ 0 82]]


In [14]:
print("The report:\n",clf_report)

The report:
               precision    recall  f1-score   support

       False       1.00      1.00      1.00        51
        True       1.00      1.00      1.00        82

    accuracy                           1.00       133
   macro avg       1.00      1.00      1.00       133
weighted avg       1.00      1.00      1.00       133



In [15]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test,grid.predict_proba(x_test)[:,1])


1.0

In [16]:
import pickle 
best_model = grid.best_estimator_
filename = "Finalized_model_Classification_CKD.sav"
pickle.dump(best_model, open(filename, 'wb'))
loaded_model = pickle.load(open("Finalized_model_Classification_CKD.sav", 'rb'))

In [17]:
from sklearn.tree import DecisionTreeClassifier
param_grid = { 'criterion': ['gini', 'entropy'],'max_depth': [None, 5, 10, 20, 30],'min_samples_split': [2, 5, 10],'min_samples_leaf': [1, 2, 4]}
grid = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=param_grid, scoring='f1_weighted', refit=True, verbose=3, n_jobs=-1)
grid.fit(x_train, y_train)

Fitting 5 folds for each of 90 candidates, totalling 450 fits


In [18]:
re=grid.cv_results_
grid_predictions=grid.predict(x_test)

from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,grid_predictions)

from sklearn.metrics import classification_report
clf_report=classification_report(y_test,grid_predictions)

In [19]:
from sklearn.metrics import f1_score
f1_macro= f1_score(y_test,grid_predictions,average='weighted')
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)

The f1_macro value for best parameter {'criterion': 'gini', 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2}: 0.9182450459190696


In [20]:
print ("The confusion Matrix:\n",cm)

The confusion Matrix:
 [[50  1]
 [10 72]]


In [21]:
print("The report:\n",clf_report)

The report:
               precision    recall  f1-score   support

       False       0.83      0.98      0.90        51
        True       0.99      0.88      0.93        82

    accuracy                           0.92       133
   macro avg       0.91      0.93      0.91       133
weighted avg       0.93      0.92      0.92       133



In [22]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test,grid.predict_proba(x_test)[:,1])

0.929220468675275

In [23]:
from sklearn.ensemble import RandomForestClassifier
param_grid = {
    'n_estimators': [50, 100, 200],          
    'criterion': ['gini', 'entropy','log_loss']}
grid = GridSearchCV(estimator=RandomForestClassifier(), param_grid=param_grid, scoring='f1_weighted', refit=True, verbose=3, n_jobs=-1)
grid.fit(x_train, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits


In [24]:
re=grid.cv_results_
grid_predictions=grid.predict(x_test)

from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,grid_predictions)

from sklearn.metrics import classification_report
clf_report=classification_report(y_test,grid_predictions)

In [25]:
from sklearn.metrics import f1_score
f1_macro= f1_score(y_test,grid_predictions,average='weighted')
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)

The f1_macro value for best parameter {'criterion': 'log_loss', 'n_estimators': 100}: 0.9849624060150376


In [26]:
print ("The confusion Matrix:\n",cm)

The confusion Matrix:
 [[50  1]
 [ 1 81]]


In [27]:
print("The report:\n",clf_report)

The report:
               precision    recall  f1-score   support

       False       0.98      0.98      0.98        51
        True       0.99      0.99      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.98      0.98       133
weighted avg       0.98      0.98      0.98       133



In [28]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test,grid.predict_proba(x_test)[:,1])

0.9997608799617408

In [29]:
from sklearn.linear_model import LogisticRegression
param_grid = {'penalty': ['l1', 'l2', 'elasticnet', None],'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']}
grid = GridSearchCV(LogisticRegression(),param_grid,refit=True,verbose=3,n_jobs=-1,scoring='f1_weighted')
grid.fit(x_train,y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


55 fits failed out of a total of 120.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\sharu\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\sharu\anaconda3\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\sharu\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [30]:
re=grid.cv_results_
grid_predictions=grid.predict(x_test)

from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,grid_predictions)

from sklearn.metrics import classification_report
clf_report=classification_report(y_test,grid_predictions)

In [31]:
from sklearn.metrics import f1_score
f1_macro= f1_score(y_test,grid_predictions,average='weighted')
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)

The f1_macro value for best parameter {'penalty': 'l2', 'solver': 'sag'}: 1.0


In [32]:
print ("The confusion Matrix:\n",cm)

The confusion Matrix:
 [[51  0]
 [ 0 82]]


In [33]:
print("The report:\n",clf_report)

The report:
               precision    recall  f1-score   support

       False       1.00      1.00      1.00        51
        True       1.00      1.00      1.00        82

    accuracy                           1.00       133
   macro avg       1.00      1.00      1.00       133
weighted avg       1.00      1.00      1.00       133



In [34]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test,grid.predict_proba(x_test)[:,1])

1.0

In [35]:
from sklearn.neighbors import KNeighborsClassifier
param_grid = {'n_neighbors': [3, 5, 7, 9, 11],'weights': ['uniform', 'distance'],'metric': ['minkowski', 'euclidean', 'manhattan'],'p': [1, 2] }
grid = GridSearchCV(KNeighborsClassifier(),param_grid,refit=True,verbose=3,n_jobs=-1,scoring='f1_weighted')
grid.fit(x_train,y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


In [36]:
re=grid.cv_results_
grid_predictions=grid.predict(x_test)

from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,grid_predictions)

from sklearn.metrics import classification_report
clf_report=classification_report(y_test,grid_predictions)

In [37]:
from sklearn.metrics import f1_score
f1_macro= f1_score(y_test,grid_predictions,average='weighted')
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)

The f1_macro value for best parameter {'metric': 'minkowski', 'n_neighbors': 3, 'p': 1, 'weights': 'uniform'}: 0.9775556904684072


In [38]:
print ("The confusion Matrix:\n",cm)

The confusion Matrix:
 [[51  0]
 [ 3 79]]


In [39]:
print("The report:\n",clf_report)

The report:
               precision    recall  f1-score   support

       False       0.94      1.00      0.97        51
        True       1.00      0.96      0.98        82

    accuracy                           0.98       133
   macro avg       0.97      0.98      0.98       133
weighted avg       0.98      0.98      0.98       133



In [40]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test,grid.predict_proba(x_test)[:,1])

0.9935437589670014