In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
dataset=pd.read_csv("CKD.csv")

In [3]:
dataset=pd.get_dummies(dataset,drop_first=True)

In [4]:
dataset["classification_yes"].value_counts()

classification_yes
True     249
False    150
Name: count, dtype: int64

In [5]:
indep=dataset[["age",'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes']]
dep=dataset["classification_yes"]

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(indep,dep,test_size=1/3,random_state=0)

In [7]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
param_grid={'criterion':['gini','entropy'],
            'max_features':['auto','sqrt','log2'],
            'n_estimators':[10,100]}
grid=GridSearchCV(RandomForestClassifier(),param_grid,refit=True,verbose=3,n_jobs=-1,scoring='f1')
grid.fit(X_train,Y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


20 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
12 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Anaconda3\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "C:\Anaconda3\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidPa

In [9]:
re=grid.cv_results_
grid_predictions=grid.predict(X_test)

In [10]:
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(Y_test,grid_predictions)

In [11]:
from sklearn.metrics import classification_report
clf_report=classification_report(Y_test,grid_predictions)

In [12]:
from sklearn.metrics import f1_score
f1_macro=f1_score(Y_test,grid_predictions,average='weighted')
print("The F1 macro value for the best parameter{}:\n".format(grid.best_params_),f1_macro)
print("The Confusion matrix:\n",cm)
print("The report:\n",clf_report)

The F1 macro value for the best parameter{'criterion': 'gini', 'max_features': 'sqrt', 'n_estimators': 100}:
 0.9849624060150376
The Confusion matrix:
 [[50  1]
 [ 1 81]]
The report:
               precision    recall  f1-score   support

       False       0.98      0.98      0.98        51
        True       0.99      0.99      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.98      0.98       133
weighted avg       0.98      0.98      0.98       133



In [13]:
from sklearn.metrics import roc_auc_score
roc_auc_score(Y_test,grid.predict_proba(X_test)[:,1])

0.9997608799617408

In [14]:
table=pd.DataFrame.from_dict(re)

In [15]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002042,5.1e-05,0.0,0.0,gini,auto,10,"{'criterion': 'gini', 'max_features': 'auto', ...",,,,,,,,9
1,0.002452,0.001361,0.0,0.0,gini,auto,100,"{'criterion': 'gini', 'max_features': 'auto', ...",,,,,,,,9
2,0.024877,0.002951,0.006471,0.000668,gini,sqrt,10,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.969697,0.971429,0.984615,0.969697,1.0,0.979088,0.011857,7
3,0.210016,0.010735,0.009433,0.001186,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",1.0,0.985507,0.984615,0.985075,0.985075,0.988054,0.005979,1
4,0.023881,0.003269,0.005377,0.000492,gini,log2,10,"{'criterion': 'gini', 'max_features': 'log2', ...",0.985075,0.956522,0.96875,1.0,1.0,0.982069,0.017217,6
5,0.169451,0.006079,0.008747,0.000384,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",1.0,0.971429,0.984615,0.985075,0.985075,0.985239,0.009045,2
6,0.000401,0.000491,0.0,0.0,entropy,auto,10,"{'criterion': 'entropy', 'max_features': 'auto...",,,,,,,,9
7,0.001144,0.000748,0.0,0.0,entropy,auto,100,"{'criterion': 'entropy', 'max_features': 'auto...",,,,,,,,9
8,0.018378,0.001793,0.003628,0.0006,entropy,sqrt,10,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.969697,0.971429,0.952381,0.955224,0.985075,0.966761,0.01188,8
9,0.175387,0.006222,0.007608,0.004068,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",1.0,0.971429,0.96875,0.985075,1.0,0.985051,0.013403,4


In [16]:
age = float(input("Age: "))
bp = float(input("Blood Pressure: "))
al = float(input("Albumin: "))
su = float(input("Sugar: "))
bgr = float(input("Blood Glucose: "))
bu = float(input("Blood Urea: "))
sc_val = float(input("Serum Creatinine: "))
sod = float(input("Sodium: "))
pot = float(input("Potassium: "))
hrmo = float(input("Hemoglobin: "))
pcv = float(input("Packed Cell Volume: "))
wc = float(input("White Blood Cell Count: "))
rc = float(input("Red Blood Cell Count: "))
sg_b = float(input("SG_b (0/1): "))
sg_c = float(input("SG_c (0/1): "))
sg_d = float(input("SG_d (0/1): "))
sg_e = float(input("SG_e (0/1): "))
rbc_normal = float(input("RBC_normal (0/1): "))
pc_normal = float(input("PC_normal (0/1): "))
pcc_present = float(input("PCC_present (0/1): "))
ba_present = float(input("BA_present (0/1): "))
htn_yes = float(input("Hypertension (0/1): "))
dm_yes = float(input("Diabetes Mellitus (0/1): "))
cad_yes = float(input("Coronary Artery Disease (0/1): "))
appet_yes = float(input("Appetite Normal (0/1): "))
pe_yes = float(input("Pedal Edema (0/1): "))
ane_yes = float(input("Anemia (0/1): "))


Age:  55
Blood Pressure:  250
Albumin:  3
Sugar:  500
Blood Glucose:  236
Blood Urea:  25
Serum Creatinine:  236
Sodium:  536
Potassium:  6
Hemoglobin:  13
Packed Cell Volume:  123
White Blood Cell Count:  25
Red Blood Cell Count:  563
SG_b (0/1):  1
SG_c (0/1):  1
SG_d (0/1):  1
SG_e (0/1):  0
RBC_normal (0/1):  0
PC_normal (0/1):  0
PCC_present (0/1):  1
BA_present (0/1):  0
Hypertension (0/1):  1
Diabetes Mellitus (0/1):  1
Coronary Artery Disease (0/1):  0
Appetite Normal (0/1):  1
Pedal Edema (0/1):  0
Anemia (0/1):  0


In [17]:
Future_Prediction=grid.predict([[age,bp,al,su,bgr,bu,sc_val,sod,pot,hrmo,pcv,wc,rc,sg_b,sg_c,sg_d,sg_e,rbc_normal,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes]])
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[ True]
