In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the raw chronic-kidney-disease table from the CSV next to the notebook.
csv_path = 'CKD.csv'
dataset = pd.read_csv(csv_path)

In [3]:
# One-hot encode the categorical columns as 0/1 integers; drop_first removes the
# first level of each category so every binary column collapses to one indicator
# (e.g. `classification_yes`).
dataset = pd.get_dummies(
    data=dataset,
    drop_first=True,
    dtype=int,
)

In [4]:
# Show the encoded frame; the notebook renders a truncated preview of rows and columns.
dataset

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.000000,76.459948,2.0,0.0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,1,0,0,0,0,0,1,0,0,1
2,4.000000,76.459948,1.0,0.0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,1,0,0,0,0,0,1,0,0,1
3,5.000000,76.459948,1.0,0.0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,1,0,0,0,0,0,1,0,1,1
4,5.000000,50.000000,0.0,0.0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,1,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0.0,0.0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,1,0,0,0,0,0,1,0,0,1
395,51.492308,70.000000,0.0,2.0,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,1,0,0,1,1,0,1,0,1,1
396,51.492308,70.000000,3.0,0.0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,1,0,0,1,1,0,0,0,0,1
397,51.492308,90.000000,0.0,0.0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,1,0,0,1,1,0,1,0,1,1


In [28]:
# Predictor columns fed to the classifier, in the order the scaler and model see them.
feature_columns = ['bgr', 'bu', 'sc', 'pcv', 'wc']
indep = dataset[feature_columns]

# Binary target produced by the one-hot encoding step.
dep = dataset['classification_yes']

In [29]:
# Preview the selected predictor columns.
indep

Unnamed: 0,bgr,bu,sc,pcv,wc
0,148.112676,57.482105,3.077356,38.868902,8408.191126
1,148.112676,22.000000,0.700000,34.000000,12300.000000
2,99.000000,23.000000,0.600000,34.000000,8408.191126
3,148.112676,16.000000,0.700000,38.868902,8408.191126
4,148.112676,25.000000,0.600000,36.000000,12400.000000
...,...,...,...,...,...
394,219.000000,36.000000,1.300000,37.000000,9800.000000
395,220.000000,68.000000,2.800000,27.000000,8408.191126
396,110.000000,115.000000,6.000000,26.000000,9200.000000
397,207.000000,80.000000,6.800000,38.868902,8408.191126


In [30]:
from sklearn.model_selection import train_test_split

# Hold out one third of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    indep,
    dep,
    test_size=1 / 3,
    random_state=0,
)

In [31]:
from sklearn.preprocessing import StandardScaler

# Learn mean/variance from the training rows only, then apply the same
# standardisation to both partitions so no test statistics leak into the scaler.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [32]:
from sklearn.svm import SVC

In [33]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search space for the SVM classifier.
# NOTE: scikit-learn documents `decision_function_shape` as ignored for binary
# classification, so for this two-class target the 'ovo'/'ovr' entries only
# duplicate candidates. They are kept so `cv_results_` keeps its current layout.
param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
              'gamma': ['auto', 'scale'],
              'C': [10, 100, 1000, 2000, 3000],
              'decision_function_shape': ['ovo', 'ovr']}

# probability=True makes SVC fit an internal cross-validated calibration that
# shuffles the data randomly. Pinning random_state makes predict_proba (and any
# ROC AUC derived from it) reproducible across runs.
grid = GridSearchCV(SVC(probability=True, random_state=0), param_grid, refit=True,
                    verbose=3, cv=5, n_jobs=-1, scoring='f1_weighted')

# Fit every candidate with 5-fold CV, then refit the best one on all training rows.
grid.fit(X_train, y_train)

Fitting 5 folds for each of 80 candidates, totalling 400 fits


In [34]:
# Predict the held-out rows with the refitted best estimator.
grid_predictions = grid.predict(X_test)

# Keep the per-candidate CV results for the summary table built later.
# NOTE(review): the name `re` shadows the standard-library `re` module in this kernel.
re = grid.cv_results_

In [35]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=grid_predictions)

In [36]:
# Show the confusion matrix for the held-out predictions.
print(cm)

[[48  3]
 [ 3 79]]


In [37]:
# Build the per-class precision / recall / F1 summary for the held-out set.
from sklearn.metrics import classification_report

clf_report = classification_report(
    y_true=y_test,
    y_pred=grid_predictions,
)

In [38]:
# print() is needed here: the report is a pre-formatted multi-line string.
print(clf_report)

              precision    recall  f1-score   support

           0       0.94      0.94      0.94        51
           1       0.96      0.96      0.96        82

    accuracy                           0.95       133
   macro avg       0.95      0.95      0.95       133
weighted avg       0.95      0.95      0.95       133



In [39]:
from sklearn.metrics import f1_score

# The score is the support-weighted F1 (average='weighted'), matching the
# `f1_weighted` scoring used by the grid search. It is not the macro average,
# so the variable and the printed label are named accordingly.
f1_weighted = f1_score(y_test, grid_predictions, average='weighted')
print("The weighted F1 score for best parameters {}:".format(grid.best_params_), f1_weighted)

The f1_macro value for best parameter {'C': 10, 'decision_function_shape': 'ovo', 'gamma': 'auto', 'kernel': 'rbf'}: 0.9548872180451128


In [40]:
from sklearn.metrics import roc_auc_score

# Column 1 of predict_proba is the estimated probability of the positive class.
positive_class_proba = grid.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, positive_class_proba)

0.9901960784313726

In [41]:
# Tabulate the grid-search results: one row per candidate, one column per cv_results_ key.
table = pd.DataFrame.from_dict(re, orient='columns')

In [42]:
# Preview the grid-search results table.
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_decision_function_shape,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.011710,0.008378,0.008135,0.002392,10,ovo,auto,linear,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.963284,0.885265,0.888515,0.943093,0.961826,0.928397,0.034645,21
1,0.006463,0.004160,0.015104,0.014366,10,ovo,auto,poly,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.809671,0.841025,0.925146,0.867217,0.881521,0.864916,0.038811,79
2,0.010136,0.003470,0.003418,0.001420,10,ovo,auto,rbf,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.944707,0.981233,0.925524,0.981031,0.981031,0.962705,0.023329,1
3,0.005428,0.001195,0.003194,0.000156,10,ovo,auto,sigmoid,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.888889,0.885265,0.888286,0.867217,0.924528,0.890837,0.018625,53
4,0.007119,0.001251,0.003303,0.001023,10,ovo,scale,linear,"{'C': 10, 'decision_function_shape': 'ovo', 'g...",0.963284,0.885265,0.888515,0.943093,0.961826,0.928397,0.034645,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,0.004160,0.000736,0.002686,0.000638,3000,ovr,auto,sigmoid,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.888889,0.844370,0.851354,0.886792,0.924528,0.879187,0.028964,63
76,0.187167,0.056138,0.001816,0.001027,3000,ovr,scale,linear,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.925926,0.905069,0.888515,0.943093,0.961826,0.924886,0.026125,25
77,0.079820,0.025580,0.003323,0.002108,3000,ovr,scale,poly,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.872586,0.887907,0.888515,0.924528,0.943093,0.903326,0.026203,45
78,0.012187,0.004370,0.002209,0.000425,3000,ovr,scale,rbf,"{'C': 3000, 'decision_function_shape': 'ovr', ...",0.981569,0.922185,0.962573,0.943093,0.981031,0.958090,0.022855,5


In [43]:
import pickle
# Path of the pickle file that stores the fitted grid-search object.
filename="finilized_model_SVC.sav"

In [44]:
# Persist the fitted GridSearchCV object. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
# NOTE(review): only the model is saved; the fitted scaler `sc` must be
# available separately to preprocess inputs at prediction time.
with open(filename, 'wb') as model_file:
    pickle.dump(grid, model_file)

In [45]:
# Single raw observation to classify. It is wrapped in a DataFrame carrying the
# training column names because the scaler was fitted on a named DataFrame:
# this avoids scikit-learn's "X does not have valid feature names" warning and
# makes the column order explicit. The order must match the predictors used to
# fit the scaler.
preinput_data = pd.DataFrame(
    [[22, 2.0, 3.0, 2.0, 3]],
    columns=['bgr', 'bu', 'sc', 'pcv', 'wc'],
)

In [46]:
# Standardise the new observation with the statistics learned from the training set.
preinput = sc.transform(X=preinput_data)



In [47]:
# Inspect the standardised feature vector that will be passed to the model.
preinput

array([[-1.6726475 , -1.10221949, -0.0341477 , -4.55435782, -3.46368741]])

In [48]:
# Reload the persisted model from the path it was written to and classify the
# standardised observation. The context manager closes the file handle promptly.
# NOTE: pickle.load executes arbitrary code from the file; only load files
# produced by this notebook or another trusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
result = loaded_model.predict(preinput)

In [49]:
# Predicted class label for the new observation (value of `classification_yes`).
result

array([1])