In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
dataset = pd.read_csv("CKD.csv")

In [3]:
dataset = pd.get_dummies(dataset, dtype = int, drop_first=True)

In [4]:
dataset.columns

Index(['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes'],
      dtype='object')

In [5]:
independent = dataset.drop(columns=['classification_yes'])

In [6]:
dependent = dataset['classification_yes']

In [7]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(independent, dependent, test_size = 0.3, random_state = 0)

In [8]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

In [9]:
from sklearn.svm import SVC

In [10]:
from sklearn.model_selection import GridSearchCV

paramgrid = [
    {'kernel':['linear'],'C':[10,100,1000,2000,3000]},
    {'kernel':['poly','rbf','sigmoid'], 'C':[10,100,1000,2000,3000], 'gamma':['scale','auto']}
     ]

svc = SVC(probability=True)

grid = GridSearchCV(svc,paramgrid, refit = True, verbose = 3, n_jobs = -1, scoring = 'f1_weighted')
grid.fit(xtrain,ytrain)


Fitting 5 folds for each of 35 candidates, totalling 175 fits


In [11]:
re = grid.cv_results_

ypred = grid.predict(xtest)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(ytest,ypred)

from sklearn.metrics import classification_report

clfreport = classification_report(ytest,ypred)


In [12]:
from sklearn.metrics import f1_score
f1score = f1_score(ytest,ypred,average = 'weighted')
print("The f1 value of the best parameter:{}". format(grid.best_params_),f1score)

The f1 value of the best parameter:{'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'} 0.9834018801410106


In [13]:
print("The confusion matrix: \n", cm)

The confusion matrix: 
 [[45  0]
 [ 2 73]]


In [14]:
print("The classification report:/n", clfreport)

The classification report:/n               precision    recall  f1-score   support

           0       0.96      1.00      0.98        45
           1       1.00      0.97      0.99        75

    accuracy                           0.98       120
   macro avg       0.98      0.99      0.98       120
weighted avg       0.98      0.98      0.98       120



In [15]:
from sklearn.metrics import roc_auc_score
ras = roc_auc_score(ytest,grid.predict_proba(xtest)[:,1])
print(ras)

0.9997037037037036


In [16]:
re

{'mean_fit_time': array([0.01353312, 0.0118741 , 0.010464  , 0.00951715, 0.01063251,
        0.0222095 , 0.01096959, 0.01112247, 0.02167506, 0.0119895 ,
        0.01131487, 0.02112265, 0.01038294, 0.01011147, 0.01920471,
        0.01127157, 0.00944591, 0.01892133, 0.01094012, 0.00798173,
        0.01840591, 0.01098728, 0.0119206 , 0.02138276, 0.01171155,
        0.01036763, 0.02122817, 0.01137328, 0.01037173, 0.02156563,
        0.01153984, 0.01105123, 0.02006989, 0.01092858, 0.01089959]),
 'std_fit_time': array([0.00158419, 0.0022012 , 0.00115785, 0.00066962, 0.00140474,
        0.00105579, 0.0013511 , 0.00150668, 0.00179973, 0.0009857 ,
        0.00123992, 0.00194965, 0.00074928, 0.0009409 , 0.00199783,
        0.00176621, 0.00153362, 0.0026233 , 0.00122117, 0.00172183,
        0.00250272, 0.00204438, 0.00101726, 0.00226883, 0.00173842,
        0.00149344, 0.00286573, 0.0023113 , 0.00069204, 0.00272855,
        0.0011884 , 0.00131726, 0.0037485 , 0.00203642, 0.00131136]),
 'mean_scor