In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.model_selection import train_test_split,cross_val_score,StratifiedKFold

In [2]:
dataframe = pd.read_csv('diabetes.csv')
dataframe

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [3]:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   preg    768 non-null    int64  
 1   plas    768 non-null    int64  
 2   pres    768 non-null    int64  
 3   skin    768 non-null    int64  
 4   test    768 non-null    int64  
 5   mass    768 non-null    float64
 6   pedi    768 non-null    float64
 7   age     768 non-null    int64  
 8   class   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [4]:
dataframe.describe()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [5]:
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]

In [6]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.2,stratify = Y)

In [7]:
X_train.shape,Y_train.shape,X_test.shape,Y_test.shape

((614, 8), (614,), (154, 8), (154,))

In [11]:
clf = SVC()
clf.fit(X_train,Y_train)

In [13]:
Y_predict = clf.predict(X_test)

In [15]:
print(classification_report(Y_test,Y_predict))

              precision    recall  f1-score   support

         0.0       0.73      0.87      0.79       100
         1.0       0.63      0.41      0.49        54

    accuracy                           0.71       154
   macro avg       0.68      0.64      0.64       154
weighted avg       0.70      0.71      0.69       154



In [17]:
accuracy_score(Y_train,clf.predict(X_train))

0.7817589576547231

#### Hyper parameter Tuning With Randomised Grid Search CV

In [20]:
clf = SVC()
param_grid = [{'kernel':['linear','rbf'],'gamma':[0.1,0.5,1],'C':[0.1,1,10]}]
kfold = StratifiedKFold(n_splits = 5)
gsv = RandomizedSearchCV(clf,param_grid,cv=kfold,scoring='recall')
gsv.fit(X_train,Y_train)

In [21]:
gsv.best_params_,gsv.best_score_

({'kernel': 'linear', 'gamma': 0.5, 'C': 10}, 0.5892580287929124)

In [24]:
#### Important word:Regularisation Constant

In [28]:
clf_model = SVC(kernel="linear",C =1)
clf_model.fit(X_train,Y_train)
y_pred =clf_model.predict(X_test)
acc = accuracy_score(Y_test,y_pred) * 100
print("Accuracy:",acc)
confusion_matrix(Y_test,y_pred)

Accuracy: 72.72727272727273


array([[84, 16],
       [26, 28]], dtype=int64)

In [30]:
y_pred

array([0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0.,
       1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       1., 1., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
       1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1.,
       1.])

In [32]:
confusion_matrix(Y_test,y_pred)

array([[84, 16],
       [26, 28]], dtype=int64)

In [36]:
print(classification_report(Y_test,y_pred))

              precision    recall  f1-score   support

         0.0       0.76      0.84      0.80       100
         1.0       0.64      0.52      0.57        54

    accuracy                           0.73       154
   macro avg       0.70      0.68      0.69       154
weighted avg       0.72      0.73      0.72       154

