# Support Vector Machine

Implementing SVM on diabetes.csv dataset. 

In [3]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn import model_selection
from sklearn import metrics
from sklearn import datasets 
from sklearn import preprocessing
import pickle

In [5]:
data = pd.read_csv('diabetes.csv')


In [6]:
data.shape

(768, 9)

In [7]:
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [8]:
data.isnull().sum() #check if there are missing values

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [10]:
round(data.describe(),2)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.85,120.89,69.11,20.54,79.8,31.99,0.47,33.24,0.35
std,3.37,31.97,19.36,15.95,115.24,7.88,0.33,11.76,0.48
min,0.0,0.0,0.0,0.0,0.0,0.0,0.08,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.37,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.63,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [15]:
y=data['Outcome']
y

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64

In [16]:
x=data.drop(['Outcome'],axis=1)
x

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [23]:
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size = 0.33, random_state = 9)
cols=x_train.columns
scaler = preprocessing.StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


In [24]:
x_train = pd.DataFrame(x_train, columns=[cols])
x_train

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,-0.834467,-0.489117,0.032037,0.445988,0.422459,0.256126,-1.010797,-0.943022
1,-0.538459,-0.457517,0.240482,0.572185,0.131638,0.181537,-0.204153,-0.858885
2,1.533600,-0.046723,0.136260,0.067395,-0.233967,-1.409685,0.808770,1.244540
3,-1.130476,-0.552316,-0.280630,0.130494,0.264585,-0.539485,-0.050213,-0.858885
4,-0.242451,-1.911096,0.657373,0.445988,-0.699281,0.280989,-0.699839,1.076266
...,...,...,...,...,...,...,...,...
509,0.941583,2.070446,-0.072185,1.140074,1.826712,0.691226,-0.665972,0.655581
510,-0.242451,-1.184306,0.136260,0.698383,-0.699281,0.629069,-0.625948,-0.438200
511,0.941583,-0.773512,0.344705,0.698383,0.056855,1.089031,1.233644,-0.101652
512,-0.242451,-0.710313,-0.384853,-0.121901,-0.084401,-1.285371,-0.589002,-0.606474


In [25]:
x_test = pd.DataFrame(x_test, columns=[cols])

In [28]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

svc=SVC()
svc.fit(x_train,y_train)
y_pred=svc.predict(x_test)
print('Model accuracy score with default hyperparameters: {0:0.4f}'. format(accuracy_score(y_test, y_pred)))


Model accuracy score with default hyperparameters: 0.7165


In [29]:
metrics.mean_squared_error(y_predicted, y_test)

0.1809227531664077

In [32]:
poly_svc=SVC(kernel='poly', C=1.0) 


# fit classifier to training set
poly_svc.fit(x_train,y_train)


# make predictions on test set
y_pred=poly_svc.predict(x_test)


# compute and print accuracy score
print('Model accuracy score with polynomial kernel and C=1.0 : {0:0.4f}'. format(accuracy_score(y_test, y_pred)))

Model accuracy score with polynomial kernel and C=1.0 : 0.7087
