### Today's Objectives

* SVM
* Hyperparameters in SVM
* GridSearchCV

### SVM (Support Vector Machine)

* SVM is a supervised ML algorithm
* It is used for both regression and classification
* SVM is mostly used for classification
* SVM creates a boundary between classes. This boundary is called a hyperplane
* SVM chooses the hyperplane that gives the maximum distance (margin) between the classes
* The data points/vectors that are closest to the hyperplane are called support vectors

![img](svm.jpg)


### Hyperparameters in SVM

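* Hyperparameters are tuned to improve the performance of the model
* Kernel
    * A kernel is a function that transforms non-linear data so that it becomes linearly separable
    * ex: linear, poly, rbf (Radial Basis Function), sigmoid
    * Degree: the degree of the polynomial used by the `poly` kernel (ignored by the other kernels)
    * C: the regularization parameter; it controls how strongly misclassified training points are penalized (a larger C tolerates fewer training errors)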

In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
# Load the wine dataset and echo the returned object so its fields
# ('data', 'target', ...) can be inspected.
d = load_wine()
d

{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [3]:
# Put the feature matrix into a table (one column per feature name), attach
# the class label as a 'Target' column, and preview the first rows.
df = pd.DataFrame(data=d.data, columns=d.feature_names).assign(Target=d.target)
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,Target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [4]:
# Count the missing values in each column.
df.isna().sum()

alcohol                         0
malic_acid                      0
ash                             0
alcalinity_of_ash               0
magnesium                       0
total_phenols                   0
flavanoids                      0
nonflavanoid_phenols            0
proanthocyanins                 0
color_intensity                 0
hue                             0
od280/od315_of_diluted_wines    0
proline                         0
Target                          0
dtype: int64

In [5]:
# List the distinct class labels present in the target column.
pd.unique(df['Target'])

array([0, 1, 2])

In [6]:
# Number of samples per class, to check how balanced the labels are.
df.Target.value_counts()

1    71
0    59
2    48
Name: Target, dtype: int64

In [7]:
# Separate the label column (output) from the feature columns (input).
y = df['Target']
x = df.drop(columns='Target')

In [8]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Train a support vector classifier with its default hyperparameters.
from sklearn.svm import SVC

model = SVC()
model.fit(X=xtrain, y=ytrain)

SVC()

In [11]:
# Compare accuracy on the data the model was fitted on with accuracy on the
# held-out data.
train_acc = model.score(xtrain, ytrain)
test_acc = model.score(xtest, ytest)
print("training score:", train_acc)
print("test score:", test_acc)

training score: 0.6693548387096774
test score: 0.7592592592592593


In [12]:
# Predict the held-out rows and compute the accuracy from the predictions.
from sklearn.metrics import accuracy_score

ypred = model.predict(xtest)
print("score is:", accuracy_score(y_true=ytest, y_pred=ypred))

score is: 0.7592592592592593


In [13]:
# Confusion matrix: rows are the true classes, columns the predicted classes.
from sklearn.metrics import confusion_matrix

confusion_matrix(y_true=ytest, y_pred=ypred)

array([[19,  0,  0],
       [ 0, 19,  2],
       [ 0, 11,  3]], dtype=int64)

### Use Hyperparameters in model

In [15]:
# Train the classifier again, this time with a linear kernel.
from sklearn.svm import SVC

model = SVC(kernel='linear')
model.fit(X=xtrain, y=ytrain)

SVC(kernel='linear')

In [16]:
# Training accuracy and held-out accuracy for the linear-kernel model.
ypred = model.predict(xtest)
train_acc = model.score(xtrain, ytrain)
print("training score:", train_acc)
print("accuracy: ", accuracy_score(y_true=ytest, y_pred=ypred))

training score: 0.9919354838709677
accuracy:  0.9814814814814815


In [17]:
# Fit one classifier per kernel type and report its accuracy on the test split.
k = ['poly', 'linear', 'rbf', 'sigmoid']
for kernel_name in k:
    model = SVC(kernel=kernel_name)
    model.fit(xtrain, ytrain)
    ypred = model.predict(xtest)
    print("Kernel:", kernel_name)
    print("score is:", accuracy_score(ytest, ypred))

Kernel: poly
score is: 0.7592592592592593
Kernel: linear
score is: 0.9814814814814815
Kernel: rbf
score is: 0.7592592592592593
Kernel: sigmoid
score is: 0.2222222222222222


### GridSearchCV

* GridSearchCV is a hyperparameter tuning technique that uses cross-validation to find the best parameters for a model

In [18]:
# Start from an unfitted classifier; the grid search below supplies the
# hyperparameters.
from sklearn.svm import SVC

model = SVC()

In [19]:
# Cross-validated search over every combination of C, kernel and degree.
# NOTE(review): `degree` only affects the 'poly' kernel, so the other kernels
# are fitted repeatedly with identical settings; a list of per-kernel grids
# would avoid the duplicate fits.
from sklearn.model_selection import GridSearchCV

params = {
    'C': [1, 0.1, 100, 1000],
    'kernel': ['rbf', 'poly', 'sigmoid', 'linear'],
    'degree': [1, 2, 3, 4, 5],
}
model_grid = GridSearchCV(estimator=model, param_grid=params)
model_grid.fit(xtrain, ytrain)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [1, 0.1, 100, 1000], 'degree': [1, 2, 3, 4, 5],
                         'kernel': ['rbf', 'poly', 'sigmoid', 'linear']})

In [20]:
# Hyperparameter combination that achieved the best cross-validated score.
model_grid.best_params_

{'C': 0.1, 'degree': 1, 'kernel': 'linear'}

In [21]:
# Accuracy of the best estimator found by the search on the held-out split.
model_grid.score(X=xtest, y=ytest)

1.0

In [22]:
# Rebuild the classifier by hand with the hyperparameters the grid search
# reported as best.
model = SVC(
    C=0.1,
    kernel='linear',
    degree=1,
)
model.fit(X=xtrain, y=ytrain)

SVC(C=0.1, degree=1, kernel='linear')

In [24]:
# Report both scores explicitly. A notebook cell only displays its final
# expression, so a bare training-score call followed by the test-score call
# silently discards the training score.
print("training score:", model.score(xtrain, ytrain))
print("test score:", model.score(xtest, ytest))

1.0

### Apply SVC to the Iris dataset

In [25]:
# Load the iris dataset, tabulate its features with the class label in a
# 'Target' column, and preview the first rows.
from sklearn.datasets import load_iris

d = load_iris()
df = pd.DataFrame(data=d.data, columns=d.feature_names).assign(Target=d.target)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),Target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [26]:
# Separate the label column (output) from the feature columns (input).
y = df['Target']
x = df.drop(columns='Target')

In [27]:
# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [28]:
# Train a support vector classifier with default hyperparameters.
from sklearn.svm import SVC

model = SVC()
model.fit(X=xtrain, y=ytrain)

SVC()

In [29]:
# Accuracy on the training split.
model.score(X=xtrain, y=ytrain)

0.975

In [30]:
# Accuracy on the held-out split.
model.score(X=xtest, y=ytest)

1.0

In [31]:
# Put true and predicted labels side by side and show five random rows.
ypred = model.predict(xtest)
dd = pd.DataFrame(
    {
        'actual': ytest,
        "pred": ypred,
    }
)
dd.sample(n=5)

Unnamed: 0,actual,pred
45,0,0
78,1,1
145,2,2
131,2,2
108,2,2


### SVR (Support Vector Regression)

In [3]:
# Load the Boston housing data into `d` (with .data, .target, .feature_names).
#
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# import fails on current releases. In that case, rebuild the same object from
# the original data file (requires network access), following the recipe from
# the scikit-learn deprecation notice. For new work, scikit-learn recommends a
# different dataset such as `fetch_california_housing`.
try:
    from sklearn.datasets import load_boston
    d = load_boston()
except ImportError:
    from sklearn.utils import Bunch

    # After a 22-line header, every record spans two physical lines:
    # 11 values on the first, then 2 more features and the target on the second.
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    d = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )
d

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
         9.1400e+00],
        [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
         4.0300e+00],
        ...,
        [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         5.6400e+00],
        [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
         6.4800e+00],
        [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]]),
 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
        18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
        15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
        13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
        21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
        35.4, 24.7, 3

In [4]:
# Tabulate the features, attach the regression target as a 'Target' column,
# and show four random rows.
df = pd.DataFrame(data=d.data, columns=d.feature_names).assign(Target=d.target)
df.sample(n=4)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Target
485,3.67367,0.0,18.1,0.0,0.583,6.312,51.9,3.9917,24.0,666.0,20.2,388.62,10.58,21.2
23,0.98843,0.0,8.14,0.0,0.538,5.813,100.0,4.0952,4.0,307.0,21.0,394.54,19.88,14.5
220,0.35809,0.0,6.2,1.0,0.507,6.951,88.5,2.8617,8.0,307.0,17.4,391.7,9.71,26.7
187,0.07875,45.0,3.44,0.0,0.437,6.782,41.1,3.7886,5.0,398.0,15.2,393.87,6.68,32.0


In [5]:
# Separate the target column (output) from the feature columns (input).
y = df['Target']
x = df.drop(columns='Target')

In [6]:
# Hold out 30% of the rows for testing, with a fixed seed for reproducibility.
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [7]:
# Train a support vector regressor with its default hyperparameters.
from sklearn.svm import SVR

model = SVR()
model.fit(X=xtrain, y=ytrain)

SVR()

In [8]:
# R^2 score on the training split.
model.score(X=xtrain, y=ytrain)

0.17666454887600336

In [9]:
# R^2 score on the held-out split.
model.score(X=xtest, y=ytest)

0.281955630218394

In [10]:
# Train the regressor again, this time with a linear kernel.
from sklearn.svm import SVR

model = SVR(kernel='linear')
model.fit(X=xtrain, y=ytrain)

SVR(kernel='linear')

In [11]:
# R^2 score of the linear-kernel regressor on the training split.
model.score(X=xtrain, y=ytrain)

0.7063317074864512

In [14]:
# Fit one regressor per kernel type and report its R^2 on the test split.
from sklearn.metrics import r2_score

k = ['poly', 'linear', 'rbf', 'sigmoid']
for kernel_name in k:
    model = SVR(kernel=kernel_name)
    model.fit(xtrain, ytrain)
    ypred = model.predict(xtest)
    print("Kernel:", kernel_name)
    print("score is:", r2_score(ytest, ypred))

Kernel: poly
score is: 0.26424385846853826
Kernel: linear
score is: 0.6560819310743812
Kernel: rbf
score is: 0.281955630218394
Kernel: sigmoid
score is: 0.1249817525457898


In [44]:
# Start from an unfitted regressor; the grid search below supplies the
# hyperparameters.
from sklearn.svm import SVR

model = SVR()

In [None]:
# Cross-validated grid search over the SVR hyperparameters.
#
# Two changes make this search practical (the original cell has no recorded
# execution count or output):
#   * Features are standardized inside a pipeline. SVMs are not scale
#     invariant, and linear/poly kernels with a large C converge very slowly
#     on raw features whose ranges differ by orders of magnitude. Scaling
#     inside the pipeline also keeps each CV fold free of leakage and lets
#     `model_grid.score(xtest, ytest)` accept the unscaled test split.
#   * `degree` is searched only for the 'poly' kernel, because the other
#     kernels ignore it and would be refitted with identical settings.
#
# Because the estimator is a pipeline, keys in `best_params_` carry the
# 'svr__' step prefix.
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

c_values = [1, 0.1, 100, 1000]
params = [
    {'svr__kernel': ['poly'], 'svr__degree': [1, 2, 3, 4, 5], 'svr__C': c_values},
    {'svr__kernel': ['rbf', 'sigmoid', 'linear'], 'svr__C': c_values},
]
model_grid = GridSearchCV(make_pipeline(StandardScaler(), model), params)
model_grid.fit(xtrain, ytrain)