Importing necessary packages.

In [43]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_breast_cancer, load_iris, load_boston
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

In [39]:
data = load_boston()
X = data.data
y = data.target
X.shape, y.shape

((506, 13), (506,))

Split the data into train and test chunks

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=0)

X_train.shape, X_test.shape

((354, 13), (152, 13))

train (fit) and SVC model (**Support Vector Classification**)

In [44]:
clf = SVR(C = 1)
clf.fit(X_train, y_train)

SVR(C=1)

Predict the test data and calculate the binary classification metrics

In [48]:
y_pred = clf.predict(X_test)

In [51]:
clf.score(X_train, y_train), clf.score(X_test, y_pred)

(0.21624037461916024, 1.0)

In [53]:
print('total number of support vectors:', clf.n_support_.sum())

total number of support vectors: 340


In [54]:
print('Alphas:', clf.dual_coef_.round(3))

Alphas: [[-1.    -1.    -1.    -1.    -1.    -1.    -1.    -1.    -1.     1.
   1.     1.    -1.    -1.     1.     1.    -1.     1.    -1.     1.
   1.     1.    -1.    -1.    -1.     1.    -1.    -1.    -1.    -1.
   1.    -1.     1.     1.     1.     1.    -1.     1.     1.    -1.
   1.     1.    -1.     1.    -1.     1.    -1.    -1.     1.     1.
  -1.     1.     1.     1.     1.     1.     1.     1.    -1.    -1.
  -1.    -1.    -1.     1.    -1.     1.     1.    -1.    -1.    -1.
  -1.     1.    -1.    -1.     0.722 -1.    -1.    -1.     1.     1.
   1.     1.     1.    -1.     1.    -1.     1.     1.    -1.     1.
   1.     1.    -1.     1.    -1.     1.    -1.    -1.     1.    -1.
  -1.     1.    -1.    -1.     1.     1.     1.    -1.     1.    -1.
  -1.    -1.     1.    -1.     1.    -1.     1.     1.    -1.    -1.
  -1.    -1.    -1.    -1.    -1.    -1.     1.    -1.     1.     1.
   1.     1.     1.    -1.    -1.     0.216  1.    -1.     1.     1.
   1.    -1.     1.     1.

In [55]:
clf.intercept_

array([19.60460096])

The same process after normalizing the data

In [56]:
clf = make_pipeline(MinMaxScaler(), SVR(C = 1))
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

clf.score(X_train, y_train), clf.score(X_test, y_pred)

(0.6593958167363239, 1.0)

In [57]:
print('total number of support vectors:', clf.steps[1][1].n_support_.sum())

total number of support vectors: 343


In [58]:
print('Alphas:', clf.steps[1][1].dual_coef_.round(3))

Alphas: [[ 1.    -1.     1.    -1.     1.    -1.    -1.    -1.    -1.    -1.
   1.     1.     1.     1.    -1.    -1.     1.    -1.    -1.    -1.
   1.     1.     1.    -1.    -1.     1.     1.    -1.    -1.    -1.
  -1.     1.     1.     1.    -1.     1.     1.    -1.     1.     1.
  -1.     1.    -1.    -1.     1.    -1.     1.    -1.     1.     1.
   1.    -1.     1.    -0.433  1.     1.    -1.    -1.    -1.    -1.
  -1.    -1.     1.    -1.     1.     1.    -0.731 -1.    -1.     1.
  -1.    -1.    -1.     1.    -1.    -1.    -1.     1.    -1.     1.
   1.     1.     1.    -0.329 -1.    -1.     1.     1.     1.     0.808
   1.    -1.     1.    -1.     1.    -1.    -1.     1.     1.    -1.
   1.    -1.    -1.     1.    -1.    -1.    -1.    -1.    -1.    -1.
   1.     1.    -1.    -1.     1.    -1.     1.     1.    -1.    -1.
  -1.    -1.     1.    -1.    -1.    -1.     1.    -1.     1.     1.
   1.     1.    -0.51  -1.    -1.    -1.     1.    -1.    -1.     1.
   1.    -1.     1.    

Trying different values of C

In [61]:
clf = make_pipeline(MinMaxScaler(), SVR(C = 10))
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
clf.score(X_train, y_train), clf.score(X_test, y_test)

(0.8591046590281795, 0.7234870488484579)

x, y
std = stanrdsclr .fit(x)
x_tra = stf.transform(x)

svc().fit(x_tra, y)

In [62]:
clf = make_pipeline(StandardScaler(), SVR(C = 10))
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
clf.score(X_train, y_train), clf.score(X_test, y_test)

(0.9089609024052957, 0.7423210756410796)

In [64]:
print('total number of support vectors:', clf.steps[1][1].n_support_.sum())

total number of support vectors: 330


In [65]:
print('Alphas:', clf.steps[1][1].dual_coef_.round(3))

Alphas: [[ 1.000e+01 -1.000e+01  1.000e+01 -1.000e+01  1.000e+01 -1.000e+01
  -1.000e+01 -1.000e+01 -1.000e+01  1.000e+01 -1.000e+01  1.000e+01
   1.000e+01 -1.000e+01  3.876e+00  1.000e+01 -1.000e+01  4.692e+00
  -1.000e+01 -1.000e+01 -1.000e+01 -1.000e+01  1.000e+01  1.000e+01
  -1.325e+00 -1.000e+01 -5.531e+00 -1.000e+01  8.109e+00  1.000e+01
  -9.515e+00 -2.981e+00 -1.000e+01  1.000e+01  1.000e+01  1.000e+01
  -1.000e+01 -1.000e+01 -1.000e+01  1.000e+01 -1.000e+01 -5.578e+00
  -1.000e+01  1.000e+01 -1.000e+01  8.494e+00  1.000e+01  3.487e+00
  -1.668e+00 -1.000e+01  1.782e+00 -1.000e+01 -1.000e+01  1.000e+01
  -1.000e+01 -1.000e+01 -1.000e+01 -7.050e-01 -1.000e+01 -1.000e+01
  -9.176e+00  1.000e+01 -1.000e+01  1.000e+01  1.000e+01  5.043e+00
   1.000e+01  1.882e+00  1.000e+01 -4.510e+00 -1.000e+01 -1.000e+01
  -1.000e+01 -1.000e+01 -1.000e+01  1.000e+01  1.000e+01 -1.000e+01
   1.000e+01  1.000e+01  1.000e+01 -1.000e+01 -1.000e+01 -1.000e+01
   1.000e+01  1.000e+01  4.000e-03  1.00

In [67]:
pipe = make_pipeline(StandardScaler(), SVR())

parameters = {'svr__kernel':['linear', 'rbf', 'poly'], 'svr__C':[1, 10], 'svr__gamma': [ 0.1, 1.0, 10]}

clf = GridSearchCV(pipe, parameters, n_jobs=-1, cv=5)
clf.fit(X_train, y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svr', SVR())]),
             n_jobs=-1,
             param_grid={'svr__C': [1, 10], 'svr__gamma': [0.1, 1.0, 10],
                         'svr__kernel': ['linear', 'rbf', 'poly']})

In [68]:
clf.best_estimator_.steps[1][1]

SVR(C=10, gamma=0.1)

In [69]:
results = clf.cv_results_
df = pd.DataFrame(results)

df.mean_test_score = df.mean_test_score.round(5) * 100
df.mean_fit_time = df.mean_fit_time.round(3)
df.params.str.replace('svr__', '')

df = df[[
    'rank_test_score', 'params', 'mean_test_score', 'mean_fit_time'
]]
df.sort_values('rank_test_score',inplace=True)
df

Unnamed: 0,rank_test_score,params,mean_test_score,mean_fit_time
10,1,"{'svr__C': 10, 'svr__gamma': 0.1, 'svr__kernel...",81.489,0.011
9,2,"{'svr__C': 10, 'svr__gamma': 0.1, 'svr__kernel...",72.319,0.03
12,2,"{'svr__C': 10, 'svr__gamma': 1.0, 'svr__kernel...",72.319,0.028
15,2,"{'svr__C': 10, 'svr__gamma': 10, 'svr__kernel'...",72.319,0.028
0,5,"{'svr__C': 1, 'svr__gamma': 0.1, 'svr__kernel'...",72.279,0.009
3,5,"{'svr__C': 1, 'svr__gamma': 1.0, 'svr__kernel'...",72.279,0.009
6,5,"{'svr__C': 1, 'svr__gamma': 10, 'svr__kernel':...",72.279,0.009
1,8,"{'svr__C': 1, 'svr__gamma': 0.1, 'svr__kernel'...",62.795,0.007
13,9,"{'svr__C': 10, 'svr__gamma': 1.0, 'svr__kernel...",52.947,0.01
2,10,"{'svr__C': 1, 'svr__gamma': 0.1, 'svr__kernel'...",20.186,0.008


In [70]:
print('total number of support vectors:', clf.best_estimator_.steps[1][1].n_support_.sum())

total number of support vectors: 341


In [71]:
print('Alphas:', clf.best_estimator_.steps[1][1].dual_coef_.round(2))

Alphas: [[  8.69 -10.    10.   -10.    10.   -10.   -10.   -10.   -10.    10.
   -1.46 -10.    10.    10.   -10.     6.91  10.   -10.     1.4  -10.
    0.94 -10.   -10.   -10.    10.    10.    -1.06 -10.    -6.7  -10.
    8.95  10.    -8.86  -8.03 -10.    10.    10.    10.   -10.   -10.
  -10.     0.52  10.   -10.    -5.74 -10.    10.   -10.     4.6   10.
    8.71  -3.19 -10.     3.19 -10.   -10.    10.   -10.   -10.    -8.44
   -4.76 -10.   -10.    -6.4   10.   -10.    10.    10.     5.78  10.
    2.84  10.    -4.01 -10.   -10.   -10.    -9.44 -10.     3.78  -1.65
   10.    10.   -10.    10.    10.    10.    -9.65 -10.   -10.    10.
   10.    -0.08  10.   -10.   -10.    -7.05   8.37   2.46 -10.    -1.79
   10.   -10.     0.19 -10.    -9.28  10.    -3.74   7.64  -3.22 -10.
   10.    10.     7.38 -10.    10.    10.    10.    10.     4.55 -10.
  -10.    10.   -10.     8.23  10.   -10.    -4.01   6.24  10.    10.
  -10.     2.2   10.    10.   -10.    10.    10.    -1.02  10.   -10.
  -10.