# SVM思想解决回归问题

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

In [2]:
# Load the Boston dataset through sklearn.datasets and pull out the feature
# matrix (x) and the regression target (y).
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = datasets.load_boston()
x, y = boston.data, boston.target

In [3]:
# Split the data into training and test sets; the fixed random_state keeps
# the split identical across runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=666,
)

In [4]:
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [5]:
def StandardLinearSVR(epsilon = 0.1, C=1):
    """Build a pipeline that standardizes the features and then fits a LinearSVR.

    Parameters
    ----------
    epsilon : float, default 0.1
        Forwarded to ``LinearSVR(epsilon=...)``.
    C : float, default 1
        Forwarded to ``LinearSVR(C=...)``.

    Returns
    -------
    Pipeline
        Unfitted pipeline with the steps ``"std"`` (StandardScaler) and
        ``"linearSVR"`` (LinearSVR).
    """
    steps = [
        ("std", StandardScaler()),
        ("linearSVR", LinearSVR(epsilon=epsilon, C=C)),
    ]
    return Pipeline(steps)

def StandardSVR(epsilon = 0.1, gamma=1, C=1):
    """Build a pipeline that standardizes the features and then fits an RBF-kernel SVR.

    Parameters
    ----------
    epsilon : float, default 0.1
        Forwarded to ``SVR(epsilon=...)``.
    gamma : float, default 1
        Forwarded to ``SVR(gamma=...)``.
    C : float, default 1
        Forwarded to ``SVR(C=...)``.

    Returns
    -------
    Pipeline
        Unfitted pipeline with the steps ``"std"`` (StandardScaler) and
        ``"svr"`` (SVR with ``kernel="rbf"``).
    """
    rbf_regressor = SVR(epsilon=epsilon, kernel="rbf", gamma=gamma, C=C)
    steps = [
        ("std", StandardScaler()),
        ("svr", rbf_regressor),
    ]
    return Pipeline(steps)

In [6]:
# Fit the standardized linear SVR (epsilon=1.2, C=2) on the training split and
# report its score on both splits.
liner_svr = StandardLinearSVR(epsilon=1.2, C=2).fit(x_train, y_train)
print("train score : ", liner_svr.score(x_train, y_train))
print("test score : ", liner_svr.score(x_test, y_test))

train score :  0.7352068535060929
test score :  0.6422641699140331


In [7]:
# Fit the standardized RBF SVR (epsilon=1.2, gamma=0.05, C=100) on the training
# split and report its score on both splits.
svr = StandardSVR(epsilon=1.2, gamma=0.05, C=100).fit(x_train, y_train)
print("train score : ", svr.score(x_train, y_train))
print("test score : ", svr.score(x_test, y_test))

train score :  0.9546147934940431
test score :  0.8447803722382281


## 调整超参数

In [19]:
from sklearn.model_selection import GridSearchCV

# Search space for the "std" -> "svr" pipeline. Keys use the
# "<step>__<parameter>" convention to address parameters of the "svr" step;
# the bare "std" key supplies a single candidate for the scaler step itself.
# The three numeric ranges hold 30, 20 and 200 values respectively, i.e.
# 120000 candidate combinations in total.
# NOTE(review): the gamma range starts at 0.0 -- confirm that a zero RBF gamma
# is intended and accepted by the installed scikit-learn version.
grid_prama = [
    dict(
        std=[StandardScaler()],
        svr__kernel=["rbf"],
        svr__epsilon=np.arange(0, 3, 0.1),
        svr__gamma=np.arange(0, 1, 0.05),
        svr__C=np.arange(1, 101, 0.5),
    )
]

In [20]:
# Base pipeline whose parameters the grid search overrides per candidate.
svr_for_search = Pipeline(steps=[("std", StandardScaler()), ("svr", SVR())])

# Exhaustive 5-fold cross-validated search over grid_prama, run on all
# available cores with progress logging enabled.
svr_search = GridSearchCV(
    estimator=svr_for_search,
    param_grid=grid_prama,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
svr_search.fit(x_train, y_train)

Fitting 5 folds for each of 120000 candidates, totalling 600000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 6668 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done 22668 tasks      | elapsed:   22.1s
[Parallel(n_jobs=-1)]: Done 45068 tasks      | elapsed:   43.9s
[Parallel(n_jobs=-1)]: Done 73868 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 109068 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 150668 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 198668 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 253068 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 288804 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 314004 tasks      | elapsed:  6.4min
[Parallel(n_jobs=-1)]: Done 341604 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 371604 tasks      | elapsed:  7.8min
[Parallel(n_jobs=-1)]: Done 404004 tasks      | elapsed:  8.6min
[Parallel(n_jobs=-1)]:

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('std',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('svr',
                                        SVR(C=1.0, cache_size=200, coef0=0.0,
                                            degree=3, epsilon=0.1,
                                            gamma='auto_deprecated',
                                            kernel='rbf', max_iter=-1,
                                            shrinking=True, tol=0.001,
                                            verbose=False))],
                                verbose=False),
             iid='warn', n_jobs=-1,
             param_gri...
                          'svr__epsilon': array([0. , 0.1, 0.2, 0.3, 0.4

In [21]:
# Show the parameter combination the grid search selected as best.
svr_search.best_params_

{'std': StandardScaler(copy=True, with_mean=True, with_std=True),
 'svr__C': 100.5,
 'svr__epsilon': 0.6000000000000001,
 'svr__gamma': 0.05,
 'svr__kernel': 'rbf'}

In [22]:
# Show the cross-validation score of the best parameter combination.
svr_search.best_score_

0.8640371766815954

In [26]:
# Rebuild the RBF SVR from the grid-search winner instead of hand-copying the
# numbers from the printed output, so this cell cannot drift out of sync with
# the search result when the grid or the data changes.
best_params = svr_search.best_params_
svr = StandardSVR(
    epsilon=best_params["svr__epsilon"],
    gamma=best_params["svr__gamma"],
    C=best_params["svr__C"],
)

# Refit on the full training split and report the score on both splits.
svr.fit(x_train, y_train)
print("train score : ", svr.score(x_train, y_train))
print("test score : ",svr.score(x_test, y_test))

train score :  0.9560591596706831
test score :  0.8339639628432032
