In [106]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

%matplotlib inline

In [107]:
def create_data():
    """Build a two-feature, binary-labelled dataset from the iris data.

    Loads iris into a DataFrame, keeps the first 100 rows and the first two
    feature columns plus the label column, and rewrites every label equal to
    0 as -1 (all other label values are left unchanged).

    Returns:
        tuple: ``(features, labels)`` where ``features`` is the 2-column
        slice and ``labels`` is the last column of the same NumPy array.
    """
    bunch = load_iris()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame['label'] = bunch.target

    # Columns 0 and 1 are the first two features; -1 is the label just added.
    samples = np.array(frame.iloc[:100, [0, 1, -1]])

    # `labels` is a view into `samples`, so the masked assignment relabels
    # the underlying array in place, exactly as a per-row loop would.
    labels = samples[:, -1]
    labels[labels == 0] = -1
    return samples[:, :2], labels

In [108]:
# Standardize the features (zero mean, unit variance via StandardScaler).
# The data is split first and the scaler is fitted on the training split only,
# so test-set statistics never leak into preprocessing. The split is seeded so
# that a fresh "Restart & Run All" reproduces the same train/test partition.
RANDOM_STATE = 42

X_raw, y = create_data()
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=RANDOM_STATE
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix on the same scale as the training split, kept under the
# original name `X` for any later cell that refers to it.
X = scaler.transform(X_raw)

In [109]:
# Baseline: an SVC with library-default hyperparameters, fitted on the
# training split. SVC is imported in the notebook's top import cell.
# fit() stays the last expression so the fitted estimator is echoed as output.
clf = SVC()
clf.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [110]:
# Score the baseline classifier on the held-out test split; the value is
# shown as the cell output.
clf.score(X_test, y_test)

1.0

In [114]:
# SVM hyperparameter tuning.
# GridSearchCV is imported in the notebook's top import cell.

In [None]:
"""
高斯核主要参数为：C, gamma
C：惩罚系数，控制对误差的容忍程度；C越大越不能容忍误差，越容易出现过拟合
gamma：决定数据映射到数据空间的分布，gamma越大，支持向量越少
       物理意义是rbf的幅宽（gamma = 1/(2σ²)），gamma过大时高斯分布又高又瘦，方差小，
       只会作用于支持向量附近，模型泛化能力差
"""

In [129]:
# Exhaustive cross-validated search over the RBF-kernel hyperparameters
# C and gamma, fitted on the training split only.
svc = SVC(kernel='rbf')
param_grid = {
    'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
    'gamma': [0.1, 0.2, 0.4, 0.6, 0.8, 1.6, 3.2, 6.4, 12.8],
}
grid_search = GridSearchCV(svc, param_grid, verbose=1)
grid_search.fit(X_train, y_train)

# Full parameter dict of the best estimator; the refit cell below reads its
# 'C' and 'gamma' entries.
best_parameters = grid_search.best_estimator_.get_params()

# The labels contain no % placeholders, so they are passed to print() as plain
# strings. The former `label % best_params_` only avoided a TypeError because
# the right-hand operand happened to be a dict.
print("输出当前最好结果：", grid_search.best_params_)
print("输出最优得分：", grid_search.best_score_)

Fitting 3 folds for each of 63 candidates, totalling 189 fits
输出当前最好结果： {'C': 0.1, 'gamma': 0.2}
输出最优得分： 1.0


[Parallel(n_jobs=1)]: Done 189 out of 189 | elapsed:    0.2s finished


In [130]:
# Train a fresh RBF-kernel SVC with the C and gamma values selected by the
# grid search. fit() is the last expression, so the fitted estimator is echoed.
svc = SVC(
    C=best_parameters['C'],
    gamma=best_parameters['gamma'],
    kernel='rbf',
)
svc.fit(X_train, y_train)

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.2, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [131]:
# Score the tuned classifier on the held-out test split. The earlier bare
# `svc.predict(X_test)` call was removed: its result was neither stored nor
# displayed, so it only repeated work that score() performs itself.
svc.score(X_test, y_test)

1.0