In [4]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC

In [9]:
# Load the handwritten-digits dataset bundled with scikit-learn.
digits = datasets.load_digits()

In [20]:
# The leading axis of `digits.images` indexes the samples, so its length
# is the number of images in the dataset.
n_samples = digits.images.shape[0]
print(n_samples)

1797


In [15]:
# Flatten each image into one feature row; -1 lets NumPy infer the row length.
X = digits.images.reshape(n_samples, -1)

In [21]:
# Show the flattened feature matrix (NumPy truncates the repr of large arrays).
X

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [17]:
# Show the dataset's `data` attribute for comparison with the manually
# flattened `X`; the truncated reprs recorded in this notebook are identical.
digits.data

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [29]:
# Labels for each sample; used as the prediction target for the classifier.
y = digits.target

In [27]:
import numpy as np

# Small 3-D array (2 x 2 x 6) used to illustrate the flattening reshape:
# two identical "images", each with two rows of six values.
_toy_image = [
    [1, 2, 3, 4, 5, 6],
    [7, 8, 9, 10, 11, 12],
]
b = np.array([_toy_image, _toy_image])

In [28]:
# Keep the first axis and collapse the remaining axes into one row per item.
print(b.reshape((b.shape[0], -1)))

[[ 1  2  3  4  5  6  7  8  9 10 11 12]
 [ 1  2  3  4  5  6  7  8  9 10 11 12]]


In [30]:
# Hold out half of the samples for the final evaluation; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

In [31]:
# Parameter grid for the search: each dict is expanded independently, giving
# 2 x 4 = 8 RBF candidates plus 4 linear candidates (12 in total).
# The name `turned_para` is kept because later cells reference it.
turned_para = [
    dict(kernel=['rbf'], gamma=[1e-3, 1e-4], C=[1, 10, 100, 1000]),
    dict(kernel=['linear'], C=[1, 10, 100, 1000]),
]

In [32]:
# Candidate metrics to tune for. NOTE(review): no cell visible in this
# notebook reads `scores`; the search below hard-codes 'precision'.
scores = ['precision', 'recall']

In [33]:
# Exhaustive grid search over `turned_para`, scored with the macro-averaged
# metric named by `score` under 5-fold cross-validation on the training half.
# NOTE(review): "Turning" in the message below looks like a typo for "Tuning";
# it is left unchanged so the text matches the recorded output.
score = 'precision'
print("Turning hyper-params for {}".format(score))

clf = GridSearchCV(
    estimator=SVC(),
    param_grid=turned_para,
    cv=5,
    scoring='%s_macro' % score,
)
clf.fit(X_train, y_train)

print("Best param:", clf.best_params_, sep="\n")

Turning hyper-params for precision
Best param:
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}


In [34]:
# Per-candidate mean and standard deviation of the cross-validation test score.
means, stds = (
    clf.cv_results_['mean_test_score'],
    clf.cv_results_['std_test_score'],
)

In [35]:
# Show the full cross-validation results dict (timings, parameters, scores).
clf.cv_results_



{'mean_fit_time': array([0.03231177, 0.02672834, 0.0315155 , 0.01595573, 0.03171167,
        0.01575294, 0.03190122, 0.01516895, 0.01137967, 0.01077223,
        0.010776  , 0.01096768]),
 'std_fit_time': array([1.61798230e-03, 3.98731289e-04, 7.98118159e-04, 2.10459738e-05,
        3.98935190e-04, 4.09795047e-04, 6.39914624e-04, 4.08307236e-04,
        4.79798785e-04, 3.99833374e-04, 7.64164022e-04, 1.41399706e-05]),
 'mean_score_time': array([0.00678172, 0.00777931, 0.00658937, 0.00499344, 0.00640998,
        0.00439487, 0.00658569, 0.00498505, 0.00299425, 0.00319271,
        0.0033987 , 0.00359054]),
 'std_score_time': array([3.99184266e-04, 3.99088870e-04, 4.94579914e-04, 1.33599527e-05,
        4.67083905e-04, 4.97653690e-04, 4.93307401e-04, 4.25000762e-06,
        2.65234642e-06, 3.99226807e-04, 4.83875388e-04, 4.87896402e-04]),
 'param_C': masked_array(data=[1, 1, 10, 10, 100, 100, 1000, 1000, 1, 10, 100, 1000],
              mask=[False, False, False, False, False, False, False,

## GridSearchCV 如何组合所有候选参数并选出最优参数

+ 对参数列表中的每个 `{}`（参数字典）分别做排列组合：rbf 核有 2 × 4 = 8 种组合，linear 核有 4 种组合，共 8 + 4 = 12 组候选参数
+ `cv=5`（k 折交叉验证）在其中的作用：每组候选参数都在训练集上做 5 折交叉验证，以各折得分的平均值（`mean_test_score`）进行比较，平均得分最高的一组即为 `best_params_`

In [38]:
# One line per candidate from the parameter grid: mean CV score, a
# +/- two-standard-deviation spread, and the parameter combination.
for params, mean, std in zip(clf.cv_results_['params'], means, stds):
    spread = std * 2
    print("%0.3f (+/-%0.03f) for %r" % (mean, spread, params))

0.986 (+/-0.016) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.959 (+/-0.029) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.988 (+/-0.017) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.982 (+/-0.026) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.988 (+/-0.017) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.982 (+/-0.025) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.988 (+/-0.017) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.982 (+/-0.025) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.975 (+/-0.014) for {'C': 1, 'kernel': 'linear'}
0.975 (+/-0.014) for {'C': 10, 'kernel': 'linear'}
0.975 (+/-0.014) for {'C': 100, 'kernel': 'linear'}
0.975 (+/-0.014) for {'C': 1000, 'kernel': 'linear'}


In [39]:
# Evaluate on the held-out half: true labels alongside the predictions
# made by the fitted grid-search object.
y_true = y_test
y_pred = clf.predict(X_test)

In [42]:
# Per-class precision, recall, F1 and support on the held-out test set.
print(classification_report(y_true=y_true, y_pred=y_pred))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        89
          1       0.97      1.00      0.98        90
          2       0.99      0.98      0.98        92
          3       1.00      0.99      0.99        93
          4       1.00      1.00      1.00        76
          5       0.99      0.98      0.99       108
          6       0.99      1.00      0.99        89
          7       0.99      1.00      0.99        78
          8       1.00      0.98      0.99        92
          9       0.99      0.99      0.99        92

avg / total       0.99      0.99      0.99       899



In [43]:
# List the fields recorded in the cross-validation results dict.
clf.cv_results_.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_C', 'param_gamma', 'param_kernel', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'split3_train_score', 'split4_train_score', 'mean_train_score', 'std_train_score'])