In [1]:
from sklearn import svm
from sklearn.datasets import samples_generator
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.pipeline import Pipeline

# 生成数据
X, y = samples_generator.make_classification(n_informative=5, n_redundant=0, random_state=42)

In [2]:
X

array([[ 0.61118028,  0.07396296, -0.49596905, ..., -0.51753365,
        -0.37339927, -0.70521074],
       [-0.55470506, -1.26634051, -1.03437283, ..., -0.05798395,
         0.07377011,  0.60247721],
       [ 0.72456704, -0.22624522,  1.28626861, ...,  1.06456868,
        -0.45374431,  0.44663973],
       ..., 
       [ 1.25561121,  0.40561759,  1.5316888 , ...,  0.71500701,
         0.48056211,  0.40041203],
       [ 1.72707396, -0.00827807,  1.20562808, ...,  0.69476103,
         1.3238748 ,  0.93299664],
       [-0.47240735, -0.03014427,  1.7691167 , ..., -0.56770578,
         0.28012139,  0.3905229 ]])

In [3]:
X.shape

(100, 20)

In [4]:
# ANOVA SVM-C
anova_filter = SelectKBest(f_regression, k=5)
svm_clf = svm.SVC(kernel='linear')

In [5]:
svm_clf

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [6]:
# Pipeline of transforms with a final estimator.
# Sequentially apply a list of transforms and a final estimator.
anova_svm = Pipeline([
    ('anova', anova_filter),
    ('svc', svm_clf)
])

In [7]:
anova_svm # steps 包含2个操作

Pipeline(memory=None,
     steps=[('anova', SelectKBest(k=5, score_func=<function f_regression at 0x113168d08>)), ('svc', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [8]:
anova_svm.set_params(anova__k=10, svc__C=0.1).fit(X, y)

Pipeline(memory=None,
     steps=[('anova', SelectKBest(k=10, score_func=<function f_regression at 0x113168d08>)), ('svc', SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [9]:
prediction = anova_svm.predict(X)

In [10]:
prediction

array([1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 0, 0])

In [11]:
y

array([0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 1])

In [12]:
anova_svm.score(X, y)

0.82999999999999996