# Combining Pipelines

In [2]:
# import packages
from sklearn.svm import SVC
from sklearn.datasets import load_iris

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion

from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

In [3]:
# load the iris data
iris = load_iris()
x, y = iris['data'], iris['target']

### Feature Extraction

In [5]:
x.shape

(150, 4)

In [6]:
# the dataset is way too high-dimensional
pca = PCA(n_components=2)

# maybe some of the original features were good
selection = SelectKBest(k=3)

In [7]:
# build a transformer from PCA and Univariate selection
combined_features = FeatureUnion([('pca', pca), ('univ_select', selection)])

In [9]:
# initialize the classifier
svm = SVC(kernel='linear')

In [10]:
# create our pipeline from FeatureUnion
pipeline = Pipeline([('features', combined_features), ('svm', svm)])

# set up parameter grid
param_grid = {'features__pca__n_components' : [1, 2, 3],
              'features__univ_select__k' : [1, 2, 3],
              'svm__C' : [.1, 1, 10]}

# create grid search object, passing pipeline as the feature selection
grid_search = GridSearchCV(pipeline, param_grid, verbose=10, refit=True)

# fit the model and tune paramters
grid_search.fit(x,y)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV 1/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 1/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time=   0.0s
[CV 2/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 2/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time=   0.0s
[CV 3/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 3/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.867 total time=   0.0s
[CV 4/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1
[CV 4/5; 1/27] END features__pca__n_components=1, features__univ_select__k=1, svm__C=0.1;, score=0.933 total time=   0.0s
[CV 5/5; 1/27] START features__pca__n_components=1, features__univ_select__k=1, svm__C

GridSearchCV(estimator=Pipeline(steps=[('features',
                                        FeatureUnion(transformer_list=[('pca',
                                                                        PCA(n_components=2)),
                                                                       ('univ_select',
                                                                        SelectKBest(k=3))])),
                                       ('svm', SVC(kernel='linear'))]),
             param_grid={'features__pca__n_components': [1, 2, 3],
                         'features__univ_select__k': [1, 2, 3],
                         'svm__C': [0.1, 1, 10]},
             verbose=10)

In [11]:
print(grid_search.best_params_)

{'features__pca__n_components': 2, 'features__univ_select__k': 3, 'svm__C': 1}
