## Pipelines

In [15]:
# Import
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Fetch the breast-cancer dataset that ships with scikit-learn
cancer = load_breast_cancer()

# Hold out a test split; random_state=0 pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=0,
)

# Explicit form: every step is a hand-named (name, estimator) pair
pipe_long = Pipeline(
    steps=[
        ("scaler", MinMaxScaler()),
        ("svc", SVC(C=100)),
    ]
)

# Compact form: make_pipeline picks the step names itself (see the printed steps)
pipe_short = make_pipeline(
    MinMaxScaler(),
    SVC(C=100),
)
print("Pipeline steps:\n{}".format(pipe_short.steps))

# Fit the whole pipeline on the training split, then score it on the held-out split
pipe_short.fit(X_train, y_train)
print("Test score: {:.2f}".format(pipe_short.score(X_test, y_test)))

Pipeline steps:
[('minmaxscaler', MinMaxScaler()), ('svc', SVC(C=100))]
Test score: 0.97


### Pipelines in Grid Searches

In [16]:
# Import
from sklearn.model_selection import GridSearchCV

# Candidate values for the SVC's C and gamma. The "svc__" prefix addresses the
# pipeline step named 'svc'; both parameters sweep the same six values.
param_grid = {
    name: [0.001, 0.01, 0.1, 1, 10, 100]
    for name in ('svc__C', 'svc__gamma')
}

# Show the steps being tuned, then wrap the pipeline in a 5-fold grid search
print("Pipeline steps:\n{}".format(pipe_short.steps))
grid = GridSearchCV(
    estimator=pipe_short,
    param_grid=param_grid,
    cv=5,
)

# Run the search using the training split only
grid.fit(X_train, y_train)

# Report the best cross-validation score, the held-out test score,
# and the winning parameter combination
print("Best cross-validation accuracy: {:.2f}".format(grid.best_score_))
print("Test set score: {:.2f}".format(grid.score(X_test, y_test)))
print("Best parameters: {}".format(grid.best_params_))

Pipeline steps:
[('minmaxscaler', MinMaxScaler()), ('svc', SVC(C=100))]
Best cross-validation accuracy: 0.98
Test set score: 0.97
Best parameters: {'svc__C': 1, 'svc__gamma': 1}
