### 1. Pipelines in scikit-learn

In [1]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline, make_pipeline

# Explicit form: every step is a (name, estimator instance) pair. The scaler must be
# instantiated -- MinMaxScaler() -- because a bare class is not a fittable step.
pipe_long = Pipeline([("scaler", MinMaxScaler()), ("svm", SVC())])

# Shorthand form: make_pipeline takes only the estimator instances and names the
# steps automatically.
pipe_short = make_pipeline(MinMaxScaler(), SVC())

In [2]:
# Inspect the auto-generated (name, estimator) pairs; the names come out as the
# lowercased estimator class names.
pipe_short.steps

[('minmaxscaler', MinMaxScaler()), ('svc', SVC())]

In [3]:
from sklearn.preprocessing import Normalizer

# When the same estimator type appears more than once, the generated step names
# get a numeric suffix so they stay unique.
pipe = make_pipeline(
    MinMaxScaler(),
    Normalizer(),
    MinMaxScaler(),
)
pipe.steps

[('minmaxscaler-1', MinMaxScaler()),
 ('normalizer', Normalizer()),
 ('minmaxscaler-2', MinMaxScaler())]

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset and hold out a test split (seeded for repeatability).
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=42,
)

# Scaling and classification are fitted as one estimator; fit() returns the
# pipeline itself, so it can be chained onto construction.
pipe = make_pipeline(MinMaxScaler(), SVC()).fit(X_train, y_train)
pipe.score(X_test, y_test)

0.9790209790209791

In [5]:
from sklearn.model_selection import GridSearchCV
# Grid keys use the "<step name>__<parameter>" form so each value is routed to the
# pipeline's "svc" step.
param_grid = {
    'svc__C': [0.01, 0.1, 1, 10, 100],
    'svc__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
}

# The whole pipeline (scaler + SVC) is the estimator being searched.
grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

print("Best cross-validation accuracy:", grid.best_score_)
print("Test set score:", grid.score(X_test, y_test))
print("Best Parameters:", grid.best_params_)

Best cross-validation accuracy: 0.9741450068399453
Test set score: 0.986013986013986
Best Parameters: {'svc__C': 10, 'svc__gamma': 0.1}
