## Imports

In [49]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

## Load in data

In [43]:
data = load_boston()
X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'])

## Create pipeline

In [45]:
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('reduce_dim', PCA()),
    ('regressor', Ridge())
])

## Fit pipeline

In [47]:
pipe = pipe.fit(X_train, y_train)

## View parameters with `get_params()`

In [48]:
pipe.get_params()

{'memory': None,
 'steps': [('scaler',
   StandardScaler(copy=True, with_mean=True, with_std=True)),
  ('reduce_dim',
   PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
       svd_solver='auto', tol=0.0, whiten=False)),
  ('regressor',
   Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
         normalize=False, random_state=None, solver='auto', tol=0.001))],
 'verbose': False,
 'scaler': StandardScaler(copy=True, with_mean=True, with_std=True),
 'reduce_dim': PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
     svd_solver='auto', tol=0.0, whiten=False),
 'regressor': Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
       normalize=False, random_state=None, solver='auto', tol=0.001),
 'scaler__copy': True,
 'scaler__with_mean': True,
 'scaler__with_std': True,
 'reduce_dim__copy': True,
 'reduce_dim__iterated_power': 'auto',
 'reduce_dim__n_components': None,
 'reduce_dim__random_state': None,
 're

In [24]:
print('Testing score: ', pipe.score(X_test, y_test))

Testing score:  0.7391809400706117


## Fine tune model with `GridSearchCV`

In [50]:
n_components = np.arange(1, 11)
alpha = 2.0**np.arange(-6, 6)

params = {
    'reduce_dim__n_components': n_components,
    'regressor__alpha': alpha
}

gridsearch = GridSearchCV(pipe, params, verbose=1).fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 120 candidates, totalling 360 fits


[Parallel(n_jobs=1)]: Done 360 out of 360 | elapsed:    1.3s finished


In [51]:
gridsearch.score(X_test, y_test)

0.7004888795698064

In [52]:
gridsearch.best_params_

{'reduce_dim__n_components': 6, 'regressor__alpha': 2.0}