# Using Pipelines for multiple preprocessing steps

In [1]:
from sklearn import datasets
import numpy as np
# Start from a random symmetric positive-definite 10x10 matrix.
# NOTE(review): no seed is set in the cells shown, so the values differ on
# every run; pass random_state / seed NumPy if reproducibility is required.
mat = datasets.make_spd_matrix(n_dim=10)

# Draw one Bernoulli(p=0.1) flag per entry; True marks a value to blank out.
masking_array = np.random.binomial(n=1, p=.1, size=mat.shape).astype(bool)

# Overwrite the flagged entries in place to simulate missing data.
mat[masking_array] = np.nan

# Show only the top-left 4x4 corner.
mat[:4, :4]

array([[ 0.93704353, -0.07562567,  0.72610418, -0.29385126],
       [-0.07562567,  0.62603256,  0.04867641,  0.15509402],
       [ 0.72610418,  0.04867641,  2.20566939, -0.64238569],
       [        nan,  0.15509402, -0.64238569,  0.92383203]])

In [3]:
from sklearn import preprocessing
# Mean-impute NaN entries column by column (axis=0). The arguments below are
# the estimator's defaults, written out so the behaviour is visible here.
# NOTE(review): `preprocessing.Imputer` is legacy API; newer scikit-learn
# releases provide `sklearn.impute.SimpleImputer` instead — confirm the
# scikit-learn version this notebook targets before upgrading.
impute = preprocessing.Imputer(missing_values='NaN', strategy='mean', axis=0)

# Created here, used in the following cells to standardise the imputed data.
scaler = preprocessing.StandardScaler()

# Step 1 of the manual workflow: fill in the missing values.
mat_imputed = impute.fit_transform(mat)

# The NaN seen earlier in this corner is now replaced by a finite value.
mat_imputed[:4, :4]

array([[ 0.93704353, -0.07562567,  0.72610418, -0.29385126],
       [-0.07562567,  0.62603256,  0.04867641,  0.15509402],
       [ 0.72610418,  0.04867641,  2.20566939, -0.64238569],
       [ 0.27518646,  0.15509402, -0.64238569,  0.92383203]])

In [4]:
# Step 2 of the manual workflow: learn the scaling statistics from the
# imputed matrix, then apply them to that same matrix.
scaler.fit(mat_imputed)
mat_imp_and_scaled = scaler.transform(mat_imputed)

In [6]:
from sklearn import pipeline
# Bundle the two transformers into one estimator; each step is a
# (name, transformer) pair and the steps run in the order listed.
pipe = pipeline.Pipeline(steps=[
    ('impute', impute),
    ('scaler', scaler),
])

In [7]:
# Echo the pipeline's repr to check the step order and each step's parameters.
pipe

Pipeline(steps=[('impute', Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)), ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True))])

In [8]:
# One call replaces the two manual steps: the raw matrix (NaNs included) goes
# in, and the imputed-then-scaled matrix comes out.
new_mat = pipe.fit_transform(mat)
# Show only the top-left 4x4 corner.
new_mat[:4, :4]

array([[  1.29434021e+00,  -6.01222927e-01,   2.06459074e-01,
         -3.81065953e-01],
       [ -6.86054860e-01,   2.75333465e+00,  -3.13044977e-01,
          3.62232378e-01],
       [  8.81823227e-01,  -6.94711820e-03,   1.34110421e+00,
         -9.58118591e-01],
       [  1.08558658e-16,   5.01824790e-01,  -8.43004890e-01,
          1.63499702e+00]])

In [9]:
# Sanity check: the manual impute-then-scale result and the pipeline output
# must match element for element.
np.array_equal(mat_imp_and_scaled, new_mat)

True

In [10]:
# Demonstrate that the pipeline as a whole cannot be inverted: the imputation
# step has no `inverse_transform`, so the call raises AttributeError.
# The error is caught and printed (instead of left as an uncaught traceback)
# so that "Restart & Run All" continues on to the following cells.
try:
    pipe.inverse_transform(new_mat)
except AttributeError as exc:
    # Same "Type: message" form as the last line of the original traceback.
    print("%s: %s" % (type(exc).__name__, exc))

AttributeError: 'Imputer' object has no attribute 'inverse_transform'

In [11]:
# The scaler alone is invertible: undoing the standardisation gives back the
# imputed matrix (the filled-in values stay; the original NaNs do not return).
mat_unscaled = scaler.inverse_transform(new_mat)

# Show only the top-left 4x4 corner, for comparison with the imputed matrix.
mat_unscaled[:4, :4]

array([[ 0.93704353, -0.07562567,  0.72610418, -0.29385126],
       [-0.07562567,  0.62603256,  0.04867641,  0.15509402],
       [ 0.72610418,  0.04867641,  2.20566939, -0.64238569],
       [ 0.27518646,  0.15509402, -0.64238569,  0.92383203]])