In [1]:
from sklearn.pipeline import Pipeline
##feature Scaling
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [2]:
# Pipeline stages as (name, estimator) pairs, listed in the order they run:
# scale the features first, then hand them to the classifier.
steps = [
    ("standard_scaler", StandardScaler()),
    ("classifier", LogisticRegression()),
]

In [3]:
# Show the list of (name, estimator) tuples as this cell's output.
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [4]:
# Wrap the ordered stages in a single estimator so that fit/predict
# run every stage in sequence.
pipe = Pipeline(steps=steps)

In [5]:
##visualize Pipeline
from sklearn import set_config

In [6]:
# Ask scikit-learn to render estimators as diagrams when they are
# displayed in the notebook. This is a global setting.
set_config(display="diagram")

In [7]:
# Display the pipeline; with display="diagram" set, it is rendered as a diagram.
pipe

In [11]:
##creating a dataset
from sklearn.datasets import make_classification

# Generate a synthetic binary-classification problem with 100 samples.
# random_state is fixed so that the data, and therefore every fit and
# prediction further down, is identical on each Restart & Run All.
# Without it the samples change on every execution.
# NOTE: outputs saved from an earlier unseeded run will differ until the
# notebook is re-executed.
X, y = make_classification(n_samples=100, random_state=42)

In [12]:
# Check the dimensions of the feature matrix: (n_samples, n_features).
X.shape

(100, 20)

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for evaluation; the fixed seed keeps the
# train/test partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [14]:
# Inspect the training feature matrix (NumPy abbreviates the printed array).
X_train

array([[-1.29444716, -0.61761994,  0.92208198, ...,  0.13739618,
        -0.86099769, -1.11039118],
       [-1.2291638 ,  0.5428851 , -0.38915225, ..., -0.90134508,
        -1.15488865, -0.49127681],
       [ 1.05425766,  0.75505698, -0.98519692, ..., -0.26181575,
        -1.07004443,  0.98616982],
       ...,
       [ 2.19796369, -0.83139424,  0.34439141, ...,  1.66859587,
         0.82464956,  0.84747507],
       [ 1.79543699,  0.13831403,  0.20088308, ...,  0.29234157,
        -1.79829817,  1.27659438],
       [ 1.29829163,  1.13674403, -0.46607552, ...,  1.03967496,
         1.54316879,  0.47107718]])

In [15]:
# Fit the whole pipeline on the training split only.
# fit returns the fitted pipeline, which is shown as the cell output.
pipe.fit(X_train,y_train)

In [16]:
# Predict class labels for the held-out test samples using the fitted pipeline.
y_pred=pipe.predict(X_test)

In [17]:
# Show the predicted labels, one per test sample.
y_pred

array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0])

# Example 2
Displaying a pipeline with a standard scaler, dimensionality reduction, and then an estimator

In [18]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [19]:
# Three-stage pipeline: standardize the features, project them onto
# 3 principal components, then classify with a support vector machine.
# NOTE: this rebinds `steps`, replacing the list built for the first example.
steps = [
    ("scaling", StandardScaler()),
    ("PCA", PCA(n_components=3)),
    ("SVC", SVC()),
]

In [20]:
# Build the second pipeline from the stages currently held in `steps`.
pipe2 = Pipeline(steps=steps)

In [21]:
# Fit every stage of the second pipeline on the training split.
# The fitted pipeline is returned and displayed.
pipe2.fit(X_train,y_train)

In [22]:
# Predict labels for the test split with the second pipeline and display them.
pipe2.predict(X_test)

array([1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1])

# Complex examples

In [23]:
from sklearn.impute import SimpleImputer

In [24]:
## numerical processing pipeline
import numpy as np

# For numeric columns: replace missing values (NaN) with the column mean,
# then standardize the imputed values.
numeric_processor = Pipeline(
    steps=[
        (
            "imputation_mean",
            SimpleImputer(missing_values=np.nan, strategy="mean"),
        ),
        ("scaler", StandardScaler()),
    ]
)

In [25]:
# Display the numeric preprocessing pipeline.
numeric_processor

In [26]:
## categorical processing pipeline

from sklearn.preprocessing import OneHotEncoder

# For categorical columns: fill missing entries with the constant string
# "missing", then one-hot encode. handle_unknown="ignore" keeps transform
# from raising on categories that were not seen during fit.
# NOTE: the first step name is misspelled ("consatnt"). It is left unchanged
# because step names act as parameter prefixes (<step>__<param>).
categorical_processor = Pipeline(
    steps=[
        (
            "imputation_consatnt",
            SimpleImputer(fill_value="missing", strategy="constant"),
        ),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [27]:
# Display the categorical preprocessing pipeline.
categorical_processor

In [28]:
## combine processing techniques
from sklearn.compose import ColumnTransformer

# Route each group of columns to its own sub-pipeline. Each entry is
# (name, transformer, columns).
# NOTE(review): columns are selected by name, so the input is presumably a
# pandas DataFrame with "gender", "City", "age" and "height" columns.
# No such frame is created in the visible cells; confirm before fitting.
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_processor, ["gender", "City"]),
        ("numerical", numeric_processor, ["age", "height"]),
    ]
)

In [29]:
# Display the combined column-wise preprocessor.
preprocessor

In [30]:
from sklearn.pipeline import make_pipeline

# Full model: column-wise preprocessing followed by logistic regression.
# make_pipeline generates the step names automatically.
# NOTE: this rebinds `pipe`, which earlier in the notebook held the fitted
# scaler + logistic-regression pipeline. This new pipeline is not fitted here.
pipe = make_pipeline(
    preprocessor,
    LogisticRegression(),
)
pipe