In [3]:
from sklearn.pipeline import Pipeline #Pipeline library
from sklearn.pipeline import make_pipeline
from sklearn import set_config # Use for visualize pipeline
from sklearn.compose import ColumnTransformer # for combine processing technqiues


#libraries for preprocessing & ML models. 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

#processing
from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# #Score
# from sklearn.metrics import classification_report
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# from sklearn.metrics import mean_squared_error
# from sklearn.metrics import accuracy_score
# from sklearn.metrics import plot_confusion_matrix


# #models
# from sklearn import tree
# import xgboost as xgb
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.ensemble import GradientBoostingClassifier
# from imblearn.under_sampling import NearMiss
# from imblearn.over_sampling import RandomOverSampler
# from imblearn.over_sampling import SMOTE

# #tuning
# from sklearn.model_selection import GridSearchCV
# #kfold
# from sklearn.model_selection import TimeSeriesSplit

#common libraries
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
from sklearn.impute import SimpleImputer

# others
from sklearn.datasets import make_classification


# Set Steps

In [5]:
# Ordered (name, estimator) pairs: standardise the features first,
# then hand them to a logistic-regression classifier.
steps = [
    ("standard_scaler", StandardScaler()),
    ("classifier", LogisticRegression()),
]
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [6]:
# Ask scikit-learn to render estimators as a diagram when they are displayed.
set_config(display="diagram")

# Wrap the (name, estimator) pairs defined above into a single Pipeline
# and show it as the cell output.
pipe = Pipeline(steps=steps)
pipe

In [8]:
# Build a synthetic classification dataset for the examples in this notebook.
# A fixed random_state makes the generated data identical on every run, so the
# fitted models and predictions are reproducible after Restart & Run All
# (the train/test split is already seeded the same way).
X, y = make_classification(n_samples=1000, random_state=42)
X.shape


(1000, 20)

In [9]:
# Split features and labels into training and evaluation parts.
# test_size=0.33 sets the held-out proportion; the fixed seed keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [10]:
# Train the pipeline on the training split, then label the held-out samples.
# Pipeline.fit returns the fitted pipeline itself, so the calls can be chained.
y_pred = pipe.fit(X_train, y_train).predict(X_test)


# The steps above are the basic workflow for building and using a pipeline

# Ex2: A pipeline with a standard scaler, dimensionality reduction, and then an estimator

In [11]:
# Step 1: describe the stages -- scale the features, reduce them to three
# principal components, then classify with a support vector classifier.
steps = [
    ("scaling", StandardScaler()),
    ("PCA", PCA(n_components=3)),
    ("SVC", SVC()),
]

# Step 2: build the pipeline and train it on the training split
# (fit returns the fitted pipeline, so it can be bound directly).
pipe2 = Pipeline(steps=steps).fit(X_train, y_train)

# Step 3: predict labels for the test split; shown as the cell output.
pipe2.predict(X_test)


array([0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,

# Ex3: A complex pipeline using a column transformer

In [12]:
# Numeric-column preprocessing: replace NaN entries with the column mean,
# then standardise the result.
numeric_processor = Pipeline(
    steps=[
        (
            "imputation_mean",
            SimpleImputer(missing_values=np.nan, strategy="mean"),
        ),
        ("scaler", StandardScaler()),
    ]
)
numeric_processor

In [13]:
# Categorical-column preprocessing: fill missing entries with the constant
# "missing", then one-hot encode. handle_unknown="ignore" is passed so that
# categories not seen during fitting do not raise at transform time.
# NOTE: the step name below is kept exactly as originally spelled, because
# step names are part of the pipeline's parameter keys.
categorical_processor = Pipeline(
    steps=[
        (
            "imputation_consatnt",
            SimpleImputer(fill_value="missing", strategy="constant"),
        ),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)
categorical_processor

In [15]:
# Route each group of columns to its own preprocessing pipeline:
# "gender" and "City" go through the categorical steps,
# "age" and "height" go through the numeric steps.
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_processor, ["gender", "City"]),
        ("numerical", numeric_processor, ["age", "height"]),
    ]
)
preprocessor

In [16]:
# Chain the column preprocessor with a logistic-regression classifier.
# make_pipeline generates the step names automatically.
# Note: this rebinds `pipe`, replacing the pipeline assigned to that name earlier.
pipe = make_pipeline(
    preprocessor,
    LogisticRegression(),
)
pipe