# Creating Pipelines to Simplify Projects

In [1]:
from sklearn.pipeline import Pipeline

# Feature Scaling

from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression

In [3]:
# Ordered (name, estimator) pairs: standardize the features, then classify.
steps = [
    ("Standard_Scaler", StandardScaler()),
    ("Classification", LogisticRegression()),
]

In [4]:
steps

[('Standard_Scaler', StandardScaler()),
 ('Classification', LogisticRegression())]

In [6]:
# Wrap the list of (name, estimator) steps in a single Pipeline object.

pipe = Pipeline(steps=steps)

In [7]:
pipe

# OR

In [9]:
# Visualize the pipeline: ask scikit-learn to render estimators as an
# HTML diagram when they are displayed in the notebook.
# NOTE(review): recent scikit-learn releases may already default to
# "diagram" — confirm for the installed version.

from sklearn import set_config

set_config(display="diagram")

In [10]:
pipe

In [11]:
# Create a synthetic classification dataset.
# random_state pins the generated samples so that Restart & Run All
# reproduces the same data (and therefore the same downstream results).

from sklearn.datasets import make_classification
X,y = make_classification(n_samples=1000, random_state=42)

In [12]:
X.shape

(1000, 20)

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Hold out 33% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [15]:
X_train

array([[-0.37420744, -1.2720638 ,  1.8063763 , ...,  1.48673543,
         0.37875673,  0.7342014 ],
       [-0.21224462, -2.39946495, -0.00980397, ..., -0.40678726,
         1.25783806, -1.4950193 ],
       [-0.07768924, -0.90142691, -0.0209016 , ...,  1.40308456,
         1.20286002, -1.33759887],
       ...,
       [ 0.27908258,  0.36587927, -1.22316796, ..., -0.06513749,
         3.11490169, -0.47416459],
       [ 0.12487536,  0.48808801, -0.59661326, ..., -0.28593549,
         1.37065389, -0.05424161],
       [ 0.7208971 ,  0.84553759,  2.22529717, ...,  0.79734131,
        -0.76890018, -0.65878658]])

In [16]:
pipe.fit(X_train,y_train)

In [17]:
y_pred = pipe.predict(X_test)

In [18]:
y_pred

array([0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0,

# Example 2

### Displaying a Pipeline With Standard Scaler, Dimensionality Reduction, and Then an Estimator

In [25]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [30]:
# Three-stage pipeline: standardize, project onto 3 principal components,
# then classify with a support vector classifier.
steps = [
    ("scaling", StandardScaler()),
    ("PCA", PCA(n_components=3)),
    ("SVC", SVC()),
]

In [31]:
steps

[('scaling', StandardScaler()), ('PCA', PCA(n_components=3)), ('SVC', SVC())]

In [32]:
# Build the second pipeline from the scaling -> PCA -> SVC steps.
pipe_2 = Pipeline(steps=steps)

In [33]:
pipe_2

In [34]:
# Run only the scaling step on the training data to inspect its output.
# Note: this fits the pipeline's own scaler in place; the later pipe_2.fit()
# call refits every step, so the final model is unaffected.
pipe_2['scaling'].fit_transform(X_train)

array([[-0.3878146 , -1.29447986,  1.67065696, ...,  1.58387428,
         0.26095587,  0.81680445],
       [-0.2188988 , -2.43781127, -0.01576595, ..., -0.39339177,
         0.81612343, -1.53664975],
       [-0.07856702, -0.91860585, -0.02607071, ...,  1.49652386,
         0.78140306, -1.37045635],
       ...,
       [ 0.29352085,  0.36660746, -1.14244092, ..., -0.03663212,
         1.98891764, -0.45890325],
       [ 0.13269358,  0.49054301, -0.56065054, ..., -0.26719518,
         0.88737018, -0.01557815],
       [ 0.75430224,  0.85304338,  2.05964795, ...,  0.86399087,
        -0.46382564, -0.65381415]])

In [35]:
pipe_2.fit(X_train,y_train)

In [36]:
pipe_2_pred = pipe_2.predict(X_test)

In [37]:
pipe_2_pred

array([0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,

# Example 3

## Complex Example Using ColumnTransformer

In [39]:
from sklearn.impute import SimpleImputer
import numpy as np

In [40]:
# Numerical processing pipeline: fill missing values with the column mean,
# then standardize.

numeric_processor = Pipeline(
    steps=[
        ("Imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)


In [41]:
numeric_processor

In [42]:
from sklearn.preprocessing import OneHotEncoder

In [43]:
# Categorical processing pipeline: replace missing entries with the constant
# "missing", then one-hot encode (handle_unknown="ignore" for unseen categories).

categorical_processor = Pipeline(
    steps=[
        ("Imputation_constant", SimpleImputer(strategy="constant", fill_value="missing")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [44]:
categorical_processor

In [45]:
# To Combine 2 Pipelines

from sklearn.compose import ColumnTransformer

In [46]:
# Preview: route each column group through its own preprocessing pipeline.
ColumnTransformer(
    transformers=[
        ("Categorical", categorical_processor, ["gender", "City"]),
        ("Numerical", numeric_processor, ["age", "height"]),
    ]
)

In [47]:
# Combined preprocessor: categorical columns go through the categorical
# pipeline, numeric columns through the numeric pipeline.
full_processor = ColumnTransformer(
    transformers=[
        ("Categorical", categorical_processor, ["gender", "City"]),
        ("Numerical", numeric_processor, ["age", "height"]),
    ]
)

In [48]:
full_processor

In [49]:
# We have built a custom preprocessing pipeline; make_pipeline lets us chain it with an estimator without naming each step

from sklearn.pipeline import make_pipeline

In [50]:
# Chain the column preprocessor with the classifier; step names are generated
# automatically by make_pipeline.
final_pipe = make_pipeline(full_processor, LogisticRegression())

In [51]:
final_pipe