# Example - 1

- Building and displaying a pipeline with a standard scaler followed by an estimator (logistic regression)

In [None]:
from sklearn.pipeline import Pipeline

# feature Scaling
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression


In [None]:
# Each pipeline step is a (name, estimator) pair: scaling first, then the classifier.
steps = [
    ("standard_scaler", StandardScaler()),
    ("classifier", LogisticRegression()),
]

In [None]:
# Display the list of (name, estimator) pairs before handing it to Pipeline.
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [None]:
# Chain the scaler and the classifier into one estimator, then display it.
pipe = Pipeline(steps=steps)
pipe

In [None]:
# visualize the Pipeline

from sklearn import set_config

# Render estimators as a diagram when they are displayed in the notebook.
set_config(display="diagram")

pipe

In [None]:
# creating a dataset

from sklearn.datasets import make_classification

# Generate a synthetic classification dataset of 1000 samples.
# A fixed random_state makes the data -- and therefore every downstream split,
# fit and prediction in this example -- reproducible across kernel restarts.
X, y = make_classification(n_samples=1000, random_state=42)


In [None]:
# Inspect the (n_samples, n_features) dimensions of the generated feature matrix.
X.shape

(1000, 20)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)


In [None]:
# Display the training feature matrix returned by train_test_split.
X_train

array([[ 0.74404199, -0.45534111, -0.52950786, ..., -0.7041975 ,
         0.51279448, -1.3501395 ],
       [-1.23203963, -0.80650656, -0.67859432, ..., -0.72286506,
         0.58954155, -0.14315368],
       [-0.38891204, -0.06714577, -0.18701614, ..., -0.97022412,
        -0.47724294,  0.64032681],
       ...,
       [-0.19795369, -0.1913783 , -0.41295608, ..., -0.1438062 ,
         0.35680824, -1.97905422],
       [-1.51852285,  1.29880329,  2.27236504, ...,  0.90766441,
        -0.57001812, -1.35486193],
       [ 0.31498394, -0.78283432, -1.08560723, ..., -2.0218172 ,
        -0.25255762,  0.91680032]])

In [None]:
# Fit the scaler and the classifier on the training split.
pipe.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split and display them.
y_pred = pipe.predict(X_test)
y_pred

array([1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,

# Example - 2

- Building and displaying a pipeline with a standard scaler, dimensionality reduction (PCA), and then an estimator (SVC)

In [None]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC


In [None]:
# Three stages: standardize, reduce to 3 principal components, then classify.
steps = [
    ("scaling", StandardScaler()),
    ("PCA", PCA(n_components=3)),
    ("SVC", SVC()),
]
steps

[('scaling', StandardScaler()), ('PCA', PCA(n_components=3)), ('SVC', SVC())]

In [None]:
# Assemble the scaling -> PCA -> SVC pipeline and display it.
pipe2 = Pipeline(steps=steps)
pipe2

In [None]:
# A single step can be looked up by its name, e.g. to apply only the scaling step:
# pipe2['scaling'].fit_transform(X_train)

In [None]:
# Fit all three stages of the second pipeline on the training split.
pipe2.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split with the second pipeline.
pipe2.predict(X_test)

array([1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,

# Complex example of a column transformer

In [None]:
from sklearn.impute import SimpleImputer

# for numerical processing pipeline
from sklearn.pipeline import Pipeline
import numpy as np

# feature Scaling
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Numeric branch: fill missing values with the column mean, then standardize.
numeric_processor = Pipeline(
    steps=[
        ("imputation_mean", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)
numeric_processor

In [None]:
# categorical processing pipeline

from sklearn.preprocessing import OneHotEncoder

# Categorical branch: replace missing values with the constant -1, then one-hot
# encode; handle_unknown="ignore" keeps transform from raising on unseen categories.
categorical_processor = Pipeline(
    steps=[
        ("imputation_constant", SimpleImputer(strategy="constant", fill_value=-1)),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)
categorical_processor

In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris dataset and pull out the feature matrix and the target labels.
iris = load_iris()
X, y = iris.data, iris.target

# Wrap the features in a DataFrame so that columns can be referenced by name.
X_df = pd.DataFrame(
    X,
    columns=["sepal_length", "sepal_width", "petal_length", "petal_width"],
)

# Keep 20% of the rows aside for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_df, y, test_size=0.2, random_state=42
)


In [None]:
# combining processing technique

from sklearn.compose import ColumnTransformer

# Route each group of columns through its own preprocessing pipeline.
# NOTE(review): the sepal columns are taken from iris.data like the petal ones,
# yet they go through the one-hot branch here -- presumably only to demonstrate
# a two-branch ColumnTransformer; confirm this is intended before reusing it.
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_processor, ["sepal_length", "sepal_width"]),
        ("numerical", numeric_processor, ["petal_length", "petal_width"]),
    ]
)

In [None]:
# Display the ColumnTransformer and its two preprocessing branches.
preprocessor

In [None]:
from sklearn.pipeline import make_pipeline

In [None]:
# Preview only: the pipeline built here is displayed but not assigned to a name.
make_pipeline(preprocessor, LogisticRegression())

In [None]:
# Estimator that forms the final step of the pipeline
logistic_regression_model = LogisticRegression()

# Preprocessing followed by the model, wrapped as a single estimator
full_pipeline = make_pipeline(preprocessor, logistic_regression_model)


In [None]:
# Fit the preprocessing steps and the classifier on the training split
full_pipeline.fit(X_train, y_train)

# Predict labels for X_test, the held-out split created by train_test_split
predictions = full_pipeline.predict(X_test)

In [None]:
# Display the labels predicted for the test split.
predictions

array([1, 0, 2, 1, 1, 0, 1, 2, 2, 1, 2, 0, 0, 0, 0, 2, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [None]:
from sklearn.metrics import accuracy_score

# Compare the predicted labels against the true test labels and report accuracy.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy:", accuracy)


Accuracy: 0.9333333333333333
