# Feature Scaling

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [3]:
# Pipeline stages as (name, estimator) pairs: standardise the features first,
# then hand them to the classifier.
steps = [
    ('standard_scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
]

In [4]:
# Inspect the list of (name, estimator) pairs that will make up the pipeline.
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [5]:
# Chain the stages into a single estimator object.
pipe = Pipeline(steps=steps)

# Visualizing pipeline

In [6]:
from sklearn import set_config

In [7]:
# Ask scikit-learn to render estimators as diagrams when they are displayed
# in the notebook.
set_config(display='diagram')

In [8]:
# Display the (not yet fitted) scaler -> classifier pipeline.
pipe

# Creating a dataset

In [9]:
from sklearn.datasets import make_classification
# Generate a synthetic binary-classification dataset of 1000 samples.
# The seed is fixed so the data -- and therefore every downstream split, fit
# and prediction -- is identical on each Restart & Run All.
X, y = make_classification(n_samples=1000, random_state=42)

In [10]:
# Check the dataset dimensions: (n_samples, n_features).
X.shape

(1000, 20)

In [11]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples for evaluation; the fixed seed keeps the split
# assignment stable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [12]:
# Peek at the training features (the array repr is abbreviated with "...").
X_train

array([[ 1.53001623, -1.66950128, -0.30500881, ..., -0.40926655,
         0.72164052, -1.06405659],
       [-1.55514362, -1.8108755 ,  1.23996938, ..., -0.89384437,
        -0.39918107, -1.8761463 ],
       [-2.01463185,  1.52837629, -0.46928402, ...,  1.31920651,
         0.05646009,  0.43659265],
       ...,
       [-0.48754389,  2.22359952,  1.19069205, ...,  0.57202001,
         0.9034076 ,  1.21506762],
       [ 1.43226269, -0.5486555 ,  1.18143592, ...,  1.11795835,
         0.95633161, -1.3669629 ],
       [-0.86079298, -1.2678331 ,  3.00686864, ...,  1.49624168,
        -0.93322144,  0.11841759]])

# Example 1

In [13]:
# Fit every stage of the pipeline (scaler, then classifier) on the training split.
pipe.fit(X_train, y_train)

In [14]:
# Run the held-out samples through the fitted pipeline to get predicted labels.
y_pred = pipe.predict(X_test)

In [15]:
# Show the predicted class labels for the test split.
y_pred

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,

# Example 2
Display a pipeline with StandardScaler, dimensionality reduction (PCA), and then an estimator (SVC).

In [16]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [17]:
# Stages for the second pipeline: scale, project onto 3 principal components,
# then classify with a support-vector machine.
# NOTE(review): `Steps` differs from the earlier `steps` only by case, which is
# easy to mix up; consider a more distinctive name.
Steps = [
    ('scaling', StandardScaler()),
    ('PCA', PCA(n_components=3)),
    ('SVC', SVC()),
]

In [18]:
# Assemble the scale -> PCA -> SVC pipeline.
pipe2 = Pipeline(steps=Steps)

In [19]:
# Fit all three stages on the training split.
pipe2.fit(X_train, y_train)

In [20]:
# Predicted class labels for the held-out test split from the second pipeline.
pipe2.predict(X_test)

array([1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,

# Example 3: A more complex example using ColumnTransformer

In [21]:
from sklearn.impute import SimpleImputer

# Numerical processing pipeline

In [22]:
import numpy as np
# Preprocessing for numeric columns: fill NaN entries with the column mean,
# then standardise.
numeric_processor = Pipeline(
    steps=[
        (
            'imputation_mean',
            SimpleImputer(missing_values=np.nan, strategy='mean'),
        ),
        ('scaler', StandardScaler()),
    ]
)

In [23]:
# Display the numeric preprocessing pipeline.
numeric_processor

# Categorical processing pipeline

In [24]:
from sklearn.preprocessing import OneHotEncoder
# Preprocessing for categorical columns: replace missing entries with the
# literal 'missing', then one-hot encode. Categories not seen during fit are
# ignored at transform time rather than raising.
categorical_processor = Pipeline(
    steps=[
        (
            'imputation_constant',
            SimpleImputer(strategy='constant', fill_value='missing'),
        ),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

In [25]:
# Display the categorical preprocessing pipeline.
categorical_processor

# Combining the processing pipelines

In [26]:
from sklearn.compose import ColumnTransformer

In [27]:
# Route each group of columns to its own preprocessing pipeline.
# NOTE(review): the column names assume a DataFrame with 'gender', 'city',
# 'age' and 'height' columns; no such frame is defined in the visible cells.
preprocessor = ColumnTransformer(
    transformers=[
        ('categorical', categorical_processor, ['gender', 'city']),
        ('numeric', numeric_processor, ['age', 'height']),
    ]
)

In [28]:
# Display the combined column-wise preprocessor.
preprocessor

In [29]:
from sklearn.pipeline import make_pipeline

In [30]:
# End-to-end model: column-wise preprocessing followed by logistic regression.
# make_pipeline assigns the step names automatically.
final_pipe = make_pipeline(
    preprocessor,
    LogisticRegression(),
)

In [31]:
# Display the full preprocessing + classifier pipeline.
final_pipe