In [3]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [4]:
steps = [("standard_scaler",StandardScaler()),("classifier",LogisticRegression())]

In [5]:
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [6]:
pipe = Pipeline(steps)

In [7]:
from sklearn import set_config

In [8]:
set_config(display = "diagram")

In [9]:
pipe

0,1,2
,steps,"[('standard_scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [11]:
# First, define X and y with your data
# For example:
import numpy as np
import pandas as pd

# Option 1: If you have a DataFrame
# df = pd.read_csv('your_data.csv')
# X = df.drop('target_column', axis=1)
# y = df['target_column']

# Option 2: For demonstration with random data
X = np.random.rand(100, 4)  # 100 samples, 4 features
y = np.random.randint(0, 2, 100)  # Binary target variable

# Now split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


In [12]:
X_train

array([[0.70647046, 0.54189403, 0.34075002, 0.00741929],
       [0.77170988, 0.01590256, 0.7116803 , 0.48404469],
       [0.55112457, 0.17897951, 0.14854581, 0.68422361],
       [0.01298123, 0.08500931, 0.65215931, 0.54052256],
       [0.39565495, 0.85298286, 0.73210556, 0.25370955],
       [0.69229683, 0.96866307, 0.89422075, 0.91245577],
       [0.74719444, 0.99300198, 0.44127853, 0.24724992],
       [0.07698957, 0.33099123, 0.5194335 , 0.38840117],
       [0.42719831, 0.71694021, 0.71729596, 0.9744528 ],
       [0.68871247, 0.97842683, 0.43401491, 0.22492709],
       [0.50523026, 0.53914938, 0.61677895, 0.78504561],
       [0.48367405, 0.65185971, 0.39017763, 0.1498339 ],
       [0.82261818, 0.67281663, 0.64035163, 0.00308685],
       [0.88312449, 0.18815298, 0.27575313, 0.9780018 ],
       [0.14223862, 0.92930865, 0.03439809, 0.80247432],
       [0.37891789, 0.49211874, 0.09754737, 0.51976906],
       [0.46215731, 0.20307831, 0.70476305, 0.51512903],
       [0.29333262, 0.94528901,

In [13]:
pipe.fit(X_train,y_train)

0,1,2
,steps,"[('standard_scaler', ...), ('classifier', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [14]:
y_pred = pipe.predict(X_test)

In [15]:
y_pred

array([0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0], dtype=int32)

In [16]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [17]:
steps = [('scaling', StandardScaler()),
         ('PCA', PCA(n_components = 3)),
         ('SVC', SVC())]

In [18]:
pipe2 = Pipeline(steps)
pipe2.fit(X_train, y_train)

0,1,2
,steps,"[('scaling', ...), ('PCA', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_components,3
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [19]:
pipe2.predict(X_test)

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0], dtype=int32)

In [20]:
from sklearn.impute import SimpleImputer

In [21]:
import numpy as np
numeric_processor=Pipeline(
    steps=[("imputation_mean",SimpleImputer(missing_values=np.nan,strategy="mean")),("scaler",StandardScaler())]

)

In [22]:
numeric_processor

0,1,2
,steps,"[('imputation_mean', ...), ('scaler', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,missing_values,
,strategy,'mean'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True


In [24]:
from sklearn.preprocessing import OneHotEncoder
categorical_processor=Pipeline(
    steps=[("imputation_constant",SimpleImputer(fill_value="missing",strategy="constant")),
           ("onehot",OneHotEncoder(handle_unknown="ignore"))]

)

In [25]:
categorical_processor

0,1,2
,steps,"[('imputation_constant', ...), ('onehot', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,missing_values,
,strategy,'constant'
,fill_value,'missing'
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'


In [26]:
from sklearn.compose import ColumnTransformer

In [27]:
    preprocessor=ColumnTransformer(
        [("categorical",categorical_processor,["gender","city"]),
         ("numerical",numeric_processor,["age","height"])]
    )

In [28]:
    preprocessor

0,1,2
,transformers,"[('categorical', ...), ('numerical', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'constant'
,fill_value,'missing'
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,missing_values,
,strategy,'mean'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True


In [30]:
from sklearn.pipeline import make_pipeline

In [31]:
pipe=make_pipeline(preprocessor,LogisticRegression())

In [32]:
pipe

0,1,2
,steps,"[('columntransformer', ...), ('logisticregression', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('categorical', ...), ('numerical', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'constant'
,fill_value,'missing'
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,missing_values,
,strategy,'mean'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100
