In [None]:
# data-preprocessing
from sklearn.impute import SimpleImputer # imputation transformer for completing nan values
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# dimensionality reduction
from sklearn.decomposition import PCA
# model
from sklearn.linear_model import LinearRegression
# composer and pipeline
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# load dataset
from sklearn.datasets import fetch_openml

In [None]:
# Fetch version 1 of the OpenML "titanic" dataset, split into a feature
# DataFrame (X) and a target (y).
X, y = fetch_openml(
    name="titanic",
    version=1,
    return_X_y=True,
    as_frame=True,
)
# Inspect per-column dtypes; the feature groups used later are chosen by dtype.
X.dtypes

In [None]:
# Split the columns into the two groups that get separate preprocessing.
# Numeric columns are picked with select_dtypes("number") rather than an exact
# 'float64' match, so integer (or float32) columns are not silently left out
# of both lists.
numerical_features = X.select_dtypes(include="number").columns.tolist()
# Only pandas 'category' columns count as categorical; columns of any other
# non-numeric dtype (e.g. object) end up in neither list.
cat_features = X.select_dtypes(include="category").columns.tolist()
# Quick sanity check: how many columns landed in each group.
len(numerical_features), len(cat_features)

In [None]:
# Numeric branch: replace missing values with the column mean, then standardize.
numerical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
])

# Categorical branch: fill missing entries with a constant placeholder
# (SimpleImputer's default fill_value, since none is given), then one-hot encode.
# handle_unknown="ignore" makes categories unseen during fit encode as all
# zeros at transform time instead of raising.
categorical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="constant")),
    ("one-hot", OneHotEncoder(handle_unknown="ignore")),
])

# Route each feature group through its own sub-pipeline and concatenate the
# results. Columns listed in neither group are dropped (ColumnTransformer's
# default remainder behaviour).
# sparse_threshold=0 forces a dense result: the one-hot step emits sparse
# output, and with the default threshold the combined matrix can switch to a
# sparse format as the number of one-hot columns grows, which the PCA step that
# consumes this output does not accept in every scikit-learn version.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_transformer, numerical_features),
        ("cat", categorical_transformer, cat_features),
    ],
    sparse_threshold=0,
)

# End-to-end estimator: preprocessing -> PCA -> linear regression.
# PCA() is left at its defaults, so no explicit number of components is set.
# NOTE(review): the final step is named 'clf' but is a regressor
# (LinearRegression); confirm a regressor rather than a classifier is intended
# for this target.
my_pipe = Pipeline([
    ("preprocessor", preprocessor),
    ("pca", PCA()),
    ("clf", LinearRegression()),
])


In [None]:
# Display the assembled pipeline as the cell's output (bare last expression).
my_pipe