In [4]:
import numpy as np, pandas as pd, seaborn as sns

In [5]:
# Load the Titanic dataset through seaborn's dataset loader.
df = sns.load_dataset('titanic')
# Last expression of the cell: show the available column names.
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [6]:
# Keep seven feature columns plus 'alive' (split off as the label in the next cell).
keepCols = [
    'sex', 'age', 'fare', 'class',
    'who', 'embark_town', 'alive', 'alone',
]
df = df[keepCols]

In [7]:
# Label column on one side, every other column on the other.
y = df['alive']
X = df.drop(columns='alive')

# Split the predictors into the age/fare pair and the remaining columns.
xCont = X[['age', 'fare']]
xCat = X.drop(columns=xCont.columns)

In [8]:
X.columns

Index(['sex', 'age', 'fare', 'class', 'who', 'embark_town', 'alone'], dtype='object')

In [9]:
import sklearn
from sklearn.model_selection import train_test_split

In [10]:
# Hold out 32% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
xTrain, xTest, yTrain, yTest = train_test_split(
    X,
    y,
    test_size=0.32,
    random_state=23,
)

In [11]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler, PowerTransformer, OrdinalEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

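* Impute missing `embark_town` values with `SimpleImputer` (most frequent value)
* One-hot encode `sex`, `who`, `embark_town`, `alone`, leaving one level out per feature (`drop='first'`)
* Ordinal-encode `class`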

In [12]:
# Column groups, one per preprocessing branch of the ColumnTransformer below.
contCols = ['age', 'fare']                               # imputed, scaled, power-transformed
catColsOrdinal = ['class']                               # ordinal-encoded
catColsNominal = ['sex', 'who', 'embark_town', 'alone']  # one-hot encoded

In [13]:
# Nominal branch: fill missing values with each column's most frequent value,
# then one-hot encode as a dense array, dropping the first level per feature.
# The imputer runs on every column routed to this pipeline, not only embark_town.
catNominalPipeLine = Pipeline([
    ('embarkeImputation', SimpleImputer(strategy='most_frequent')),
    ('oneHotEncoding', OneHotEncoder(drop='first', sparse_output=False)),
])

In [14]:
# Ordinal branch for the passenger class: impute with the most frequent value,
# then map the levels to integer codes.
#
# The rank order is spelled out rather than left to OrdinalEncoder's default
# (sorted unique values seen during fit). The default only matches
# First < Second < Third because those labels happen to sort alphabetically,
# and the codes would shift if a level were missing from the fitted data.
# With explicit categories the mapping is always First=0, Second=1, Third=2.
#
# NOTE: `categories` holds one list per input column, so it must be extended
# if more columns are ever routed through this pipeline.
catOrdinalPipeline = Pipeline(steps = [
    ('ordImputation', SimpleImputer(strategy = 'most_frequent')),
    ('ordEncoding', OrdinalEncoder(categories = [['First', 'Second', 'Third']]))
])

In [15]:
# Continuous branch: median-impute, scale robustly, then apply a Yeo-Johnson
# power transform (PowerTransformer's default method, named here explicitly).
contPipeline = Pipeline([
    ('simpleImputation', SimpleImputer(strategy='median')),
    ('robustScaler', RobustScaler()),
    ('yeoJohnson', PowerTransformer(method='yeo-johnson')),
])

In [16]:
# Column transformer: send each feature group through its own pipeline.
# Columns not named in any group are passed through unchanged.
preColTrans = ColumnTransformer(
    transformers=[
        ('catNominalPipeLine', catNominalPipeLine, catColsNominal),
        ('catOrdinalPipeline', catOrdinalPipeline, catColsOrdinal),
        ('contPipeline', contPipeline, contCols),
    ],
    remainder='passthrough',
)

In [17]:
# Wrap the preprocessor in a Pipeline; it is currently the only step.
finalPipeLine = Pipeline([('preColTrans', preColTrans)])

In [18]:
# Debugging aid, left disabled: uncomment to list the pipeline's attributes.
# dir(finalPipeLine)

In [19]:
# Fit the preprocessing on the training features and inspect the output shape.
xTrainPrepared = finalPipeLine.fit_transform(xTrain)
xTrainPrepared.shape

(605, 9)

In [20]:
# Shape of the untransformed training features, for comparison with the
# transformed output.
xTrain.shape

(605, 7)

In [21]:
from sklearn import set_config
# Ask scikit-learn to render estimators as a diagram when they are displayed.
set_config(display="diagram")
# Last expression of the cell: displays the pipeline.
finalPipeLine