## Use ColumnTransformer to apply different preprocessing to different columns:

- Select DataFrame columns by name
- Pass through or drop unspecified columns
- Requires scikit-learn 0.20+ (`get_feature_names_out` and `verbose_feature_names_out`, used below, need 1.0+)



In [2]:
import numpy as np 
import pandas as pd 
import seaborn as sns

In [3]:
# Load seaborn's bundled "titanic" example dataset.
titanic = sns.load_dataset("titanic")
# Bare expression on the last line: the notebook renders it as the cell output.
titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [12]:
from sklearn.compose import make_column_selector, ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

# One-hot encode the two categorical columns and fill missing ages with
# SimpleImputer's default strategy; every column not listed is dropped.
encode_step = ('ohe', OneHotEncoder(), ['embarked', 'sex'])
impute_step = ('imputer', SimpleImputer(), ['age'])

ct = ColumnTransformer(
    transformers=[encode_step, impute_step],
    remainder='drop',
)



In [10]:
import pandas as pd
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline

# Load the Titanic data and keep only fully populated rows.
# NOTE: dropna() without arguments removes a row when ANY column is NaN,
# not just the four feature columns selected below.
titanic = sns.load_dataset("titanic")
titanic = titanic.dropna()

# Feature matrix and target vector
feature_columns = ['age', 'fare', 'sex', 'class']
X = titanic[feature_columns]
y = titanic['survived']

# Numeric columns are standardised; categorical columns are one-hot encoded
# with the first level of each feature dropped.
scale_step = ('num', StandardScaler(), ['age', 'fare'])
onehot_step = ('cat', OneHotEncoder(drop='first'), ['sex', 'class'])
preprocessor = ColumnTransformer(
    transformers=[scale_step, onehot_step],
    remainder='drop',  # anything not listed above is discarded
)

# Fit and transform in one call, then show the first five transformed rows
X_transformed = preprocessor.fit_transform(X)
print(X_transformed[:5])


[[ 0.15208196 -0.10011013  0.          0.          0.        ]
 [-0.03987502 -0.33848477  0.          0.          0.        ]
 [ 1.17585249 -0.35470782  1.          0.          0.        ]
 [-2.02343043 -0.81567192  0.          0.          1.        ]
 [ 1.43179512 -0.68654298  0.          0.          0.        ]]


In [None]:
import pandas as pd 
import numpy as np 
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import seaborn as sns 

data = sns.load_dataset("titanic")

# Numeric columns: fill missing values with the column mean, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scalar', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode. Categories not seen during fit are encoded as all
# zeros at transform time instead of raising an error.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Define ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, ['age', 'fare']),
        ('cat', categorical_transformer, ['sex', 'embarked']),
    ],
    remainder='passthrough',  # keep unspecified columns unchanged
    n_jobs=-1,  # use all available processors to fit the transformers in parallel
    verbose=True,  # print the elapsed time as each transformer is fitted
    verbose_feature_names_out=True,  # prefix feature names with the transformer name
)

# Transform the dataset.
# NOTE: because the whole frame is passed in with remainder='passthrough',
# the `survived` and `alive` columns are carried into the output; drop them
# beforehand if the result is meant to be used as model features.
transformed_data = preprocessor.fit_transform(data)

feature_names = preprocessor.get_feature_names_out()
print("Feature names: ", feature_names)

# Rebuild a labelled DataFrame. Passing the source index keeps every row
# aligned with `data` even when its index is not the default 0..n-1 range.
transformed_df = pd.DataFrame(
    transformed_data,
    columns=feature_names,
    index=data.index,
)
transformed_df


Feature names:  ['num__age' 'num__fare' 'cat__sex_female' 'cat__sex_male'
 'cat__embarked_C' 'cat__embarked_Q' 'cat__embarked_S'
 'remainder__survived' 'remainder__pclass' 'remainder__sibsp'
 'remainder__parch' 'remainder__class' 'remainder__who'
 'remainder__adult_male' 'remainder__deck' 'remainder__embark_town'
 'remainder__alive' 'remainder__alone']


Unnamed: 0,num__age,num__fare,cat__sex_female,cat__sex_male,cat__embarked_C,cat__embarked_Q,cat__embarked_S,remainder__survived,remainder__pclass,remainder__sibsp,remainder__parch,remainder__class,remainder__who,remainder__adult_male,remainder__deck,remainder__embark_town,remainder__alive,remainder__alone
0,-0.592481,-0.502445,0.0,1.0,0.0,0.0,1.0,0,3,1,0,Third,man,True,,Southampton,no,False
1,0.638789,0.786845,1.0,0.0,1.0,0.0,0.0,1,1,1,0,First,woman,False,C,Cherbourg,yes,False
2,-0.284663,-0.488854,1.0,0.0,0.0,0.0,1.0,1,3,0,0,Third,woman,False,,Southampton,yes,True
3,0.407926,0.42073,1.0,0.0,0.0,0.0,1.0,1,1,1,0,First,woman,False,C,Southampton,yes,False
4,0.407926,-0.486337,0.0,1.0,0.0,0.0,1.0,0,3,0,0,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,-0.207709,-0.386671,0.0,1.0,0.0,0.0,1.0,0,2,0,0,Second,man,True,,Southampton,no,True
887,-0.823344,-0.044381,1.0,0.0,0.0,0.0,1.0,1,1,0,0,First,woman,False,B,Southampton,yes,True
888,0.0,-0.176263,1.0,0.0,0.0,0.0,1.0,0,3,1,2,Third,woman,False,,Southampton,no,False
889,-0.284663,-0.044381,0.0,1.0,1.0,0.0,0.0,1,1,0,0,First,man,True,C,Cherbourg,yes,True
