In [31]:
import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

In [32]:
# Read the complete Titanic table from the CSV file in the working directory.
data = pd.read_csv('Titanic.csv')

# Keep a small demo subset: .loc slices by label and includes the end label,
# so ':5' yields the six rows labelled 0 through 5, restricted to five columns.
X = data.loc[
    :5,
    ['Pclass', 'Sex', 'Age', 'Embarked', 'Fare'],
]
X

Unnamed: 0,Pclass,Sex,Age,Embarked,Fare
0,3,male,22.0,S,7.25
1,1,female,38.0,C,71.2833
2,3,female,26.0,S,7.925
3,1,female,35.0,S,53.1
4,3,male,35.0,S,8.05
5,3,male,,Q,8.4583


In [33]:
# Create the two transformers used by the column transformers below.
# Both are built with no arguments, i.e. with their default settings.

si = SimpleImputer()    # fills missing values
ohe = OneHotEncoder()   # expands categorical columns into indicator columns

In [34]:
# Column transformer #1:
#   - 'Sex' and 'Embarked' -> OneHotEncoder
#   - 'Age'                -> SimpleImputer (default strategy, i.e. the mean)
#   - every other column   -> passed through unchanged

ct1 = make_column_transformer(
    (ohe, ['Sex', 'Embarked']),
    (si, ['Age']),
    remainder='passthrough',
)

In [35]:
# Fit ct1 on X and show the transformed array. In the displayed result the
# 8 columns are: 2 one-hot columns for 'Sex', 3 for 'Embarked', the imputed
# 'Age' (the missing value in row 5 becomes 31.2, the mean of the five observed
# ages), followed by the passed-through 'Pclass' and 'Fare'.
ct1.fit_transform(X)

array([[ 0.    ,  1.    ,  0.    ,  0.    ,  1.    , 22.    ,  3.    ,
         7.25  ],
       [ 1.    ,  0.    ,  1.    ,  0.    ,  0.    , 38.    ,  1.    ,
        71.2833],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  1.    , 26.    ,  3.    ,
         7.925 ],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  1.    , 35.    ,  1.    ,
        53.1   ],
       [ 0.    ,  1.    ,  0.    ,  0.    ,  1.    , 35.    ,  3.    ,
         8.05  ],
       [ 0.    ,  1.    ,  0.    ,  1.    ,  0.    , 31.2   ,  3.    ,
         8.4583]])

In [36]:
# Column transformer #2:
#   - 'Sex' and 'Embarked' -> OneHotEncoder
#   - 'Age'                -> SimpleImputer (strategy = mean)
#   - 'Fare'               -> explicitly passed through
#   - every other column   -> dropped
ct2 = make_column_transformer(
    (ohe, ['Sex', 'Embarked']),
    (si, ['Age']),
    ('passthrough', ['Fare']),
    remainder='drop',
)

In [37]:
# Fit ct2 on X and show the transformed array. In the displayed result the
# 7 columns are: 2 one-hot columns for 'Sex', 3 for 'Embarked', the imputed
# 'Age', and 'Fare'; 'Pclass' is absent because the remainder is dropped.
ct2.fit_transform(X)

array([[ 0.    ,  1.    ,  0.    ,  0.    ,  1.    , 22.    ,  7.25  ],
       [ 1.    ,  0.    ,  1.    ,  0.    ,  0.    , 38.    , 71.2833],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  1.    , 26.    ,  7.925 ],
       [ 1.    ,  0.    ,  0.    ,  0.    ,  1.    , 35.    , 53.1   ],
       [ 0.    ,  1.    ,  0.    ,  0.    ,  1.    , 35.    ,  8.05  ],
       [ 0.    ,  1.    ,  0.    ,  1.    ,  0.    , 31.2   ,  8.4583]])

In [38]:
# Column transformer #3:
#   - 'Sex' and 'Embarked' -> OneHotEncoder
#   - 'Age'                -> SimpleImputer (strategy = mean)
#   - 'Fare'               -> explicitly dropped
#   - every other column   -> passed through unchanged
ct3 = make_column_transformer(
    (ohe, ['Sex', 'Embarked']),
    (si, ['Age']),
    ('drop', ['Fare']),
    remainder='passthrough',
)

In [39]:
# Fit ct3 on X and show the transformed array. In the displayed result the
# 7 columns are: 2 one-hot columns for 'Sex', 3 for 'Embarked', the imputed
# 'Age', and the passed-through 'Pclass'; 'Fare' is absent because it is dropped.
ct3.fit_transform(X)

array([[ 0. ,  1. ,  0. ,  0. ,  1. , 22. ,  3. ],
       [ 1. ,  0. ,  1. ,  0. ,  0. , 38. ,  1. ],
       [ 1. ,  0. ,  0. ,  0. ,  1. , 26. ,  3. ],
       [ 1. ,  0. ,  0. ,  0. ,  1. , 35. ,  1. ],
       [ 0. ,  1. ,  0. ,  0. ,  1. , 35. ,  3. ],
       [ 0. ,  1. ,  0. ,  1. ,  0. , 31.2,  3. ]])