In [1]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Assuming 'df' is the dataframe containing the dataset, with the binary
# target stored in the 'HighIncome' column.
features = df.drop(columns=['HighIncome'])
target = df['HighIncome']

# Hold out 20% of the rows for testing. A fixed random_state makes the split
# (and therefore everything printed below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# ColumnTransformer with a different transformer per column; columns that are
# not listed are passed through unchanged (remainder='passthrough').
#
# handle_unknown='ignore' keeps transform() from raising when the test split
# contains a category that never appeared in the training split. Combined
# with drop='first', such a row is encoded as all zeros, i.e. the same as the
# dropped reference category (scikit-learn emits a warning when this happens).
#
# NOTE(review): 'EducationLevel' is only imputed, not encoded. If it holds
# strings, the transformed output will contain raw strings -- confirm whether
# it should also be one-hot/ordinal encoded.
transformer = ColumnTransformer(
    transformers=[
        (
            'impute_education',
            SimpleImputer(strategy='most_frequent'),
            ['EducationLevel'],
        ),
        (
            'onehot_gender',
            OneHotEncoder(
                sparse_output=False, drop='first', handle_unknown='ignore'
            ),
            ['Gender'],
        ),
        (
            'onehot_city',
            OneHotEncoder(
                sparse_output=False, drop='first', handle_unknown='ignore'
            ),
            ['City'],
        ),
    ],
    remainder='passthrough',
)

# Fit on the training data only (so no statistics leak from the test split)
# and transform it in the same step; the fitted transformer is then reused
# as-is on the test data.
X_train_transformed = transformer.fit_transform(X_train)
X_test_transformed = transformer.transform(X_test)

# Checking the shape of the transformed datasets; both must have the same
# number of columns.
print(X_train_transformed.shape)
print(X_test_transformed.shape)