# In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

# Sample dataset: a tiny frame with one missing value in each of Age, Salary
# and Department, used to demonstrate imputation + scaling + one-hot encoding.
data = {
    'Age': [25, 32, None, 51, 62],
    'Salary': [50000, 60000, 80000, None, 150000],
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Female'],
    'Department': ['HR', 'IT', 'Finance', 'IT', None]
}
df = pd.DataFrame(data)

# Identify column types
numerical_features = ['Age', 'Salary']
categorical_features = ['Gender', 'Department']

# SimpleImputer searches for np.nan by default. In the numeric columns pandas
# has already turned None into NaN, but in an object (string) column the
# missing entry can remain a Python None, which is not NaN and may therefore
# be skipped by the imputer and later encoded as a category of its own.
# Normalise missing categorical entries to NaN so they are actually imputed.
df[categorical_features] = df[categorical_features].fillna(float('nan'))

# Numeric columns: fill gaps with the column mean, then standardise.
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the old
# keyword was removed in 1.4. Prefer the new name and fall back only when the
# installed version predates it (the constructor rejects unknown keywords
# with TypeError).
try:
    onehot_encoder = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:  # scikit-learn < 1.2
    onehot_encoder = OneHotEncoder(drop='first', sparse=False)

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode, dropping the first level of each feature.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', onehot_encoder)
])

# Combine into a single column transformer; the output columns follow the
# order of the transformers listed here (numeric first, then categorical).
preprocessor = ColumnTransformer([
    ('num', numeric_pipeline, numerical_features),
    ('cat', categorical_pipeline, categorical_features)
])

# Fit and transform
processed_data = preprocessor.fit_transform(df)

# Get encoded feature names from the fitted encoder inside the 'cat' pipeline
fitted_encoder = preprocessor.named_transformers_['cat'].named_steps['encoder']
encoded_cat_columns = fitted_encoder.get_feature_names_out(categorical_features)
final_columns = numerical_features + list(encoded_cat_columns)

# Convert to DataFrame
processed_df = pd.DataFrame(processed_data, columns=final_columns)

print("Processed DataFrame:\n", processed_df)