# In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Toy employee records, one tuple per person, in the column order listed below
_columns = ['Age', 'Salary', 'Gender', 'Department']
_rows = [
    (25, 50000, 'Male', 'HR'),
    (32, 60000, 'Female', 'IT'),
    (47, 80000, 'Female', 'Finance'),
    (51, 90000, 'Male', 'IT'),
    (62, 150000, 'Female', 'HR'),
]

# Pivot the row-wise records into the column -> values mapping pandas expects
data = {name: list(values) for name, values in zip(_columns, zip(*_rows))}
df = pd.DataFrame(data)

# Columns to be scaled versus columns to be one-hot encoded
categorical_features = ['Gender', 'Department']
numerical_features = ['Age', 'Salary']

# Define transformers
numerical_transformer = StandardScaler()

# drop='first' removes one dummy column per feature to avoid the dummy variable trap.
# Dense output is requested through `sparse_output`: scikit-learn 1.2 renamed the
# old `sparse` keyword and 1.4 removed it, so the legacy spelling is used only when
# the installed version rejects the new one.
try:
    categorical_transformer = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:
    categorical_transformer = OneHotEncoder(sparse=False, drop='first')

# Create column transformer: scale the numeric columns, one-hot encode the rest
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Learn the scaling/encoding parameters from df and apply them in one call
processed_data = preprocessor.fit_transform(df)

# Recover the names of the one-hot columns from the fitted encoder
fitted_encoder = preprocessor.named_transformers_['cat']
encoded_columns = fitted_encoder.get_feature_names_out(categorical_features)
final_columns = [*numerical_features, *encoded_columns]

# Assemble the labelled result and display it
processed_df = pd.DataFrame(data=processed_data, columns=final_columns)
print("Processed Data:\n", processed_df)