In [None]:
# Ques_1.ipynb

# Importing necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Toy employee records: two numeric columns and two categorical columns
data = dict(
    Age=[25, 30, 45, 35, 22],
    Salary=[50000, 60000, 80000, 120000, 35000],
    Gender=['Male', 'Female', 'Female', 'Male', 'Female'],
    Department=['HR', 'IT', 'Finance', 'Finance', 'HR'],
)

# Show the raw table before any preprocessing is applied
df = pd.DataFrame(data)
print("Original Data:", df, sep="\n")

# Column groups: each group is routed to its own preprocessing pipeline
numerical_features = ['Age', 'Salary']
categorical_features = ['Gender', 'Department']

# Numeric columns: fill missing values with the column mean, then standardize
# (MinMaxScaler is the imported alternative to StandardScaler)
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode, dropping the first category of every feature
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(drop='first')),
])

# Send each column group through its matching pipeline
preprocessor = ColumnTransformer([
    ('num', num_pipeline, numerical_features),
    ('cat', cat_pipeline, categorical_features),
])

# Apply transformations
transformed_data = preprocessor.fit_transform(df)

# The one-hot encoder produces sparse output by default, and ColumnTransformer
# keeps the stacked result sparse whenever its overall density is below
# sparse_threshold (0.3 by default). A sparse matrix cannot be laid out as the
# named columns built below, so densify it when that happens.
if hasattr(transformed_data, "toarray"):
    transformed_data = transformed_data.toarray()

# Convert the transformed data back to a DataFrame
# Column order follows the transformer order: numeric columns first, then the
# one-hot columns named by the fitted encoder
encoded_feature_names = preprocessor.named_transformers_['cat']['encoder'].get_feature_names_out(categorical_features)
all_feature_names = numerical_features + list(encoded_feature_names)

# Reuse the source index so transformed rows stay aligned with df even when
# df does not carry the default 0..n-1 index
transformed_df = pd.DataFrame(
    transformed_data,
    columns=all_feature_names,
    index=df.index,
)

print("\nTransformed Data:")
print(transformed_df)