# In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

def clean_data(df):
    """Deduplicate, impute, scale and one-hot encode a DataFrame.

    Processing steps:

    1. Exact duplicate rows are removed.
    2. ``int64`` / ``float64`` columns are mean-imputed and then standardised
       with ``StandardScaler``.
    3. ``object`` columns are imputed with their most frequent value and
       one-hot encoded, dropping the first category of each column.

    Columns of any other dtype are not passed to either pipeline and are
    therefore absent from the result (``ColumnTransformer`` drops unlisted
    columns by default).

    The preprocessor is fitted on ``df`` itself and discarded afterwards, so
    this function is meant for one-off cleaning, not for applying identical
    statistics to a second dataset.

    Args:
        df: Input ``pandas.DataFrame``. It is not modified.

    Returns:
        A new all-numeric ``pandas.DataFrame`` with a fresh default index
        (the original row labels are not kept). Numeric columns come first
        under their original names, followed by the one-hot columns named
        by the encoder (``<column>_<category>``).
    """
    # Step 1: Remove duplicates. drop_duplicates() already returns a new
    # frame; the explicit copy makes the column writes below unambiguous
    # writes to our own data rather than to a slice of the caller's frame.
    frame = df.drop_duplicates().copy()

    # Step 2: Detect column types
    numeric_cols = frame.select_dtypes(include=['int64', 'float64']).columns.tolist()
    categorical_cols = frame.select_dtypes(include=['object']).columns.tolist()

    # SimpleImputer only fills entries equal to its `missing_values` marker
    # (NaN by default). A Python None stored in an object column does not
    # match that marker, would be left untouched, and would then be encoded
    # as a category of its own. Normalise every missing marker to NaN first.
    for col in categorical_cols:
        frame[col] = frame[col].where(frame[col].notna(), float('nan'))

    # Step 3: Handle missing values and scaling/encoding using pipelines
    # `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the old
    # keyword was removed in 1.4; fall back to it only on older releases.
    try:
        encoder = OneHotEncoder(drop='first', sparse_output=False)
    except TypeError:
        encoder = OneHotEncoder(drop='first', sparse=False)

    numeric_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    categorical_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', encoder)
    ])

    preprocessor = ColumnTransformer([
        ('num', numeric_pipeline, numeric_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ])

    # Step 4: Apply transformations
    transformed = preprocessor.fit_transform(frame)

    # Step 5: Create clean DataFrame with new column names
    feature_names = list(numeric_cols)
    if categorical_cols:
        # A transformer with an empty column list is never fitted by
        # ColumnTransformer, so the encoder may only be queried when at
        # least one categorical column exists.
        fitted_encoder = preprocessor.named_transformers_['cat'].named_steps['encoder']
        feature_names.extend(fitted_encoder.get_feature_names_out(categorical_cols))

    return pd.DataFrame(transformed, columns=feature_names)

# Demo: a tiny frame with one missing entry in each of Age, Salary and
# Department; the last row repeats the second one exactly, so the
# de-duplication step has something to remove.
data = {
    "Age": [25, 32, None, 51, 62, 32],
    "Salary": [50000, 60000, 80000, None, 150000, 60000],
    "Gender": ["Male", "Female", "Female", "Male", "Female", "Female"],
    "Department": ["HR", "IT", "Finance", "IT", None, "IT"],
}
df = pd.DataFrame(data)

# Run the full cleaning pipeline and show the resulting numeric frame.
cleaned_df = clean_data(df)
print("Cleaned DataFrame:\n", cleaned_df)