In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

def one_hot_encode_dataframe(df, exclude_columns=None):
    """One-hot encode the object-dtype columns of ``df``.

    Columns whose dtype is ``object`` are replaced by indicator columns
    produced by scikit-learn's ``OneHotEncoder`` with ``drop='first'``
    (the first category of every feature is dropped to avoid the dummy
    variable trap). All other columns are passed through untouched and
    appear first in the result, followed by the encoded columns.

    Args:
        df: Input DataFrame. It is not modified.
        exclude_columns: Optional collection of column names that must not
            be encoded even if they have object dtype (e.g. ID columns).
            ``None`` (the default) excludes nothing.

    Returns:
        A new DataFrame carrying the same index as ``df``. If there is
        nothing to encode, a copy of ``df`` is returned.
    """
    # ``None`` replaces the former mutable ``[]`` default argument.
    excluded = [] if exclude_columns is None else exclude_columns

    # Object-dtype columns that the caller did not ask us to skip.
    categorical_columns = [
        col
        for col in df.select_dtypes(include=['object']).columns
        if col not in excluded
    ]

    # Nothing to encode: fitting an encoder on zero features would fail,
    # so hand back an independent copy of the input instead.
    if not categorical_columns:
        return df.copy()

    # scikit-learn renamed ``sparse`` to ``sparse_output`` (1.2) and later
    # removed the old keyword (1.4); try the new spelling first and fall
    # back so both old and new releases work.
    try:
        encoder = OneHotEncoder(sparse_output=False, drop='first')
    except TypeError:
        encoder = OneHotEncoder(sparse=False, drop='first')

    encoded_data = encoder.fit_transform(df[categorical_columns])

    # Reuse the caller's index: without it the encoded frame would get a
    # fresh RangeIndex and ``concat(axis=1)`` would misalign the rows
    # (introducing NaNs) for any DataFrame with a non-default index.
    encoded_df = pd.DataFrame(
        encoded_data,
        columns=encoder.get_feature_names_out(categorical_columns),
        index=df.index,
    )

    df_non_categorical = df.drop(columns=categorical_columns)
    return pd.concat([df_non_categorical, encoded_df], axis=1)

# Demo: build a small frame with a numeric ID column and two string-valued
# columns, then one-hot encode it while explicitly excluding the ID column.
your_dataframe = pd.DataFrame(
    [
        [1, 'A', 'X'],
        [2, 'B', 'Y'],
        [3, 'A', 'X'],
    ],
    columns=['id_column', 'category_column', 'another_column'],
)

# Columns that must be left out of the encoding step (e.g. identifiers).
exclude_columns = ['id_column']

# Run the encoder and show the resulting frame.
encoded_dataframe = one_hot_encode_dataframe(
    your_dataframe,
    exclude_columns,
)
print(encoded_dataframe)

   id_column  category_column_B  another_column_Y
0          1                0.0               0.0
1          2                1.0               1.0
2          3                0.0               0.0
