In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# Read the raw dataset from the CSV file into a DataFrame. The path is relative,
# so it is resolved against the notebook's current working directory.
data = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:

# Define features (X) and target variable (y).
# The target's name is kept in a single constant so the placeholder only has to
# be replaced in one place.
TARGET_COLUMN = 'target_column'  # Replace 'target_column' with the actual name of the target column

# Fail early with an actionable message (same exception type the lookup itself
# would raise) when the placeholder was not updated or the name is misspelled.
if TARGET_COLUMN not in data.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found in data; "
        f"available columns: {list(data.columns)}"
    )

X = data.drop(columns=TARGET_COLUMN)  # every column except the target
y = data[TARGET_COLUMN]

In [None]:

# Identify categorical and numerical columns.
# Numerical columns are selected positively: numeric dtypes, plus booleans
# (which the previous "everything that is not object" rule also treated as
# numerical). Selecting by exclusion would push category / string-dtype columns
# into the mean-imputation + scaling branch, which cannot handle them.
numerical_cols = X.select_dtypes(include=['number', 'bool']).columns

# Every remaining column is treated as categorical, so no column is silently
# dropped and the original column order is preserved.
# NOTE(review): datetime-like columns, if any are present, also land here and
# probably deserve dedicated feature engineering - confirm against the data.
categorical_cols = X.columns[~X.columns.isin(numerical_cols)]

In [None]:

# Create the preprocessing transformer from two independent branches.

# Numerical branch: replace missing values with the column mean, then
# standardize the values.
numeric_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: replace missing values with the most frequent value,
# then one-hot encode. handle_unknown='ignore' keeps transform() from raising
# on categories that were not present when the encoder was fitted.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns to its branch.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_pipeline, numerical_cols),
    ('cat', categorical_pipeline, categorical_cols),
])

In [None]:

# Fit the preprocessor on the feature frame and transform it in a single step.
# NOTE(review): this fits on all of X; if the data is split into train/test
# later on, fit on the training portion only - confirm against the rest of the
# notebook.
X_processed = preprocessor.fit_transform(X=X)