## Feature Engineering and Preprocessing

### Import Libraries

In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

### Load Dataset

In [2]:
data = pd.read_csv("E:/data06/data.csv")

### Separate Features and Target

In [4]:
X = data.drop("FraudResult", axis=1)
y = data["FraudResult"]

#### Define Numeric and Categorical Columns

In [5]:
numeric_features = X.select_dtypes(include=['float64', 'int64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

### Preprocessing Pipelines

In [6]:
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

#### Combine Preprocessors

In [7]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

### Apply Preprocessing

In [9]:
X_preprocessed = preprocessor.fit_transform(X)
print("Preprocessing Complete")
X_preprocessed

Preprocessing Complete


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1339268 stored elements and shape (95662, 296076)>