# Exported from notebook cell In [12]
# src/pipeline.py

import sys
import os
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer

from src.feature_engineering import TransactionTimeFeatures, AggregateTransactionFeatures
# from src.woe_iv import WOETransformer  # Uncomment if using WOE

# ================================
# Define Column Groups
# ================================

# Raw column holding the transaction timestamp; passed to the datetime
# feature step as its ``datetime_col``.
DATETIME_COL = 'TransactionStartTime'

# Grouping key and value column passed to the aggregate feature step.
AGG_GROUP_COL = 'ProviderId'
AGG_VALUE_COL = 'Amount'

# Columns routed through the categorical sub-pipeline.
CATEGORICAL_COLS = [
    'CurrencyCode',
    'ProviderId',
    'ProductId',
    'ProductCategory',
    'ChannelId',
    'PricingStrategy',
]

# Columns routed through the numerical sub-pipeline.
NUMERICAL_COLS = [
    # Raw monetary fields.
    'Amount',
    'Value',
    # Presumably produced by the aggregate feature step -- confirm against
    # src.feature_engineering.
    'TotalAmount',
    'AvgAmount',
    'TransactionCount',
    'AmountStdDev',
    # Presumably produced by the datetime feature step -- confirm against
    # src.feature_engineering.
    'transaction_hour',
    'transaction_day',
    'transaction_month',
    'transaction_year',
]

# ================================
# Pipelines for Sub-Transforms
# ================================

# Numerical columns: fill missing values with the column median, then
# standardize with StandardScaler.
numerical_pipeline = Pipeline(
    [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode. Categories unseen during fit are ignored at transform
# time instead of raising.
categorical_pipeline = Pipeline(
    [
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# ================================
# ColumnTransformer
# ================================

def build_transaction_pipeline() -> Pipeline:
    """Assemble the unfitted end-to-end transaction preprocessing pipeline.

    The steps run in this order:

    1. ``aggregate_features`` -- ``AggregateTransactionFeatures`` configured
       with ``AGG_GROUP_COL`` and ``AGG_VALUE_COL``.
    2. ``datetime_features`` -- ``TransactionTimeFeatures`` configured with
       ``DATETIME_COL``.
    3. ``preprocessing`` -- a ``ColumnTransformer`` that sends
       ``NUMERICAL_COLS`` through ``numerical_pipeline`` and
       ``CATEGORICAL_COLS`` through ``categorical_pipeline``. Columns in
       neither list are dropped (the ``ColumnTransformer`` default,
       ``remainder='drop'``).

    Returns:
        A new, unfitted ``sklearn.pipeline.Pipeline``.
    """
    preprocessing = ColumnTransformer(transformers=[
        ('num', numerical_pipeline, NUMERICAL_COLS),
        ('cat', categorical_pipeline, CATEGORICAL_COLS),
    ])

    full_pipeline = Pipeline(steps=[
        # The imported class is AggregateTransactionFeatures; the previous
        # name ``AggregateFeatures`` was undefined and raised NameError.
        (
            'aggregate_features',
            AggregateTransactionFeatures(
                group_col=AGG_GROUP_COL,
                value_col=AGG_VALUE_COL,
            ),
        ),
        ('datetime_features', TransactionTimeFeatures(datetime_col=DATETIME_COL)),
        ('preprocessing', preprocessing),
        # Uncomment below if using supervised learning with WOE
        # ('woe', WOETransformer(columns=['ChannelId', 'ProductCategory']))
    ])

    return full_pipeline
