In [1]:
import sys
sys.path.append('../src')

In [2]:
# 1. Load necessary packages
import pandas as pd
from feature_engineering import get_feature_engineering_pipeline

In [3]:
# 2. Load your raw data
df = pd.read_csv('../data/raw/data.csv')  # Adjust path if needed

In [4]:
# 3. Define config for the pipeline
config = {
    'datetime_col': 'TransactionStartTime',
    'customer_id_col': 'CustomerId',
    'amount_col': 'Amount',
    'cat_cols': ['ProductCategory', 'ProviderId', 'ChannelId'],
    'num_cols': ['Amount', 'Value', 'PricingStrategy'],
    'target_col': 'FraudResult',
    'imputation_strategy': 'mean',         # or 'median', etc.
    'scaling_method': 'standard'           # or 'minmax'
}


In [5]:
# 4. Initialize the pipeline
pipeline = get_feature_engineering_pipeline(config)

In [6]:
# 5. Fit and transform the data
# Use `FraudResult` as target if available
X_transformed = pipeline.fit_transform(df)

In [20]:
from feature_engineering import get_pipeline_feature_names

X_array = pipeline.transform(df)
column_names = get_pipeline_feature_names(pipeline, config)

# Create DataFrame with proper headers
X_df = pd.DataFrame(X_array, columns=column_names)

# (Optional) Attach original ID
X_df['CustomerId'] = df['CustomerId'].values

# Save to CSV
X_df.to_csv("data/processed/X_transformed_named.csv", index=False)


ImportError: cannot import name 'get_pipeline_feature_names' from 'feature_engineering' (c:\Users\User\Desktop\Week_5\Bank_Credit_Scoring\notebooks\../src\feature_engineering.py)

In [7]:
feature_names = pipeline.named_steps['preprocessor'].get_feature_names_out()
X_transformed_df = pd.DataFrame(X_transformed, columns=feature_names)

In [12]:
# 6. Inspect the transformed output
print(X_transformed_df.shape)
X_transformed_df = pd.DataFrame(X_transformed_df)
X_transformed_df.head()

(95662, 47)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,37,38,39,40,41,42,43,44,45,46
0,-0.046371,-0.072291,-0.349252,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,109921.75,923.712185,119,3042.294251,-5000.0,20000.0,-1.620379,-3.137005,0.472374
1,-0.054643,-0.080251,-0.349252,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0,109921.75,923.712185,119,3042.294251,-5000.0,20000.0,0.565446,-2.736867,-2.709038
2,-0.050426,-0.076352,-0.349252,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1000.0,500.0,2,0.0,500.0,500.0,-1.620379,-3.137005,0.472374
3,0.107717,0.096648,-0.349252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,228727.2,6019.136842,38,17169.24161,-10000.0,100000.0,1.134962,1.618899,0.472374
4,-0.059704,-0.075183,-0.349252,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0,228727.2,6019.136842,38,17169.24161,-10000.0,100000.0,0.565446,-2.736867,-2.709038


In [9]:
from feature_engineering import save_pipeline, load_pipeline

# Save
save_pipeline(pipeline, '../data/processed/feature_pipeline.pkl')


# Load later
loaded_pipeline = load_pipeline('../data/processed/feature_pipeline.pkl')


In [14]:
X_transformed_df.to_csv('../data/processed/transformed_features.csv', index=False)
print("✅ Transformed features saved to: ../data/processed/transformed_features_2.csv")
print(f"Shape: {X_transformed_df.shape}")


✅ Transformed features saved to: ../data/processed/transformed_features_2.csv
Shape: (95662, 47)


In [17]:
print(pipeline.named_steps)

{'feature_extractor': FeatureExtractor(datetime_col='TransactionStartTime'), 'aggregator': Aggregator(amount_col='Amount', customer_id_col='CustomerId'), 'woe_encoder': WoeTransformer(cat_cols=['ProductCategory', 'ProviderId', 'ChannelId'],
               target_col='FraudResult'), 'preprocessor': ColumnTransformer(remainder='passthrough',
                  transformers=[('num',
                                 Pipeline(steps=[('imputer', SimpleImputer()),
                                                 ('scaler', StandardScaler())]),
                                 ['Amount', 'Value', 'PricingStrategy']),
                                ('cat',
                                 Pipeline(steps=[('imputer',
                                                  SimpleImputer(strategy='most_frequent')),
                                                 ('onehot',
                                                  OneHotEncoder(handle_unknown='ignore'))]),
                                 ['Pro