In [55]:
import pandas as pd
import numpy as np
import importlib
import pipeline_factory

# Force the notebook to see the latest changes in your .py file
importlib.reload(pipeline_factory)
from pipeline_factory import create_preprocessing_pipeline

# 1. Configuration
# Columns are grouped by the kind of preprocessing they should receive.
# The numeric group also carries its imputation and scaling choices; the
# categorical group only lists its columns.
pipeline_cfg = dict(
    numeric=dict(
        cols=['tenure_months', 'monthly_spend'],
        impute_strategy='mean',
        scaling='standard',
    ),
    categorical=dict(
        cols=['region', 'plan_type'],
    ),
)

# 2. Sample Data
# Four example customers. Each of tenure_months, monthly_spend and region
# contains exactly one missing value so that imputation has something to do;
# plan_type and customer_id are complete.
raw_df = pd.DataFrame(dict(
    tenure_months=[12, np.nan, 24, 5],
    monthly_spend=[85.50, 42.00, np.nan, 110.25],
    region=['Bangalore', 'Mumbai', 'Bangalore', np.nan],
    plan_type=['Basic', 'Premium', 'Premium', 'Basic'],
    customer_id=[1001, 1002, 1003, 1004],
))
# Show the untouched input before any preprocessing is applied.
print(raw_df)
# 3. Execution
# Build the preprocessing pipeline from the config, then fit it on the sample
# frame and transform that same frame in a single call.
print("\n[INFO] Building pipeline...")
pipe = create_preprocessing_pipeline(pipeline_cfg)
processed_data = pipe.fit_transform(raw_df)

# 4. Result Formatting
# The transformed result is wrapped back into a labelled DataFrame so the
# output is readable. Labels are assembled in the order: numeric columns,
# encoded categorical columns, then any columns not named in the config.
numeric_cols = pipeline_cfg['numeric']['cols']
categorical_cols = pipeline_cfg['categorical']['cols']

# NOTE(review): this lookup assumes the categorical sub-pipeline is the second
# fitted transformer of the 'preprocessor' step and that it exposes a step
# named 'encoder' -- confirm against pipeline_factory if that module changes.
cat_step = pipe.named_steps['preprocessor'].transformers_[1][1].named_steps['encoder']
cat_features = cat_step.get_feature_names_out(categorical_cols)

# Derive the labels of the unconfigured columns from the data instead of
# hard-coding them, so adding or removing such a column in raw_df does not
# leave the label list out of sync with the transformed output.
# NOTE(review): presumably the preprocessor passes unconfigured columns through
# unchanged and appends them last, in their original order -- verify against
# pipeline_factory.
configured_cols = set(numeric_cols) | set(categorical_cols)
passthrough_cols = [col for col in raw_df.columns if col not in configured_cols]

all_labels = numeric_cols + list(cat_features) + passthrough_cols
output_df = pd.DataFrame(processed_data, columns=all_labels)

print("\n--- Final Transformation ---")
print(output_df)

   tenure_months  monthly_spend     region plan_type  customer_id
0           12.0          85.50  Bangalore     Basic         1001
1            NaN          42.00     Mumbai   Premium         1002
2           24.0            NaN  Bangalore   Premium         1003
3            5.0         110.25        NaN     Basic         1004

[INFO] Building pipeline...

--- Final Transformation ---
   tenure_months  monthly_spend  region_Bangalore  region_Mumbai  region_NA  \
0      -0.245293       0.255816               1.0            0.0        0.0   
1       0.000000      -1.524661               0.0            1.0        0.0   
2       1.520814       0.000000               1.0            0.0        0.0   
3      -1.275522       1.268845               0.0            0.0        1.0   

   plan_type_Basic  plan_type_Premium  customer_id  
0              1.0                0.0       1001.0  
1              0.0                1.0       1002.0  
2              0.0                1.0       1003.0  
3  