In [1]:
#Imports
import pandas as pd
import numpy as np
from src.features import engineer_features, build_preprocessor
from src.preprocessing import split_features_target, split_train_test, drop_unused_columns
from src.data_ingestion import load_engineered_data
from src.utils import save_model_inputs

In [2]:
# Columns are grouped by type because each type needs a different
# preprocessing technique.
NUMERICAL_FEATURES = [
    "age",
    "tenure",
    "balance",
    "credit_score",
    "products_number",
    "products_per_tenure",
    "balance_per_product",
    "churn_risk_score",
]
BINARY_FEATURES = [
    "credit_card",
    "active_member",
    "inactive_single_product",
    "zero_balance",
    "high_balance",
    "early_customer",
]
CATEGORICAL_FEATURES = [
    "age_group",
    "credit_score_band",
]

In [3]:
# Load the preprocessed bank-churn CSV into `df`.
# The path is a raw string: written as a normal literal, its backslash pairs
# (\c, \d, \P, \p) are invalid escape sequences, which emit a SyntaxWarning on
# Python 3.12+ and are slated to become a syntax error. The runtime value of
# the path is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider deriving
# it from a configurable data directory so the notebook runs elsewhere.
df = load_engineered_data(r"C:\customerchurnprediction\data\Preprocessed\preprocessedbank_churn.csv")

In [4]:
# Feature engineering: rebind `df` to the result of engineer_features(df).
# NOTE(review): because `df` is replaced by its own transform, re-running this
# cell on its own passes already-engineered data back into engineer_features.
# Re-run the load cell first when iterating.
df = engineer_features(df)

In [5]:
# Separate the predictors (X) from the 'churn' target (y), then partition both
# into train and test sets.
# NOTE(review): test size, random seed and stratification are whatever
# split_train_test defaults to — not visible in this notebook; confirm they are
# fixed for reproducibility.
X, y = split_features_target(df, 'churn')
X_train, X_test, y_train, y_test = split_train_test(X, y)

In [6]:
# Construct the preprocessing object from the numerical and categorical
# column lists.
# NOTE(review): BINARY_FEATURES is defined above but is not passed here.
# Confirm how build_preprocessor treats columns it is not told about
# (passed through vs. dropped).
preprocessor = build_preprocessor(
    numerical_features=NUMERICAL_FEATURES,
    categorical_features=CATEGORICAL_FEATURES,
)

In [7]:
# Apply transformations.
# The preprocessor is fitted on the training split only (fit_transform); the
# test split is then transformed with that already-fitted preprocessor, so no
# statistics are learned from the test data.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

In [8]:
# Save the engineered dataset (`df`) to CSV without the index column.
# The path is a raw string: written as a normal literal, its backslash pairs
# (\c, \d, \e) are invalid escape sequences, which emit a SyntaxWarning on
# Python 3.12+ and are slated to become a syntax error. The runtime value of
# the path is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider deriving
# it from a configurable data directory so the notebook runs elsewhere.
df.to_csv(r'C:\customerchurnprediction\data\engineered\engineeredbank_churn.csv', index=False)