In [12]:
import pandas as pd
import numpy as np

# Load the cleaned dataset that every feature-engineering step below extends.
df = pd.read_csv(filepath_or_buffer="../Data/cleaned/BankChurners_cleaned.csv")

# Step 1: Activity Intensity Feature

###  Why?

Two customers may have:

* The same transaction count
* But **very different spending**

###  Feature

**Average amount per transaction**

In [13]:
# Average amount spent per transaction.
# A zero transaction count is masked to NaN before dividing, so the ratio comes
# out as NaN (missing) instead of +/-inf, which would distort summary
# statistics and be rejected by many estimators.
df['Avg_Transaction_Value'] = (
    df['Total_Trans_Amt'] / df['Total_Trans_Ct'].mask(df['Total_Trans_Ct'].eq(0))
)

#  Step 2: Customer Engagement Score

### Why?

Churned customers usually have:

* Fewer transactions
* More inactivity
* Fewer relationships

### Feature

Combine engagement signals:


In [14]:
# Engagement = transaction count + relationship count - months inactive.
# NOTE(review): the three terms are combined unscaled; if Total_Trans_Ct spans
# a much wider range than the other two it will dominate the score - confirm
# that this weighting is intended.
df['Engagement_Score'] = (
    df['Total_Trans_Ct']
    .add(df['Total_Relationship_Count'])
    .sub(df['Months_Inactive_12_mon'])
)

# Step 3: Credit Usage Pressure

### Why?

Customers close to their credit limit are **financially stressed**

### Feature

Credit pressure ratio:


In [15]:
# Share of the credit limit currently used by the revolving balance.
# A zero credit limit is masked to NaN before dividing, so the ratio is NaN
# (missing) rather than +/-inf for such rows.
df['Credit_Pressure'] = (
    df['Total_Revolving_Bal'] / df['Credit_Limit'].mask(df['Credit_Limit'].eq(0))
)

# Step 4: High Credit Customer Flag

### Why?

High-credit customers behave differently & churn less.

### Feature

Binary feature (easy for models):

In [16]:
# Flag customers whose credit limit is strictly above the dataset median
# (1 = above the median, 0 = at or below it).
credit_median = df['Credit_Limit'].median()
df['High_Credit_Flag'] = df['Credit_Limit'].gt(credit_median).astype(int)

# Step 5: Loyalty (Tenure Buckets)

### Why?

Tenure is **not linear**.
A customer with 60 months on book ≠ one with 20 months.

### Feature

Bucket `Months_on_book`:


In [17]:
def tenure_group(x, new_max=24, medium_max=48):
    """Bucket a tenure expressed in months into an ordinal loyalty group.

    Parameters
    ----------
    x : number
        Tenure in months.
    new_max : number, default 24
        Inclusive upper bound of the "New" bucket.
    medium_max : number, default 48
        Inclusive upper bound of the "Medium" bucket.

    Returns
    -------
    int or float
        0 (New) when ``x <= new_max``, 1 (Medium) when ``x <= medium_max``,
        2 (Loyal) otherwise. A missing tenure returns NaN.
    """
    # Comparisons against NaN are always False, so without this guard a
    # missing tenure would fall through to the final branch and be labelled
    # "Loyal".
    if pd.isna(x):
        return float("nan")
    if x <= new_max:
        return 0   # New
    if x <= medium_max:
        return 1   # Medium
    return 2       # Loyal

# Apply the tenure bucketing element-wise to every customer's months on book.
df['Tenure_Group'] = df['Months_on_book'].map(tenure_group)


# Step 6: Transaction Frequency Category

### Why?

Low-transaction users are at **high churn risk**.

### Feature

Quantile-based binning:


In [18]:
# Split customers into three equal-frequency bins (tertiles) of transaction
# count, labelled 0 = Low, 1 = Medium, 2 = High.
# Note: pd.qcut returns a categorical column here, not a plain integer column.
df['Trans_Freq_Group'] = pd.qcut(df['Total_Trans_Ct'], 3, labels=[0, 1, 2])

# Step 7: Contact Pressure Indicator

### Why?

A high number of contacts often signals complaints or elevated churn risk.

### Feature

Binary stress flag:


In [19]:
# 1 when the customer was contacted three or more times in the last 12 months,
# otherwise 0.
df['High_Contact_Flag'] = df['Contacts_Count_12_mon'].ge(3).astype(int)

# Step 8: Stability Score (Behavior Change)

### Why?

Sudden changes often precede churn.

### Feature

Combine change ratios:

In [20]:
# Mean of the two Q4-vs-Q1 change ratios (transaction amount and transaction
# count). A missing value in either ratio makes the score missing as well.
df['Stability_Score'] = (
    df['Total_Amt_Chng_Q4_Q1']
    .add(df['Total_Ct_Chng_Q4_Q1'])
    .div(2)
)

In [22]:
# Persist the engineered dataset; index=False keeps the row index out of the file.
# NOTE(review): the input is read from "../Data/..." while this writes under
# "../data/..." - on a case-sensitive filesystem these are different
# directories; confirm the intended casing.
df.to_csv(
    path_or_buf="../data/Engineered/BankChurners_Engineered_V2.csv",
    index=False,
)