In [134]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import pandas as pd
import numpy as np

In [None]:
import warnings

warnings.filterwarnings("ignore")

In [136]:
# Load the trained Keras model together with the pickled scaler, gender label
# encoder and geography one-hot encoder.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = load_model("ann_model.h5")

scaler = _load_pickle("scaler.pkl")
label_encoder = _load_pickle("label_encoder_gender.pkl")
ohe = _load_pickle("onehot_encoder_geography.pkl")



In [137]:
# Example customer record to score, holding raw (un-encoded) feature values.
# NOTE(review): CreditScore=20 is scaled to roughly -6.6 by the fitted scaler
# (see the scaled-input output further down), i.e. far from the scaler's mean;
# confirm this extreme value is intentional and not a typo.
input_data = dict(
    CreditScore=20,
    Geography="France",
    Gender="Male",
    Age=40,
    Tenure=3,
    Balance=500,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=0,
    EstimatedSalary=73250,
)

In [138]:
# One-hot encode the geography. The encoder may hand back a sparse matrix
# (anything exposing .toarray()), so densify before building the DataFrame.
geo_encoded = ohe.transform([[input_data["Geography"]]])
geo_encoded = geo_encoded.toarray() if hasattr(geo_encoded, "toarray") else geo_encoded
geo_columns = ohe.get_feature_names_out(["Geography"])
geo_encoded_df = pd.DataFrame(geo_encoded, columns=geo_columns)

# Label-encode the gender; transform() returns an array, keep its only element.
gender_encoded = label_encoder.transform([input_data["Gender"]])[0]

# Column order must match the one used in training. After preprocessing that
# order is: CreditScore, Gender, Age, Tenure, Balance, NumOfProducts,
# HasCrCard, IsActiveMember, EstimatedSalary, Geography_France,
# Geography_Germany, Geography_Spain.
base_columns = [
    "CreditScore",
    "Gender",
    "Age",
    "Tenure",
    "Balance",
    "NumOfProducts",
    "HasCrCard",
    "IsActiveMember",
    "EstimatedSalary",
]
# Same raw values as input_data, with Gender swapped for its encoded form.
base_values = {**input_data, "Gender": gender_encoded}
base_frame = pd.DataFrame({name: [base_values[name]] for name in base_columns})

# Append the geography one-hot columns to the right of the base features.
input_data_processed = pd.concat([base_frame, geo_encoded_df], axis=1)

print("Processed input data:")
print(input_data_processed)
print("\nColumns:", list(input_data_processed.columns))

Processed input data:
   CreditScore  Gender  ...  Geography_Germany  Geography_Spain
0           20       1  ...                0.0              0.0

[1 rows x 12 columns]

Columns: ['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Geography_France', 'Geography_Germany', 'Geography_Spain']


In [139]:
# Debug: show the label encoder's classes, the encoded value for the example
# gender, and the column names produced by the geography one-hot encoder.
print("Label encoder classes:", label_encoder.classes_)

gender_code = label_encoder.transform([input_data["Gender"]])
print("Gender encoded result:", gender_code)

geography_columns = ohe.get_feature_names_out(["Geography"])
print("OneHotEncoder feature names:", geography_columns)

Label encoder classes: ['Female' 'Male']
Gender encoded result: [1]
OneHotEncoder feature names: ['Geography_France' 'Geography_Germany' 'Geography_Spain']


In [140]:
# Display the one-hot encoded geography row built in the preprocessing cell.
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0


In [141]:
# Display the raw example record for reference.
input_data

{'CreditScore': 20,
 'Geography': 'France',
 'Gender': 'Male',
 'Age': 40,
 'Tenure': 3,
 'Balance': 500,
 'NumOfProducts': 2,
 'HasCrCard': 1,
 'IsActiveMember': 0,
 'EstimatedSalary': 73250}

In [142]:
# Standardise the feature row with the fitted scaler. A plain NumPy array is
# passed instead of the DataFrame to avoid a feature-name mismatch.
input_data_scaled = scaler.transform(input_data_processed.to_numpy())
print("Scaled input data shape:", input_data_scaled.shape)
print("Scaled input data:")
print(input_data_scaled)

# Run the network on the scaled row.
prediction = model.predict(input_data_scaled)
print("\nModel prediction:")

# The churn probability is read from row 0, column 0 of the prediction;
# anything strictly above 0.5 is reported as churn.
churn_probability = prediction[0][0]
if churn_probability > 0.5:
    churn_answer = "Yes"
else:
    churn_answer = "No"
print(f"Churn probability: {churn_probability:.4f}")
print(f"Will customer churn? {churn_answer}")

Scaled input data shape: (1, 12)
Scaled input data:
[[-6.55506917  0.91324755  0.10479359 -0.69539349 -1.21046506  0.80843615
   0.64920267 -1.02583358 -0.47259212  1.00150113 -0.57946723 -0.57638802]]
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 83ms/step

Model prediction:
Churn probability: 0.1936
Will customer churn? No


In [None]:
# Test different input scenarios to find what triggers churn prediction.
#
# Each scenario is a raw customer record plus a display "name". The records
# are encoded, scaled and scored exactly like the single example above, and a
# short report is printed per scenario.

# Probabilities strictly above this value are reported as "will churn".
CHURN_THRESHOLD = 0.5

# Raw feature columns, in the order fed to the scaler; the Geography_* one-hot
# columns are appended after these.
BASE_FEATURE_COLUMNS = [
    "CreditScore",
    "Gender",
    "Age",
    "Tenure",
    "Balance",
    "NumOfProducts",
    "HasCrCard",
    "IsActiveMember",
    "EstimatedSalary",
]


def predict_churn_probability(customer):
    """Return the model's churn probability for one raw customer record.

    Parameters
    ----------
    customer : dict
        Raw feature values keyed by column name. Must contain "Geography",
        "Gender" and every other entry of ``BASE_FEATURE_COLUMNS``. Extra
        keys (such as a display "name") are ignored. The dict is not modified.

    Returns
    -------
    The element at ``[0][0]`` of ``model.predict`` for the encoded, scaled row.
    """
    # Encode geography; densify if the encoder returned a sparse matrix.
    geo_enc = ohe.transform([[customer["Geography"]]])
    if hasattr(geo_enc, "toarray"):
        geo_enc = geo_enc.toarray()
    geo_enc_df = pd.DataFrame(geo_enc, columns=ohe.get_feature_names_out(["Geography"]))

    # Encode gender; transform() returns an array, keep its only element.
    gender_enc = label_encoder.transform([customer["Gender"]])[0]

    # Build the single-row frame in the expected column order.
    values = {**customer, "Gender": gender_enc}
    data_proc = pd.DataFrame({col: [values[col]] for col in BASE_FEATURE_COLUMNS})
    data_proc = pd.concat([data_proc, geo_enc_df], axis=1)

    # Scale and predict (verbose=0 keeps the Keras progress bar out of the report).
    data_scaled = scaler.transform(data_proc.to_numpy())
    return model.predict(data_scaled, verbose=0)[0][0]


print("=" * 80)
print("TESTING DIFFERENT INPUT SCENARIOS FOR CHURN")
print("=" * 80)

test_scenarios = [
    {
        "name": "Low Income, No Balance, Inactive",
        "CreditScore": 300,
        "Geography": "Germany",
        "Gender": "Female",
        "Age": 45,
        "Tenure": 1,
        "Balance": 0,
        "NumOfProducts": 1,
        "HasCrCard": 0,
        "IsActiveMember": 0,
        "EstimatedSalary": 30000,
    },
    {
        "name": "Old Age, Low Tenure, Inactive",
        "CreditScore": 600,
        "Geography": "Germany",
        "Gender": "Male",
        "Age": 60,
        "Tenure": 1,
        "Balance": 0,
        "NumOfProducts": 1,
        "HasCrCard": 1,
        "IsActiveMember": 0,
        "EstimatedSalary": 50000,
    },
    {
        "name": "Germany, Female, High Age",
        "CreditScore": 500,
        "Geography": "Germany",
        "Gender": "Female",
        "Age": 65,
        "Tenure": 2,
        "Balance": 100000,
        "NumOfProducts": 1,
        "HasCrCard": 1,
        "IsActiveMember": 0,
        "EstimatedSalary": 100000,
    },
    {
        "name": "Excellent Profile (Low Churn Risk)",
        "CreditScore": 800,
        "Geography": "France",
        "Gender": "Male",
        "Age": 35,
        "Tenure": 10,
        "Balance": 200000,
        "NumOfProducts": 2,
        "HasCrCard": 1,
        "IsActiveMember": 1,
        "EstimatedSalary": 150000,
    },
]

for scenario in test_scenarios:
    # Read the name instead of popping it, so test_scenarios is left intact
    # and this loop can be re-run without re-creating the list.
    scenario_name = scenario["name"]
    churn_prob = predict_churn_probability(scenario)

    if churn_prob > CHURN_THRESHOLD:
        verdict = "🔴 YES - WILL CHURN"
    else:
        verdict = "🟢 NO - WILL NOT CHURN"

    print(f"\n📊 Scenario: {scenario_name}")
    print(
        f"   Age: {scenario['Age']}, Geography: {scenario['Geography']}, Gender: {scenario['Gender']}"
    )
    print(
        f"   CreditScore: {scenario['CreditScore']}, Tenure: {scenario['Tenure']} years"
    )
    print(
        f"   Balance: ${scenario['Balance']:,}, IsActiveMember: {scenario['IsActiveMember']}"
    )
    print(f"   ➜ Churn Probability: {churn_prob:.4f} ({churn_prob*100:.2f}%)")
    print(f"   ➜ Prediction: {verdict}")

TESTING DIFFERENT INPUT SCENARIOS FOR CHURN

📊 Scenario: Low Income, No Balance, Inactive
   Age: 45, Geography: Germany, Gender: Female
   CreditScore: 300, Tenure: 1 years
   Balance: $0, IsActiveMember: 0
   ➜ Churn Probability: 0.8116 (81.16%)
   ➜ Prediction: 🔴 YES - WILL CHURN

📊 Scenario: Old Age, Low Tenure, Inactive
   Age: 60, Geography: Germany, Gender: Male
   CreditScore: 600, Tenure: 1 years
   Balance: $0, IsActiveMember: 0
   ➜ Churn Probability: 0.9397 (93.97%)
   ➜ Prediction: 🔴 YES - WILL CHURN

📊 Scenario: Germany, Female, High Age
   Age: 65, Geography: Germany, Gender: Female
   CreditScore: 500, Tenure: 2 years
   Balance: $100,000, IsActiveMember: 0
   ➜ Churn Probability: 0.9500 (95.00%)
   ➜ Prediction: 🔴 YES - WILL CHURN

📊 Scenario: Excellent Profile (Low Churn Risk)
   Age: 35, Geography: France, Gender: Male
   CreditScore: 800, Tenure: 10 years
   Balance: $200,000, IsActiveMember: 1
   ➜ Churn Probability: 0.0410 (4.10%

In [None]:
# Print a hand-written summary of the takeaways from the scenario tests.
# NOTE(review): this text is static, not computed from the model. Item 5 of
# the high-risk list ("low or zero balance") is not clearly supported by the
# scenario outputs, where the $100,000-balance profile scored highest (0.95)
# — confirm against the data before relying on it.
print("\n" + "=" * 80)
print("KEY FACTORS THAT TRIGGER CHURN PREDICTION (Probability > 0.5)")
print("=" * 80)
print(
    """
✅ CUSTOMERS MOST LIKELY TO CHURN (High Risk):
   
   1. 🌍 GEOGRAPHY: Germany (very high churn risk)
   2. 👩 GENDER: Female (higher churn than male)
   3. 📅 AGE: Older customers (45+ years old, especially 60+)
   4. ⏱️ TENURE: Short tenure (1-3 years, new customers at risk)
   5. 💰 BALANCE: Low or zero balance
   6. 🏦 IsActiveMember: Inactive members (0) - much higher churn risk
   7. 💳 NumOfProducts: Few products (1 product shows higher churn)
   
   EXAMPLE - HIGHEST CHURN RISK (95% probability):
   • Age: 65 years old
   • Geography: Germany
   • Gender: Female
   • Tenure: 2 years (new customer)
   • Balance: $100,000 (has money but inactive)
   • IsActiveMember: 0 (not using services actively)
   • NumOfProducts: 1 (uses only 1 product)
   • CreditScore: 500 (fair credit)

---

❌ CUSTOMERS LEAST LIKELY TO CHURN (Low Risk):
   
   1. 🌍 GEOGRAPHY: France or Spain
   2. 👨 GENDER: Male
   3. 📅 AGE: Younger customers (30-40 years old)
   4. ⏱️ TENURE: Long tenure (8+ years, loyal customers)
   5. 💰 BALANCE: High balance ($100,000+)
   6. 🏦 IsActiveMember: Active members (1) - using services
   7. 💳 NumOfProducts: Multiple products (2+ products)
   
   EXAMPLE - LOWEST CHURN RISK (4.1% probability):
   • Age: 35 years old
   • Geography: France
   • Gender: Male
   • Tenure: 10 years (loyal customer)
   • Balance: $200,000 (high balance)
   • IsActiveMember: 1 (actively using services)
   • NumOfProducts: 2 products (using multiple services)
   • CreditScore: 800 (excellent credit)

"""
)


KEY FACTORS THAT TRIGGER CHURN PREDICTION (Probability > 0.5)

✅ CUSTOMERS MOST LIKELY TO CHURN (High Risk):

   1. 🌍 GEOGRAPHY: Germany (very high churn risk)
   2. 👩 GENDER: Female (higher churn than male)
   3. 📅 AGE: Older customers (45+ years old, especially 60+)
   4. ⏱️ TENURE: Short tenure (1-3 years, new customers at risk)
   5. 💰 BALANCE: Low or zero balance
   6. 🏦 IsActiveMember: Inactive members (0) - much higher churn risk
   7. 💳 NumOfProducts: Few products (1 product shows higher churn)

   EXAMPLE - HIGHEST CHURN RISK (95% probability):
   • Age: 65 years old
   • Geography: Germany
   • Gender: Female
   • Tenure: 2 years (new customer)
   • Balance: $100,000 (has money but inactive)
   • IsActiveMember: 0 (not using services actively)
   • NumOfProducts: 1 (uses only 1 product)
   • CreditScore: 500 (fair credit)

---

❌ CUSTOMERS LEAST LIKELY TO CHURN (Low Risk):

   1. 🌍 GEOGRAPHY: France or Spain
   2. 👨 GENDER: Mal