In [13]:
import pandas as pd
import numpy as np
import joblib
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import roc_auc_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


In [16]:
BASE_DIR = Path(r"C:\Users\rupes\Downloads\customer_churn")
DATA_DIR = BASE_DIR / "data"
MODELS_DIR = BASE_DIR / "models"
MODELS_DIR.mkdir(exist_ok=True)


In [18]:
df = pd.read_csv(DATA_DIR / "telco_churn_processed_for_modeling.csv")
df.head()


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,avg_charges_per_month,num_services,is_new_customer,tenure_bin
0,Female,0,Yes,No,1,No,No,DSL,No,Yes,...,Month-to-month,Yes,Electronic check,29.85,29.85,No,29.85,1,1,0-3
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,...,One year,No,Mailed check,56.95,1889.5,No,55.573529,3,0,25-48
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,...,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,54.075,3,1,0-3
3,Male,0,No,No,45,No,No,DSL,Yes,No,...,One year,No,Bank transfer (automatic),42.3,1840.75,No,40.905556,3,0,25-48
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,...,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,75.825,1,1,0-3


In [19]:
y = df['Churn'].map({'Yes':1, 'No':0})
X = df.drop(columns=['Churn'])


In [20]:
cat_cols = X.select_dtypes(include='object').columns.tolist()

telco_encoders = {}

for col in cat_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
    telco_encoders[col] = le


In [21]:
joblib.dump(telco_encoders, MODELS_DIR / "telco_label_encoders.pkl")


['C:\\Users\\rupes\\Downloads\\customer_churn\\models\\telco_label_encoders.pkl']

In [22]:
TELCO_FEATURE_ORDER = X.columns.tolist()
joblib.dump(TELCO_FEATURE_ORDER, MODELS_DIR / "telco_feature_order.pkl")


['C:\\Users\\rupes\\Downloads\\customer_churn\\models\\telco_feature_order.pkl']

In [23]:
scaler_telco = StandardScaler()
X_scaled = scaler_telco.fit_transform(X)

joblib.dump(scaler_telco, MODELS_DIR / "telco_scaler.pkl")


['C:\\Users\\rupes\\Downloads\\customer_churn\\models\\telco_scaler.pkl']

In [24]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42
)


In [25]:
telco_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
telco_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)


In [27]:
es = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

mc = ModelCheckpoint(
    MODELS_DIR / "telco_churn_model.keras",
    monitor='val_loss',
    save_best_only=True
)


In [28]:
history = telco_model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=128,
    callbacks=[es, mc],
    verbose=2
)


Epoch 1/50
40/40 - 6s - 162ms/step - accuracy: 0.7290 - loss: 0.5310 - val_accuracy: 0.7837 - val_loss: 0.4504
Epoch 2/50
40/40 - 1s - 21ms/step - accuracy: 0.7917 - loss: 0.4351 - val_accuracy: 0.7642 - val_loss: 0.4489
Epoch 3/50
40/40 - 1s - 18ms/step - accuracy: 0.8010 - loss: 0.4206 - val_accuracy: 0.7748 - val_loss: 0.4445
Epoch 4/50
40/40 - 1s - 16ms/step - accuracy: 0.8051 - loss: 0.4172 - val_accuracy: 0.7748 - val_loss: 0.4440
Epoch 5/50
40/40 - 1s - 15ms/step - accuracy: 0.8079 - loss: 0.4138 - val_accuracy: 0.7819 - val_loss: 0.4444
Epoch 6/50
40/40 - 1s - 15ms/step - accuracy: 0.8085 - loss: 0.4096 - val_accuracy: 0.7766 - val_loss: 0.4493
Epoch 7/50
40/40 - 1s - 16ms/step - accuracy: 0.8112 - loss: 0.4088 - val_accuracy: 0.7766 - val_loss: 0.4507
Epoch 8/50
40/40 - 1s - 17ms/step - accuracy: 0.8105 - loss: 0.4107 - val_accuracy: 0.7855 - val_loss: 0.4480
Epoch 9/50
40/40 - 1s - 14ms/step - accuracy: 0.8120 - loss: 0.4060 - val_accuracy: 0.7819 - val_loss: 0.4488


In [29]:
y_pred = telco_model.predict(X_test).ravel()
print("TELCO ROC AUC:", roc_auc_score(y_test, y_pred))


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
TELCO ROC AUC: 0.8419463173938878


In [31]:
df = pd.read_csv(DATA_DIR / "Customer-Churn-Records-bank.csv")
df.head()


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Complain,Satisfaction Score,Card Type,Point Earned
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,1,2,DIAMOND,464
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,1,3,DIAMOND,456
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,1,3,DIAMOND,377
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,5,GOLD,350
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,5,GOLD,425


In [32]:
df['AgeGroup'] = pd.cut(
    df['Age'],
    bins=[0,25,35,45,55,200],
    labels=['0-25','26-35','36-45','46-55','56+']
)

df['BalanceSalaryRatio'] = df['Balance'] / (df['EstimatedSalary'] + 1)
df['LoyaltyScore'] = df['Tenure'] * df['NumOfProducts']


In [33]:
y = df['Exited']
X = df.drop(columns=['Exited','RowNumber','CustomerId','Surname'])


In [34]:
BANK_NUM_COLS = [
    'CreditScore','Age','Tenure','Balance',
    'NumOfProducts','EstimatedSalary',
    'Satisfaction Score','Point Earned',
    'BalanceSalaryRatio','LoyaltyScore'
]

BANK_CAT_COLS = [
    'Geography','Gender','Card Type','AgeGroup'
]


In [35]:
bank_encoders = {}

for col in BANK_CAT_COLS:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
    bank_encoders[col] = le

joblib.dump(bank_encoders, MODELS_DIR / "bank_label_encoders.pkl")


['C:\\Users\\rupes\\Downloads\\customer_churn\\models\\bank_label_encoders.pkl']

In [36]:
bank_cardinalities = {
    col: int(X[col].nunique() + 1) for col in BANK_CAT_COLS
}

joblib.dump(bank_cardinalities, MODELS_DIR / "bank_cardinalities.pkl")


['C:\\Users\\rupes\\Downloads\\customer_churn\\models\\bank_cardinalities.pkl']

In [37]:
scaler_bank = StandardScaler()
X_num = scaler_bank.fit_transform(X[BANK_NUM_COLS])

joblib.dump(scaler_bank, MODELS_DIR / "bank_scaler.pkl")


['C:\\Users\\rupes\\Downloads\\customer_churn\\models\\bank_scaler.pkl']

In [38]:
X_cat = [X[col].values for col in BANK_CAT_COLS]
y = y.values


In [39]:
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate
from tensorflow.keras.models import Model


In [40]:
inputs_cat = []
embeds = []

for col in BANK_CAT_COLS:
    inp = Input(shape=(1,), name=f"input_{col}")
    emb = Embedding(
        bank_cardinalities[col],
        min(4, bank_cardinalities[col]//2)
    )(inp)
    emb = Flatten()(emb)
    inputs_cat.append(inp)
    embeds.append(emb)

numeric_input = Input(shape=(X_num.shape[1],), name="numeric_input")

x = Concatenate()([numeric_input] + embeds)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)

bank_model = Model(inputs=[numeric_input] + inputs_cat, outputs=x)


In [41]:
bank_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)


In [42]:
X_train_num, X_test_num, y_train, y_test = train_test_split(
    X_num, y, test_size=0.2, stratify=y, random_state=42
)

X_train_cat = [X[col].iloc[X_train_num.shape[0]*0: X_train_num.shape[0]].values for col in BANK_CAT_COLS]


In [43]:
bank_model.fit(
    [X_train_num] + X_train_cat,
    y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=128,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)],
    verbose=2
)


Epoch 1/50
57/57 - 7s - 124ms/step - accuracy: 0.7917 - loss: 0.5003 - val_accuracy: 0.7950 - val_loss: 0.4696
Epoch 2/50
57/57 - 1s - 14ms/step - accuracy: 0.7892 - loss: 0.4485 - val_accuracy: 0.8025 - val_loss: 0.4280
Epoch 3/50
57/57 - 1s - 13ms/step - accuracy: 0.8054 - loss: 0.4262 - val_accuracy: 0.8163 - val_loss: 0.3939
Epoch 4/50
57/57 - 1s - 14ms/step - accuracy: 0.8194 - loss: 0.4105 - val_accuracy: 0.8275 - val_loss: 0.3825
Epoch 5/50
57/57 - 1s - 11ms/step - accuracy: 0.8232 - loss: 0.3975 - val_accuracy: 0.8425 - val_loss: 0.3788
Epoch 6/50
57/57 - 1s - 13ms/step - accuracy: 0.8268 - loss: 0.3925 - val_accuracy: 0.8375 - val_loss: 0.3748
Epoch 7/50
57/57 - 1s - 14ms/step - accuracy: 0.8321 - loss: 0.3851 - val_accuracy: 0.8450 - val_loss: 0.3778
Epoch 8/50
57/57 - 1s - 15ms/step - accuracy: 0.8336 - loss: 0.3795 - val_accuracy: 0.8462 - val_loss: 0.3719
Epoch 9/50
57/57 - 1s - 13ms/step - accuracy: 0.8336 - loss: 0.3796 - val_accuracy: 0.8425 - val_loss: 0.3737
Epoch 10/

<keras.src.callbacks.history.History at 0x1d2f0c96d20>

In [44]:
bank_model.save(MODELS_DIR / "bank_churn_model_emb.keras")


In [45]:
print(TELCO_FEATURE_ORDER)


['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'avg_charges_per_month', 'num_services', 'is_new_customer', 'tenure_bin']
