In [1]:
import pandas as pd
import numpy as np

# Load the raw churn CSV and turn it into an all-numeric modelling table.
data = pd.read_csv('../data/synthetic_customer_churn_100k.csv')

# Binary-encode the two two-valued columns with vectorized comparisons rather
# than a per-row Python lambda: 'Male' -> 1 and 'Yes' -> 1, while every other
# value (including missing entries) becomes 0 -- the same mapping as before.
# NOTE(review): that fallback also labels unexpected/missing `Churn` values as
# 0 ("No"); confirm the CSV contains only 'Yes'/'No' in that column.
data["Gender"] = (data["Gender"] == "Male").astype("int64")
data["Churn"] = (data["Churn"] == "Yes").astype("int64")

# One-hot encode the multi-valued categoricals as 0/1 integer columns.
one_hot = pd.get_dummies(data[["PaymentMethod", "Contract"]], dtype=int)

# Replace the original categorical columns with their dummy columns and drop
# `CustomerID` in the same step, so it is not part of the resulting table.
data = pd.concat(
    [data.drop(columns=["PaymentMethod", "Contract", "CustomerID"]), one_hot],
    axis=1,
)

In [2]:
# Split the data
from sklearn.model_selection import train_test_split

# Pull out the target first; every remaining column is a predictor.
y = data["Churn"]
X = data.drop("Churn", axis=1)

# Hold out 20% of the rows for testing. Stratifying on y keeps the class ratio
# the same in both parts, and the fixed seed makes the split repeatable.
splits = train_test_split(X, y, stratify=y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = splits

In [3]:
# Standardize numerical features
from sklearn.preprocessing import StandardScaler
# Columns that get standardized; the remaining columns are left untouched.
numeric_cols = ["Age", "Tenure", "MonthlyCharges", "TotalCharges"]

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transformation to both splits so the test rows never influence
# the scaler.
scaler = StandardScaler().fit(X_train[numeric_cols])
for split in (X_train, X_test):
    split[numeric_cols] = scaler.transform(split[numeric_cols])

In [5]:
# Number of feature columns -- the same value the network uses as its input width.
len(X_train.columns)

12

In [4]:
# Build the model: a small fully connected binary classifier.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam

# One input unit per feature column of the training frame.
n_features = X_train.shape[1]

# NOTE(review): no TensorFlow/Keras random seed is set in the visible cells, so
# weight initialisation and dropout differ between runs -- consider seeding.
model = Sequential([
    # Declare the input width with an explicit Input layer. Passing
    # `input_shape=` to the first Dense layer makes Keras 3 emit a
    # "Do not pass an `input_shape`/`input_dim` argument to a layer" warning.
    Input(shape=(n_features,)),

    Dense(64, activation="relu"),
    Dropout(0.3),

    Dense(32, activation="relu"),
    Dropout(0.2),

    Dense(1, activation="sigmoid"),  # binary output: one value in (0, 1)
])

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# alongside the loss during training and evaluation.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Both callbacks watch the validation loss.
#
# EarlyStopping: stop once 5 consecutive epochs bring no improvement and roll
# the model back to the weights of its best epoch.
early_stop = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor="val_loss",
)

# ReduceLROnPlateau: halve the learning rate after 2 epochs without
# improvement, never going below 1e-5. Its patience is deliberately shorter
# than EarlyStopping's, so the rate is lowered before training is abandoned.
lr_scheduler = ReduceLROnPlateau(
    min_lr=1e-5,
    factor=0.5,
    patience=2,
    monitor="val_loss",
    verbose=1,
)

# Train for at most 50 epochs in mini-batches of 256 rows. 20% of the training
# data is set aside as the validation set that the callbacks monitor.
fit_options = dict(
    validation_split=0.2,
    epochs=50,
    batch_size=256,
    callbacks=[early_stop, lr_scheduler],
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7017 - loss: 0.5553 - val_accuracy: 0.7420 - val_loss: 0.5047 - learning_rate: 0.0010
Epoch 2/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7324 - loss: 0.5088 - val_accuracy: 0.7434 - val_loss: 0.4921 - learning_rate: 0.0010
Epoch 3/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7350 - loss: 0.5012 - val_accuracy: 0.7492 - val_loss: 0.4865 - learning_rate: 0.0010
Epoch 4/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7390 - loss: 0.4942 - val_accuracy: 0.7517 - val_loss: 0.4815 - learning_rate: 0.0010
Epoch 5/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7419 - loss: 0.4901 - val_accuracy: 0.7472 - val_loss: 0.4797 - learning_rate: 0.0010
Epoch 6/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [7]:
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.metrics import confusion_matrix

# Loss and accuracy on the held-out test split (progress bar suppressed).
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")

# Flatten the model output to a 1-D array of scores, then label every score
# strictly above 0.5 as class 1 and the rest as class 0.
y_prob = model.predict(X_test).reshape(-1)
above_threshold = y_prob > 0.5
y_pred = above_threshold.astype(int)

# Per-class precision/recall/F1 and the confusion matrix use the hard labels;
# ROC-AUC is computed from the raw scores instead.
report_text = classification_report(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_prob)
conf_matrix = confusion_matrix(y_test, y_pred)

print(report_text)
print("ROC-AUC:", auc_score)
print(conf_matrix)

Test Accuracy: 0.7618
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 670us/step
              precision    recall  f1-score   support

           0       0.80      0.86      0.83     13371
           1       0.67      0.55      0.61      6629

    accuracy                           0.76     20000
   macro avg       0.73      0.71      0.72     20000
weighted avg       0.75      0.76      0.76     20000

ROC-AUC: 0.8045606713154811
[[11558  1813]
 [ 2951  3678]]
