In [24]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [25]:
SEED = 42
os.environ["PYTHONSSHSEED"] = str(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
print("TensorFlow: ", tf.__version__)

TensorFlow:  2.19.0


In [26]:
dataset = pd.read_csv("Churn_Modelling.csv")
X = dataset.iloc[:, 3:-1].copy()
y = dataset.iloc[:, -1].values
print("Shapes", X.shape, y.shape)

Shapes (10000, 10) (10000,)


In [27]:
ct = ColumnTransformer(
      transformers=[(("geo_ohe"), OneHotEncoder(handle_unknown="ignore"), [1]),
                    (("gender_ohe"), OneHotEncoder(handle_unknown="ignore"), [2])],
      remainder="passthrough",
)
X_ohe = ct.fit_transform(X)
X_ohe = np.asarray(X_ohe).astype("float32")

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X_ohe, y, test_size=0.2, random_state=SEED, stratify=y)

sc = StandardScaler(with_mean=False)
X_train = sc.fit_transform(X_train).astype("float32")
X_test = sc.transform(X_test).astype("float32")

In [32]:
from tensorflow.keras import layers, models, callbacks
def build_model(input_dim):
  model = models.Sequential([
      layers.Dense(16, activation="relu", input_shape=(input_dim,)),
      layers.Dense(16, activation="relu"),
      layers.Dense(1, activation="sigmoid")
  ])
  model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", tf.keras.metrics.AUC(name="auc")], )
  return model

ann = build_model(input_dim=X_train.shape[1])
ann.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [37]:
cb = [

    callbacks.EarlyStopping(monitor="val_auc", patience=10, mode="max", restore_best_weights=True),
    callbacks.ModelCheckpoint("best_ann.keras", monitor="val_auc", save_best_only=True, mode="max"),
    callbacks.TensorBoard(log_dir="logs")
]


[<keras.src.callbacks.early_stopping.EarlyStopping at 0x798d0b8cf740>,
 <keras.src.callbacks.model_checkpoint.ModelCheckpoint at 0x798d0b5f5fd0>,
 <keras.src.callbacks.tensorboard.TensorBoard at 0x798d0b5f7c80>]

In [38]:
history = ann.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=cb)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.7908 - auc: 0.5303 - loss: 0.5172 - val_accuracy: 0.8085 - val_auc: 0.7370 - val_loss: 0.4468
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8078 - auc: 0.7445 - loss: 0.4414 - val_accuracy: 0.8180 - val_auc: 0.7654 - val_loss: 0.4277
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8156 - auc: 0.7676 - loss: 0.4244 - val_accuracy: 0.8250 - val_auc: 0.7886 - val_loss: 0.4128
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8272 - auc: 0.7928 - loss: 0.4080 - val_accuracy: 0.8295 - val_auc: 0.8135 - val_loss: 0.3951
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8386 - auc: 0.8155 - loss: 0.3895 - val_accuracy: 0.8370 - val_auc: 0.8327 - val_loss: 0.3781
Epoch 6/100
[1m250/250

In [None]:
y_proba = ann.predict(X_test).ravel(
y_pred
)

In [39]:
loss, accuracy, auc = ann.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test AUC: {auc:.4f}")

Test Loss: 0.3343
Test Accuracy: 0.8615
Test AUC: 0.8652
