In [1]:
import pandas as pd
import numpy as np
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.preprocessing import OneHotEncoder

In [2]:
df=pd.read_csv('dataset_ml/Bank_Customer_churn.zip')

In [3]:
df.head()

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15634602,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Initialize encoder
encoder = OneHotEncoder(sparse_output=False)
encoded = encoder.fit_transform(df[["country","gender"]])


In [5]:
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(["country", "gender"]))

In [6]:
df.drop(columns=["country", "gender"], inplace=True)
df[encoded_df.columns] = encoded_df

In [7]:
df.head()

Unnamed: 0,customer_id,credit_score,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn,country_France,country_Germany,country_Spain,gender_Female,gender_Male
0,15634602,619,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0,1.0,0.0
1,15647311,608,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0,1.0,0.0
2,15619304,502,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0,1.0,0.0
3,15701354,699,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0,1.0,0.0
4,15737888,850,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0,1.0,0.0


In [8]:
df.isnull().sum()

customer_id         0
credit_score        0
age                 0
tenure              0
balance             0
products_number     0
credit_card         0
active_member       0
estimated_salary    0
churn               0
country_France      0
country_Germany     0
country_Spain       0
gender_Female       0
gender_Male         0
dtype: int64

In [9]:
df.describe()

Unnamed: 0,customer_id,credit_score,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn,country_France,country_Germany,country_Spain,gender_Female,gender_Male
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037,0.5014,0.2509,0.2477,0.4543,0.5457
std,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769,0.500023,0.433553,0.431698,0.497932,0.497932
min,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0,0.0,0.0,0.0,0.0,0.0
25%,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0,0.0,0.0,0.0,0.0,0.0
50%,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0,1.0,0.0,0.0,0.0,1.0
75%,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0,1.0,1.0,0.0,1.0,1.0
max,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
print(df.shape)   # total rows, total columns
print(df.columns) # check column names

(10000, 15)
Index(['customer_id', 'credit_score', 'age', 'tenure', 'balance',
       'products_number', 'credit_card', 'active_member', 'estimated_salary',
       'churn', 'country_France', 'country_Germany', 'country_Spain',
       'gender_Female', 'gender_Male'],
      dtype='object')


In [11]:
x=df.drop(columns=['churn'])
y=df['churn']

In [12]:
scaler=StandardScaler()
x_scaled=scaler.fit_transform(x)

In [13]:
model=Sequential([Dense(128,activation='relu'),
                  BatchNormalization(),
                  Dropout(0.5),
                  Dense(64,activation='relu'),
                  BatchNormalization(),
                  Dropout(0.3),
                  Dense(32,activation='relu'),
                  BatchNormalization(),
                  Dropout(0.2),
                  Dense(1,activation='sigmoid')])

In [14]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [39]:
hist=model.fit(x, y, validation_split=0.2, epochs=20, batch_size=32)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7925 - loss: 0.5107 - val_accuracy: 0.8050 - val_loss: 0.4938
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7894 - loss: 0.5148 - val_accuracy: 0.8050 - val_loss: 0.4937
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7932 - loss: 0.5097 - val_accuracy: 0.8050 - val_loss: 0.4938
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7995 - loss: 0.5014 - val_accuracy: 0.8050 - val_loss: 0.4939
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7930 - loss: 0.5099 - val_accuracy: 0.8050 - val_loss: 0.4937
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8033 - loss: 0.4962 - val_accuracy: 0.8050 - val_loss: 0.4938
Epoch 7/20
[1m250/250[0m 

In [37]:
loss, acc = model.evaluate(x, y, verbose=0)
print(f"Test Accuracy: {acc*100:.2f}%")

Test Accuracy: 79.63%
