In [252]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle 


In [253]:
# Read the churn dataset from a CSV file in the working directory and preview its first rows.
data = pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [254]:
## Preprocessing: drop the identifier columns that are treated as irrelevant features.
## `columns=` names the axis explicitly, and errors='ignore' keeps this cell idempotent:
## re-running it after the columns are already gone no longer raises a KeyError.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [255]:
# Count how many rows fall into each Gender category.
data['Gender'].value_counts()

Gender
Male      5457
Female    4543
Name: count, dtype: int64

In [256]:
# Count how many rows fall into each Geography category.
data['Geography'].value_counts()

Geography
France     5014
Germany    2509
Spain      2477
Name: count, dtype: int64

In [257]:
from sklearn.model_selection import train_test_split

# Separate the feature columns from the 'Exited' target column.
X, y = data.drop(columns='Exited'), data['Exited']

# Hold out 20% of the rows as a test split; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train.shape, X_test.shape

((8000, 10), (2000, 10))

In [258]:
# Numeric feature columns, chosen by an explicit numeric-dtype test rather than
# "anything that is not object": text columns stored with a pandas string dtype
# are not object dtype and would otherwise be handed to the scaler.
numerical_col = X_train.select_dtypes(include='number').columns.tolist()
# Categorical column that gets one-hot encoded.
multiclass_col = ['Geography']
# Categorical column that gets ordinal encoded.
binaryclass_col = ['Gender']

In [259]:
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder
from sklearn.compose import ColumnTransformer

# Column-wise preprocessing: one-hot encode Geography, ordinal encode Gender,
# and standardize the numeric columns.
preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': a Geography value that was not seen during fit is
        # encoded as all zeros instead of raising when the saved preprocessor is
        # later applied to new records.
        ('geo', OneHotEncoder(handle_unknown='ignore'), multiclass_col),
        ('gender', OrdinalEncoder(), binaryclass_col),
        ('scaler', StandardScaler(), numerical_col),
    ],
    # Any column not listed above is passed through unchanged.
    remainder='passthrough',
)


In [260]:
# Fit the preprocessor on the training features and rebind X_train to the transformed output.
# NOTE(review): because X_train is overwritten, re-running this cell alone feeds the
# already-transformed data back in; re-run the split cell first.
X_train = preprocessor.fit_transform(X_train)

In [261]:
# Apply the already-fitted preprocessor to the test features (transform only, no refit)
# and rebind X_test to the transformed output.
X_test = preprocessor.transform(X_test)

In [262]:
# Persist the preprocessor to disk so it can be reloaded for inference later.
with open('preprocessor.pkl', mode='wb') as pkl_out:
    pickle.dump(preprocessor, pkl_out)

In [263]:
from tensorflow.keras.models import Model
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

# Functional-API network: input -> Dense(64, relu) -> Dense(32, relu) -> Dense(1, sigmoid).
# `shape` must be a tuple that excludes the batch dimension. `(n)` is just the int n,
# which newer Keras versions reject; `(n,)` is the one-element tuple.
input_src = Input(shape=(X_train.shape[1],))
hl = Dense(64,activation='relu')(input_src)
hl = Dense(32,activation='relu')(hl)
# Single sigmoid unit: the output is a value between 0 and 1 for the binary target.
output = Dense(1,activation='sigmoid')(hl)
model = Model(inputs=input_src,outputs=output)
model.summary()

Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 12)]              0         
                                                                 
 dense_18 (Dense)            (None, 64)                832       
                                                                 
 dense_19 (Dense)            (None, 32)                2080      
                                                                 
 dense_20 (Dense)            (None, 1)                 33        
                                                                 
Total params: 2,945
Trainable params: 2,945
Non-trainable params: 0
_________________________________________________________________


In [264]:
from tensorflow.keras.optimizers import Adam

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [265]:
# Set up TensorBoard logging
import datetime

# Directory TensorBoard reads its logs from. Every run writes to this same fixed path;
# the per-run suffix datetime.datetime.now().strftime("%Y%m%d-%H%M%S") is currently disabled.
log_dir = "logs/fit"
tf_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)


In [266]:
# Early stopping driven by validation loss, with a patience of 5 epochs;
# the best weights seen are restored when training stops.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)


In [267]:
# Train the model for up to 100 epochs with TensorBoard logging and early stopping.
# NOTE(review): the held-out test split is also passed as the validation set, so
# early stopping picks its weights against the same data used for evaluation.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[tf_callback, early_stopping_callback],
)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


In [268]:
# Save the trained model to 'model.h5' in the working directory; a later cell reloads it from this path.
# NOTE(review): the .h5 extension presumably selects Keras' legacy HDF5 format — confirm
# whether the native .keras format is preferred for the installed Keras version.
model.save('model.h5')

In [269]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [270]:
%tensorboard --logdir logs\fit20250514-102023\train

Reusing TensorBoard on port 6009 (pid 24056), started 5:46:24 ago. (Use '!kill 24056' to kill it.)

In [271]:
### Load the trained model back from the model.h5 file
from tensorflow.keras.models import load_model

model = load_model("model.h5")

In [272]:
# Restore the preprocessor from the pickle file.
# NOTE(review): unpickling executes arbitrary code — only load files you produced yourself.
with open('preprocessor.pkl', mode='rb') as pkl_in:
    preprocessor = pickle.load(pkl_in)


In [273]:
# One hand-written customer record used to try out the saved pipeline.
# Each value is wrapped in a one-element list so the frame has exactly one row.
input_data = pd.DataFrame({
    'CreditScore': [600],
    'Geography': ['France'],
    'Gender': ['Male'],
    'Age': [40],
    'Tenure': [3],
    'Balance': [60000],
    'NumOfProducts': [1],
    'HasCrCard': [1],
    'IsActiveMember': [1],
    'EstimatedSalary': [50000],
})

In [274]:

# Transform the example record with the preprocessor reloaded from preprocessor.pkl.
input_transformed = preprocessor.transform(input_data) 

In [275]:
# Run the reloaded model on the transformed example record.
pred = model.predict(input_transformed)



In [276]:
# Take element [0][0] of the prediction as the churn probability.
# Presumably pred holds one row with a single sigmoid output — verify.
pred_prob = pred[0][0]

In [277]:
# Report the churn decision using a 0.5 probability cut-off.
print("The Customer will churn" if pred_prob >= 0.5 else "the Customer will not churn")

the Customer will not churn
