# Importing all the required libraries

In [111]:
import tensorflow as tf 
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np 
import streamlit as st

In [112]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle 

In [113]:
# Loading the data.
# Prefer a copy of the CSV placed next to the notebook so the cell runs on any
# machine; fall back to the original absolute location if that copy is absent.
try:
    data = pd.read_csv('Churn_Modelling.csv')
except FileNotFoundError:
    data = pd.read_csv(r'C:\Users\nsany\Downloads\Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# Preprocess the data 
## Dropping irrelevant data from the table


In [115]:
# Drop the RowNumber column from the table.
# errors='ignore' keeps the cell idempotent: re-running it after the column
# has already been removed no longer raises KeyError.
data.drop(columns=['RowNumber'], inplace=True, errors='ignore')
data.head()

Unnamed: 0,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [116]:
# Drop the CustomerId column from the table.
# errors='ignore' keeps the cell idempotent: re-running it after the column
# has already been removed no longer raises KeyError.
data.drop(columns=['CustomerId'], inplace=True, errors='ignore')
data.head()

Unnamed: 0,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [117]:
# Drop the Surname column from the table.
# errors='ignore' keeps the cell idempotent: re-running it after the column
# has already been removed no longer raises KeyError.
data.drop(columns=['Surname'], inplace=True, errors='ignore')

In [118]:
# Preview the first five rows after the column drops.
data.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [119]:
# Encode the categorical Gender column as integer labels.
# The fitted encoder is kept so the same mapping can be reused at prediction time.
label_encoder_gender = LabelEncoder()
data['Gender'] = label_encoder_gender.fit_transform(data['Gender'])
data.head(n=10)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0
5,645,Spain,1,44,8,113755.78,2,1,0,149756.71,1
6,822,France,1,50,7,0.0,2,1,1,10062.8,0
7,376,Germany,0,29,4,115046.74,4,1,0,119346.88,1
8,501,France,1,44,4,142051.07,2,0,1,74940.5,0
9,684,France,1,27,2,134603.88,1,1,1,71725.73,0


# One hot Encoding for Geography 


In [121]:
from sklearn.preprocessing import OneHotEncoder

# Fit a one-hot encoder on the Geography column, then encode that same column.
ohe_geo = OneHotEncoder()
geography_column = data[['Geography']]
geo_encoder = ohe_geo.fit(geography_column).transform(geography_column)
geo_encoder

<10000x3 sparse matrix of type '<class 'numpy.float64'>'
	with 10000 stored elements in Compressed Sparse Row format>

## Checking the features of the One hot Encoded column in the database

In [123]:
# Column names produced by the fitted one-hot encoder for Geography.
ohe_geo.get_feature_names_out(input_features=['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [124]:
# Show the encoded Geography values as a dense array.
geo_encoder.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [125]:
# Wrap the dense one-hot matrix in a DataFrame labelled with the encoder's feature names.
geo_feature_names = ohe_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(data=geo_encoder.toarray(), columns=geo_feature_names)
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


## Combining all the one hot encoded columns with the original Data

In [127]:
# Replace the raw Geography column with its one-hot encoded columns.
data_without_geography = data.drop(columns=['Geography'])
data = pd.concat([data_without_geography, geo_encoded_df], axis='columns')
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


# Saving the encoder file in Pickle format

In [129]:
# Persist both fitted encoders so they can be reloaded at prediction time.
encoders_to_save = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'onehot_encoder_geo.pkl': ohe_geo,
}
for pickle_path, encoder in encoders_to_save.items():
    with open(pickle_path, 'wb') as file:
        pickle.dump(encoder, file)

# Dividing the data set into dependent and independent features

In [131]:
# Target column (Exited) and the remaining columns as features.
y = data['Exited']
X = data.drop(columns=['Exited'])

# Splitting the Data set into Test and Train dataset 

In [133]:
# Hold out 20% of the rows; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling the features: the scaler is fitted on the training split only and
# then applied to both splits.
scalar = StandardScaler()
scalar.fit(X_train)
X_train = scalar.transform(X_train)
X_test = scalar.transform(X_test)

In [134]:
# Display the scaled training features.
X_train

array([[ 0.35649971,  0.91324755, -0.6557859 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [-0.20389777,  0.91324755,  0.29493847, ..., -0.99850112,
         1.72572313, -0.57638802],
       [-0.96147213,  0.91324755, -1.41636539, ..., -0.99850112,
        -0.57946723,  1.73494238],
       ...,
       [ 0.86500853, -1.09499335, -0.08535128, ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.15932282,  0.91324755,  0.3900109 , ...,  1.00150113,
        -0.57946723, -0.57638802],
       [ 0.47065475,  0.91324755,  1.15059039, ..., -0.99850112,
         1.72572313, -0.57638802]])

# Saving the fitted scaler as a pickle file

In [136]:
# Persist the fitted scaler so the same scaling can be applied at prediction time.
with open('scalar.pkl', mode='wb') as scaler_file:
    pickle.dump(scalar, scaler_file)

# Training an ANN on the data

In [138]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

# Building our ANN MODEL

In [140]:
# Shape of a single input sample (number of feature columns); this is what the first layer receives.
X_train.shape[1:]

(12,)

In [141]:
# Fully connected binary classifier.
# The input shape is declared with an explicit Input object instead of passing
# `input_shape` to the first Dense layer, which Keras warns against in
# Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one value per feature column
    Dense(64, activation='relu'),               # first hidden layer
    Dense(32, activation='relu'),               # second hidden layer
    Dense(1, activation='sigmoid'),             # output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [142]:
# Print the model summary.
model.summary()

In [165]:
# Compile the model: Adam optimizer (default settings), binary cross-entropy
# loss for the single sigmoid output, and accuracy as the reported metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [167]:
# Set up TensorBoard logging for training.
# Each run gets its own timestamped sub-directory under logs/fit/. The trailing
# slash matters: without it the runs become sibling folders named
# "logs/fit<timestamp>" instead of living under logs/fit/.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# histogram_freq=1 is passed so histograms are computed every epoch.
tensorflow_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Set up of Early Stopping 

In [170]:
# Monitor the validation loss and stop once it has failed to improve for
# 10 consecutive epochs, restoring the weights of the best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Training the Model 

In [173]:
# Train for up to 100 epochs; the callbacks log to TensorBoard and may stop early.
training_callbacks = [tensorflow_callback, early_stopping_callback]
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=training_callbacks,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7898 - loss: 0.4821 - val_accuracy: 0.8395 - val_loss: 0.3841
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8383 - loss: 0.3895 - val_accuracy: 0.8590 - val_loss: 0.3506
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8597 - loss: 0.3544 - val_accuracy: 0.8595 - val_loss: 0.3442
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8698 - loss: 0.3304 - val_accuracy: 0.8580 - val_loss: 0.3459
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8618 - loss: 0.3423 - val_accuracy: 0.8590 - val_loss: 0.3454
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8566 - loss: 0.3496 - val_accuracy: 0.8620 - val_loss: 0.3426
Epoch 7/100
[1m250/25

# Saving the Model as per the Keras API 

In [176]:
# Write the trained model to model.keras; the prediction cells reload it from this file.
model.save('model.keras')

In [178]:
# Loading the TensorBoard Extension 
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


# Visualization in TensorBoard

In [181]:
# Loading the Tesnor Board from the Log
%tensorboard --logdir logs/fit20250205-152803

# Prediction

## Loading the saved model and pickle files to streamline prediction with the trained model

In [183]:
# Loading the trained model, the encoders and the scaler from disk.

from tensorflow.keras.models import load_model

model = load_model('model.keras')

# NOTE: pickle.load can execute arbitrary code; only load files that were
# written by this notebook.
with open('label_encoder_gender.pkl', 'rb') as file:
    label_encoder_gender = pickle.load(file)

# Despite its name this is the fitted OneHotEncoder for Geography.
with open('onehot_encoder_geo.pkl', 'rb') as file:
    label_encoder_geo = pickle.load(file)

# The scaler must be reloaded as well: the prediction cells call
# scalar.transform and previously only worked when the training cells had
# already run in the same kernel.
with open('scalar.pkl', 'rb') as file:
    scalar = pickle.load(file)

In [185]:
# One hand-written customer record used to exercise the prediction pipeline.
# Keys follow the raw feature columns (before encoding).
input_data = dict(
    CreditScore=600,
    Geography='France',
    Gender='Male',
    Age=40,
    Tenure=3,
    Balance=60000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=5000,
)

In [187]:
# One-hot encode the sample's Geography with the reloaded encoder and label
# the resulting columns with the encoder's feature names.
geography_value = [[input_data['Geography']]]
geo_encoded = label_encoder_geo.transform(geography_value).toarray()
geo_feature_names = label_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(geo_encoded, columns=geo_feature_names)



In [189]:
# Display the one-hot encoded Geography of the sample input.
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0


# Combining the one-hot encoded columns with the input data 

## Converting the input_data to dataframe for the combination of Encoders to work

In [193]:
# Turn the sample record into a single-row DataFrame (row label 0).
input_df = pd.DataFrame(input_data, index=[0])
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,Male,40,3,60000,2,1,1,5000


# Encoding the Gender 

In [196]:
# Map the Gender string to its integer label using the reloaded encoder.
encoded_gender = label_encoder_gender.transform(input_df['Gender'])
input_df['Gender'] = encoded_gender
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,1,40,3,60000,2,1,1,5000


# Concatenating the one-hot encoded data

In [199]:
# Swap the raw Geography column for its one-hot encoded columns.
input_without_geography = input_df.drop(columns=["Geography"])
input_df = pd.concat([input_without_geography, geo_encoded_df], axis='columns')
input_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,3,60000,2,1,1,5000,1.0,0.0,0.0


# Scaling the input data

In [202]:
# Apply the fitted scaler to the encoded sample.
# NOTE(review): `scalar` is not loaded in the prediction section above; it is
# presumably still in memory from the training cells — confirm on a fresh kernel.
input_scaled = scalar.transform(input_df)
input_scaled

array([[-0.53598516,  0.91324755,  0.10479359, -0.69539349, -0.25781119,
         0.80843615,  0.64920267,  0.97481699, -1.65923237,  1.00150113,
        -0.57946723, -0.57638802]])

# Prediction of Customer Churn 

In [205]:
# Run the model on the scaled sample; the single sigmoid output is used as
# the churn probability below.
prediction = model.predict(input_scaled)
prediction 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


array([[0.02838976]], dtype=float32)

In [207]:
# Pull the single scalar (first row, first column) out of the prediction array.
prediction_probablility = prediction[0, 0]
prediction_probablility

0.028389757

# Setting up the condition to check whether the prediction is above 0.5 
## If the predicted probability is above 0.5, the customer is likely to churn 

In [210]:
# Report the decision using 0.5 as the churn threshold.
churn_message = (
    "The Customer is likely to Churn. "
    if prediction_probablility > 0.5
    else "The Customer is not likely to Churn."
)
print(churn_message)
    

The Customer is not likely to Churn.


# Creating END To END WEB APP for the prediction 

## Loading the model 

In [214]:
# Load the trained model, the encoders and the scaler used by the app.
model = tf.keras.models.load_model('model.keras')

# NOTE: pickle.load can execute arbitrary code; only load files that were
# written by this notebook.
with open('label_encoder_gender.pkl', 'rb') as file:
    label_encoder_gender = pickle.load(file)

# Despite its name this is the fitted OneHotEncoder for Geography.
with open('onehot_encoder_geo.pkl', 'rb') as file:
    label_encoder_geo = pickle.load(file)

# The scaler is needed to transform user input before prediction; it was
# previously never loaded here and only existed as leftover kernel state.
with open('scalar.pkl', 'rb') as file:
    scalar = pickle.load(file)

## StreamLit App 

In [217]:
# Header of the web application
st.title('Customer Churn Prediction') # title of the App

DeltaGenerator()

## User Input and types of input choices 


In [220]:
# Widgets collecting the customer attributes from the user.
# The choices for Geography and Gender come from the encoders loaded from
# disk (not from the training-time `ohe_geo` object), so this section works
# without re-running the training cells.
geography = st.selectbox('Geography', label_encoder_geo.categories_[0])
gender = st.selectbox('Gender', label_encoder_gender.classes_)
age = st.slider('Age', 18, 92)
balance = st.number_input('Balance')
credit_score = st.number_input('Credit Score')
estimated_salary = st.number_input('Estimated Salary')
tenure = st.slider('Tenure', 0, 10)
num_of_products = st.slider('Number of Products', 1, 4)
has_cr_card = st.selectbox('Has Credit Card', [0, 1])
is_active_memeber = st.selectbox('Is Active Member', [0,1])

2025-02-05 15:30:13.872 Session state does not function when running a script without `streamlit run`


## Preparing the input data as a single-row DataFrame

In [227]:
# Build a single-row frame from the widget values. Column order follows the
# feature order used for training (numeric/label columns first, one-hot
# Geography columns appended last).
input_data = pd.DataFrame({
    'CreditScore': [credit_score],
    'Gender': [label_encoder_gender.transform([gender])[0]],
    'Age': [age],
    'Tenure': [tenure],
    'Balance': [balance],
    'NumOfProducts': [num_of_products],
    'HasCrCard': [has_cr_card],
    'IsActiveMember': [is_active_memeber],
    'EstimatedSalary': [estimated_salary]
})

# One-hot encode the selected geography and append it to the input row.
# This step used to be commented out, which made the prediction below run on
# the stale `input_scaled` left over from the earlier sample instead of on the
# values entered by the user.
geo_encoded = label_encoder_geo.transform([[geography]]).toarray()
geo_encoded_df = pd.DataFrame(
    geo_encoded, columns=label_encoder_geo.get_feature_names_out(['Geography'])
)
input_data = pd.concat([input_data.reset_index(drop=True), geo_encoded_df], axis=1)

# Scale with the same fitted scaler that was applied to the training features.
input_scaled = scalar.transform(input_data)

# Predict churn for the user-provided input.
prediction = model.predict(input_scaled)
prediction_probablility = prediction[0][0]

if prediction_probablility > 0.5:
    churn_message = "The Customer is likely to Churn. "
else:
    churn_message = "The Customer is not likely to Churn."

# st.write renders the result in the Streamlit app; print keeps the message
# visible when the cell is executed inside the notebook.
st.write(f'Churn Probability: {prediction_probablility:.2f}')
st.write(churn_message)
print(churn_message)
    

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
The Customer is not likely to Churn.
