In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

In [7]:
# Optional, Google Colab only: uncomment to upload a file from the local machine
# (presumably Churn_Modelling.csv, which the next cell reads — confirm).
# from google.colab import files
# files.upload()


In [8]:
# Read the churn dataset from the working directory and preview its first rows.
data = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')
data.head(5)


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [9]:
# Remove the RowNumber, CustomerId and Surname columns before encoding the rest.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

In [10]:
# Fit a label encoder on Gender, then overwrite the column with its integer codes.
label_encoder_gender = LabelEncoder().fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])

In [11]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode Geography. The encoded result is densified with .toarray()
# in the next cell, where it is turned into a DataFrame.
onehot_encoder_geo = OneHotEncoder()
onehot_encoder_geo.fit(data[['Geography']])
geo_encoded = onehot_encoder_geo.transform(data[['Geography']])

In [12]:
# Name each one-hot column after the encoder's generated feature names.
geo_feature_names = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(geo_encoded.toarray(), columns=geo_feature_names)

In [13]:
# Swap the raw Geography column for its one-hot encoded columns.
data_without_geo = data.drop(columns='Geography')
data = pd.concat([data_without_geo, geo_encoded_df], axis=1)

In [14]:
# Persist both fitted encoders; the prediction cells further down reload them
# from these exact filenames.
for pickle_path, encoder in (
    ('label_encoder_gender.pickle', label_encoder_gender),
    ('onehot_encoder_geo.pickle', onehot_encoder_geo),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(encoder, file)

In [15]:
# Separate the target column (Exited) from the feature columns.
y = data['Exited']
X = data.drop(columns='Exited')

In [16]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Fit the scaler on the training split only, then apply the same fitted
# scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
# Persist the fitted scaler; the prediction cells reload it from this filename.
with open('scaler.pickle', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [19]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime




In [20]:
# Per-sample input shape (number of feature columns) handed to the first Dense layer.
X_train.shape[1:]

(12,)

## BUILD ANN MODEL

In [21]:
# Seed TensorFlow's global RNG so the layers' random weight initialisation is
# repeatable on Restart & Run All (same value as train_test_split's random_state).
tf.random.set_seed(42)

# Fully connected binary classifier: two ReLU hidden layers and one sigmoid output.
model = Sequential([
    Dense(units=64, activation='relu', input_shape=(X_train.shape[1],)),  # hidden layer 1
    Dense(units=32, activation='relu'),                                   # hidden layer 2
    Dense(units=1, activation='sigmoid'),  # output layer, hence 1 unit and sigmoid activation
])




In [22]:
# Print the layer-by-layer output shapes and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                832       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
# Compile with Adam (learning rate spelled out explicitly), binary cross-entropy
# loss, and accuracy as the reported metric. Uses the `tf` alias imported in an
# earlier cell rather than importing tensorflow again under a second name.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
loss = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])


In [24]:
# Give every run its own timestamped sub-directory under logs/fit/. The trailing
# slash matters: without it the runs are written to sibling directories named
# "logs/fit<timestamp>" and `--logdir logs/fit` finds nothing.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# histogram_freq=1 records weight histograms every epoch.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [25]:
## Set up early stopping: once the validation loss stops decreasing, halt
## training right there instead of running every epoch.

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,                 # epochs without improvement tolerated before stopping
    restore_best_weights=True,   # roll back to the weights of the best epoch
)

In [26]:
## Train the model (early stopping may end the run before the 100-epoch cap)

# NOTE(review): the held-out test split doubles as the validation set here, so
# it also drives early stopping — consider carving out a separate validation split.
training_callbacks = [early_stop, tensorboard_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=100,
    callbacks=training_callbacks,
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100


In [27]:
# Save the trained model to 'model.h5'; the prediction cells load this exact filename.
# NOTE(review): the cell output ends in `saving_api.save_model(`, which looks like
# Keras's warning about the legacy HDF5 format — confirm before changing the format.
model.save('model.h5')

  saving_api.save_model(


In [28]:
# Load the TensorBoard notebook extension.
# NOTE(review): no `%tensorboard --logdir ...` call is visible in these cells,
# so the dashboard itself is not launched here.
%load_ext tensorboard

In [29]:
## Prediction 

import streamlit as st
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pandas as pd
import pickle

In [30]:
# Reload the trained network from the file written by model.save above.
model = tf.keras.models.load_model(filepath='model.h5')

In [34]:
# Reload the fitted encoders and scaler that were pickled during training.
# Only unpickle files you produced yourself: pickle.load can execute arbitrary code.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as file:
        return pickle.load(file)


label_encoder_gender = _load_pickle('label_encoder_gender.pickle')
onehot_encoder_geo = _load_pickle('onehot_encoder_geo.pickle')
scaler = _load_pickle('scaler.pickle')


In [87]:
# One hand-written customer record used to exercise the saved pipeline.
# The keys are the raw feature columns, before any encoding.
input_record = dict(
    CreditScore=600,
    Geography='France',
    Gender='Male',
    Age=23,
    Tenure=2,
    Balance=10000,
    NumOfProducts=1,
    HasCrCard=1,
    IsActiveMember=0,
    EstimatedSalary=60000,
)

# Wrapping the record in a list makes pandas build a one-row DataFrame.
input_data = pd.DataFrame([input_record])


In [88]:
# Encode the record's Geography with the encoder fitted during training and
# label the resulting columns with the encoder's feature names.
geo_feature_names = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_sparse = onehot_encoder_geo.transform(input_data[['Geography']])
geo_encoded = geo_sparse.toarray()
geo_encoded_df = pd.DataFrame(geo_encoded, columns=geo_feature_names)

In [89]:
# Append the one-hot columns to the record; resetting the index first keeps the
# two frames aligned on row 0.
input_reindexed = input_data.reset_index(drop=True)
input_data = pd.concat([input_reindexed, geo_encoded_df], axis=1)

In [90]:
# input_data is already a DataFrame, and re-wrapping it with pd.DataFrame(...)
# does not copy by default. Take an explicit copy so the column assignment in
# the next cell cannot write through to input_data.
input_df = input_data.copy()
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,France,Male,23,2,10000,1,1,0,60000,1.0,0.0,0.0


In [91]:
# Replace the Gender string with the integer code learned during training.
gender_codes = label_encoder_gender.transform(input_df['Gender'])
input_df['Gender'] = gender_codes
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,France,1,23,2,10000,1,1,0,60000,1.0,0.0,0.0


In [92]:
# The raw Geography string is no longer needed now that its one-hot columns exist.
input_df = input_df.drop(labels=['Geography'], axis='columns')
input_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,23,2,10000,1,1,0,60000,1.0,0.0,0.0


In [93]:
# Standardise the record with the scaler fitted on the training split. The
# columns of input_df are in the same order as the training feature columns.
input_scaled = scaler.transform(X=input_df)
input_scaled

array([[-0.53598516,  0.91324755, -1.51143782, -1.04241787, -1.05836066,
        -0.91668767,  0.64920267, -1.02583358, -0.70296551,  1.00150113,
        -0.57946723, -0.57638802]])

In [94]:
# Run the network on the scaled record; the result is a 1x1 array holding the
# sigmoid output.
prediction = model.predict(x=input_scaled)
prediction



array([[0.05062813]], dtype=float32)

In [95]:
# Pull the single scalar out of the 1x1 prediction array.
pred_prb = prediction[0, 0]
pred_prb

0.050628126

In [96]:
# Report churn when the predicted probability is strictly above 0.5.
churn_message = (
    "The customer is likely to churn" if pred_prb > 0.5 else "Not likely to churn"
)
print(churn_message)

Not likely to churn
