In [113]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle

In [114]:
## Load the dataset
df=pd.read_csv("../Churn_Modelling.csv")
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [115]:
X = df.drop(columns=['RowNumber','CustomerId','Surname','Exited'],axis=1)
Y = df['Exited'] 

In [116]:
from sklearn.model_selection import train_test_split

x_train , x_test,y_train,y_test = train_test_split(X,Y , test_size=0.2,random_state=42)

In [117]:
x_train.shape , x_test.shape,y_train.shape,y_test.shape

((8000, 10), (2000, 10), (8000,), (2000,))

In [118]:
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder(drop='first')
label_transformer = OrdinalEncoder()

In [119]:
cat_feat = [feat for feat in x_train.columns if X[feat].dtypes == 'O']
num_feat = [feat for feat in x_train.columns if X[feat].dtypes != 'O']
bin_feat = [feat for feat in cat_feat if x_train[feat].nunique() == 2]
cat_feat = [feat for feat in cat_feat if x_train[feat].nunique() > 2]

In [120]:
preprocesser = ColumnTransformer([
        ('OneHotEncoder',oh_transformer,cat_feat),
        ('OrdinalEncoder',label_transformer,bin_feat),
        ('StandardScaler',numeric_transformer,num_feat)
])

In [121]:
x_train = preprocesser.fit_transform(x_train)


In [122]:
x_train

array([[ 0.        ,  0.        ,  1.        , ...,  0.64920267,
         0.97481699,  1.36766974],
       [ 1.        ,  0.        ,  1.        , ...,  0.64920267,
         0.97481699,  1.6612541 ],
       [ 0.        ,  1.        ,  1.        , ...,  0.64920267,
        -1.02583358, -0.25280688],
       ...,
       [ 0.        ,  0.        ,  0.        , ..., -1.54035103,
        -1.02583358, -0.1427649 ],
       [ 0.        ,  0.        ,  1.        , ...,  0.64920267,
        -1.02583358, -0.05082558],
       [ 1.        ,  0.        ,  1.        , ...,  0.64920267,
         0.97481699, -0.81456811]])

In [123]:
x_test = preprocesser.transform(x_test)

In [124]:
x_test

array([[ 1.        ,  0.        ,  1.        , ..., -1.54035103,
        -1.02583358, -1.01960511],
       [ 0.        ,  0.        ,  1.        , ...,  0.64920267,
         0.97481699,  0.79888291],
       [ 0.        ,  1.        ,  0.        , ...,  0.64920267,
        -1.02583358, -0.72797953],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.64920267,
        -1.02583358, -1.16591585],
       [ 0.        ,  0.        ,  1.        , ...,  0.64920267,
        -1.02583358, -0.41163463],
       [ 1.        ,  0.        ,  1.        , ...,  0.64920267,
         0.97481699,  0.12593183]])

In [125]:
import pickle

with open('../preprocesser.pkl' , 'wb') as file :
    pickle.dump(preprocesser ,file)

In [126]:
import tensorflow as tf
from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard,EarlyStopping
from datetime import datetime

In [127]:
model = Sequential([
    Dense(64,activation='relu',input_shape =(x_train.shape[1],)),
    Dense(32,activation='relu'),
    Dense(1,activation=tf.keras.activations.sigmoid)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [128]:
model.summary()

In [129]:
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [130]:
log_dir="../logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorflow_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)

In [131]:
## Set up Early Stopping
early_stopping_callback=EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)


In [132]:
history=model.fit(
    x_train,y_train,validation_data=(x_test,y_test),epochs=100,
    callbacks=[tensorflow_callback,early_stopping_callback]
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8086 - loss: 0.4480 - val_accuracy: 0.8375 - val_loss: 0.3892
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8446 - loss: 0.3773 - val_accuracy: 0.8570 - val_loss: 0.3548
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8558 - loss: 0.3506 - val_accuracy: 0.8610 - val_loss: 0.3454
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8589 - loss: 0.3415 - val_accuracy: 0.8610 - val_loss: 0.3407
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8612 - loss: 0.3367 - val_accuracy: 0.8635 - val_loss: 0.3495
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8599 - loss: 0.3324 - val_accuracy: 0.8570 - val_loss: 0.3439
Epoch 7/100
[1m250/25

In [133]:
model.save('../model.h5')



In [134]:
%load_ext tensorboard

In [138]:
%tensorboard --logdir ../logs/fit

Reusing TensorBoard on port 6007 (pid 16268), started 0:00:03 ago. (Use '!kill 16268' to kill it.)