In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
import tensorflow_addons as tfa
from sklearn.preprocessing import StandardScaler

### Read data and preprocess

In [5]:
df=pd.read_csv('churn_modeling.csv')
print(df.shape)
df.head()

(10000, 14)


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [7]:
df[['Exited','CustomerId']].groupby('Exited').count()

Unnamed: 0_level_0,CustomerId
Exited,Unnamed: 1_level_1
0,7963
1,2037


In [8]:
neg, pos = np.bincount(df['Exited'])
total = neg + pos
print('Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))

Total: 10000
    Positive: 2037 (20.37% of total)



In [66]:
df['Gender']=df['Gender'].apply(lambda x:1 if x=='Male' else 0)

In [67]:
X,y=df.drop(['Exited','RowNumber','CustomerId','Surname','Geography'],1),df['Exited']

In [68]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)


In [69]:
scaler = StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.transform(X_test)


### model 1

In [105]:
metrics = [  
    tf.metrics.BinaryAccuracy('Accuracy'),
    tf.metrics.Precision(name='Precision'),
    tf.metrics.Recall(name='Recall')]

model1=keras.models.Sequential()

model1.add(layers.BatchNormalization())
model1.add(layers.Dropout(0.3))
model1.add(layers.Dense(512, activation='relu', name='hidden_1'))
model1.add(layers.Dense(1, activation='sigmoid', name='output'))

model1.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=metrics)

In [187]:


checkpoint_cb = keras.callbacks.ModelCheckpoint("my_keras_model1.h5", save_best_only=True)

# Creat EarlyStopping checkpoint:
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)


model1.fit(X_train, y_train,
                    epochs=150,
                     batch_size=1024,
                     validation_split=0.2,
                    callbacks=[checkpoint_cb, early_stopping_cb])

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150


<keras.callbacks.History at 0x7f9521101810>

In [190]:
 eval=model1.evaluate(X_train, y_train, batch_size=1024, verbose=0)
 print('Training: Los= {:0.4f}, Accuracy= {:0.4f}, Precision= {:0.4f}, Recall={:0.4f}'.format(eval[0],eval[1],eval[2],eval[3]))

Training: Los= 0.3517, Accuracy= 0.8551, Precision= 0.8625, Recall=0.3510


In [194]:
eval=model1.evaluate(X_test, y_test, verbose=0)
print('Testing: Los= {:0.4f}, Accuracy= {:0.4f}, Precision= {:0.4f}, Recall={:0.4f}'.format(eval[0],eval[1],eval[2],eval[3]))

Training: Los= 0.3468, Accuracy= 0.8560, Precision= 0.8221, Recall=0.3410


### model 2
Try to add inital bias to output layer and weights to deal with imbalanced data

In [88]:
initial_bias = np.log([pos/neg])
output_bias = tf.keras.initializers.Constant(initial_bias)
initial_bias

array([-1.36332768])

In [162]:
weight_for_0 = (1 / neg) * (total / 8.0)
weight_for_1 =(1 / pos) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1}
class_weight

{0: 0.15697601406505088, 1: 2.454590083456063}

In [183]:
model2=keras.models.Sequential([
      keras.layers.Dense(64, activation='relu',input_shape=(X_train.shape[-1],)),
       keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(16, activation='relu'),
      keras.layers.Dense(1, activation='sigmoid',bias_initializer=output_bias),

])



In [184]:

metrics = [  
    tf.metrics.BinaryAccuracy('Accuracy'),
    tf.metrics.Precision(name='Precision'),
    tf.metrics.Recall(name='Recall')]


checkpoint_cb = keras.callbacks.ModelCheckpoint("my_keras_model2.h5", save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

model2.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = metrics)
model2.fit(X_train, y_train,validation_split=0.2, batch_size = 1024, epochs = 150,callbacks=[checkpoint_cb,early_stopping_cb])  


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150


<keras.callbacks.History at 0x7f9521b1da90>

In [195]:
eval=model2.evaluate(X_train, y_train, batch_size=1024, verbose=0)
print('Training: Los= {:0.4f}, Accuracy= {:0.4f}, Precision= {:0.4f}, Recall={:0.4f}'.format(eval[0],eval[1],eval[2],eval[3]))

Training: Los= 0.3211, Accuracy= 0.8687, Precision= 0.7845, Recall=0.4982


In [196]:
  
eval = model2.evaluate(X_test, y_test,verbose=0)  
print('Testing: Los= {:0.4f}, Accuracy= {:0.4f}, Precision= {:0.4f}, Recall={:0.4f}'.format(eval[0],eval[1],eval[2],eval[3]))

Testing: Los= 0.3398, Accuracy= 0.8590, Precision= 0.7110, Recall=0.4758
