In [1]:
import pandas as pd

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.activations import relu, sigmoid

In [6]:
df = pd.read_csv('Churn_Modelling.csv')

In [7]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [28]:
y=df.iloc[:,13]

In [29]:
y

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [30]:
X=df.iloc[:,3:13]

In [31]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [14]:
for column in (X.columns):
    attr=X[column].unique()
    print("No of unique elements in {} is {}".format(column, len(attr)))
    

No of unique elements in CreditScore is 460
No of unique elements in Geography is 3
No of unique elements in Gender is 2
No of unique elements in Age is 70
No of unique elements in Tenure is 11
No of unique elements in Balance is 6382
No of unique elements in NumOfProducts is 4
No of unique elements in HasCrCard is 2
No of unique elements in IsActiveMember is 2
No of unique elements in EstimatedSalary is 9999


In [15]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [35]:
le1=LabelEncoder()
le2=LabelEncoder()


In [36]:
X.iloc[:, 1]=le1.fit_transform(X.iloc[:, 1])
X.iloc[:, 2]=le2.fit_transform(X.iloc[:, 2])


In [37]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1


In [39]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.25, random_state=1)

In [41]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

In [44]:
X_train

array([[ 0.71997988, -0.90457101,  0.91776859, ...,  0.64262086,
         0.98333878, -1.48050131],
       [-0.75327193, -0.90457101, -1.08959928, ...,  0.64262086,
        -1.01694352, -1.56523002],
       [ 0.57574543,  1.51493062, -1.08959928, ...,  0.64262086,
        -1.01694352, -1.18210827],
       ...,
       [ 0.22546179, -0.90457101, -1.08959928, ...,  0.64262086,
         0.98333878, -0.13896318],
       [ 0.13273964, -0.90457101, -1.08959928, ...,  0.64262086,
         0.98333878,  0.01979185],
       [ 1.16298567,  0.3051798 ,  0.91776859, ...,  0.64262086,
        -1.01694352, -1.15605488]])

In [46]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [47]:
def build_model(layers, activation):
    model=Sequential()
    for i, nodes in enumerate(layers):
        if i==0:
            model.add(Dense(nodes, input_dim=X_train.shape[1]))
            model.add(Activation(activation))
        else:
            model.add(Dense(nodes))
            model.add(Activation(activation))
    model.add(Dense(1))
    
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return model

model = KerasClassifier(build_fn=build_model, verbose=0)

In [48]:
model

<tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier at 0x7f12d5511dc0>

In [49]:
layers = [[20], [40, 20], [40, 30, 15], [50, 35, 20]]
activations = ['relu', 'sigmoid']
param_grid=dict(layers=layers, activation=activations, batch_size=[64, 128, 256], epochs=[30])
grid  = GridSearchCV(estimator = model, param_grid=param_grid)

In [50]:
grid_result = grid.fit(X_train, y_train)

In [51]:
[grid_result.best_score_,grid_result.best_params_]

[0.8493333339691163,
 {'activation': 'relu',
  'batch_size': 256,
  'epochs': 30,
  'layers': [50, 35, 20]}]

In [54]:
pred_y = grid.predict(X_test)

In [55]:
pred_y

array([[0],
       [0],
       [0],
       ...,
       [0],
       [1],
       [0]])

In [57]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, pred_y)

In [58]:
cm

array([[1921,   59],
       [ 342,  178]])

In [60]:
from sklearn.metrics import accuracy_score
score=accuracy_score(y_test,pred_y)

In [61]:
score

0.8396