In [40]:
# Keras can run on top of either a TensorFlow or a Theano backend; install Theano here.
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
!pip install Theano



**Part 1 - Data Preprocessing**

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [42]:
# Read the customer churn data and preview its first five rows.
dataset = pd.read_csv("Churn_Modelling.csv")
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [0]:
# Split the frame into a feature matrix and a target vector.
X, y = (
    dataset.iloc[:, 3:-1].values,  # columns that might affect the result (position 3 up to, not including, the last)
    dataset.iloc[:, -1].values,    # last column is the target
)

In [44]:
# Preview the first five feature rows and the first five targets.
print(f"X: \n{X[:5]}")
print(f"y: \n{y[:5]}")

X: 
[[619 'France' 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 'Female' 42 8 159660.8 3 1 0 113931.57]
 [699 'France' 'Female' 39 1 0.0 2 0 0 93826.63]
 [850 'Spain' 'Female' 43 2 125510.82 1 1 1 79084.1]]
y: 
[1 0 1 0 0]


In [0]:
# Encode the categorical (non-numeric) feature columns,
# e.g. the country ("France", "Spain", ...) and the gender ("Female", ...).

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Replace each category string with an integer code, one encoder per column.
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])   # For country
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])   # For gender

# One-hot encode the country column. The one-hot columns are placed first in
# the result, followed by the remaining columns unchanged.
transformer = ColumnTransformer(
    transformers=[
        ("OneHot",        # Just a name
         OneHotEncoder(), # The transformer instance
         [1]              # The column(s) to be applied on.
         )
    ],
    remainder='passthrough', # do not apply anything to the remaining columns
    sparse_threshold=0       # always return a dense array: a sparse result has no .tolist()
)
X = transformer.fit_transform(X.tolist())
# Everything is numeric now; store it as a plain list of float rows.
X = X.astype('float64').tolist()

In [46]:
# Preview the first five encoded feature rows.
print(f"X: \n{X[:5]}")

X: 
[[1.0, 0.0, 0.0, 619.0, 0.0, 42.0, 2.0, 0.0, 1.0, 1.0, 1.0, 101348.88], [0.0, 0.0, 1.0, 608.0, 0.0, 41.0, 1.0, 83807.86, 1.0, 0.0, 1.0, 112542.58], [1.0, 0.0, 0.0, 502.0, 0.0, 42.0, 8.0, 159660.8, 3.0, 1.0, 0.0, 113931.57], [1.0, 0.0, 0.0, 699.0, 0.0, 39.0, 1.0, 0.0, 2.0, 0.0, 0.0, 93826.63], [0.0, 0.0, 1.0, 850.0, 0.0, 43.0, 2.0, 125510.82, 1.0, 1.0, 1.0, 79084.1]]


In [49]:
# Avoid the dummy variable trap: a 3-category variable only needs 2 columns,
# so the first one-hot column is dropped.

# Convert the list to an array, then keep every column except column 0.
X = np.asarray(X)[:, 1:]
print(X[:5])

[[0.0000000e+00 0.0000000e+00 6.1900000e+02 0.0000000e+00 4.2000000e+01
  2.0000000e+00 0.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00
  1.0134888e+05]
 [0.0000000e+00 1.0000000e+00 6.0800000e+02 0.0000000e+00 4.1000000e+01
  1.0000000e+00 8.3807860e+04 1.0000000e+00 0.0000000e+00 1.0000000e+00
  1.1254258e+05]
 [0.0000000e+00 0.0000000e+00 5.0200000e+02 0.0000000e+00 4.2000000e+01
  8.0000000e+00 1.5966080e+05 3.0000000e+00 1.0000000e+00 0.0000000e+00
  1.1393157e+05]
 [0.0000000e+00 0.0000000e+00 6.9900000e+02 0.0000000e+00 3.9000000e+01
  1.0000000e+00 0.0000000e+00 2.0000000e+00 0.0000000e+00 0.0000000e+00
  9.3826630e+04]
 [0.0000000e+00 1.0000000e+00 8.5000000e+02 0.0000000e+00 4.3000000e+01
  2.0000000e+00 1.2551082e+05 1.0000000e+00 1.0000000e+00 1.0000000e+00
  7.9084100e+04]]


In [0]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [0]:
# Feature scaling

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
# Fit the scaler on the training split only, then apply that same scaling to both splits.
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

**Part 2 - Now let's make the ANN!**

In [53]:
import keras

Using TensorFlow backend.


In [0]:
from keras.models import Sequential
from keras.layers import Dense

In [0]:
# Initialise the ANN as an (initially empty) sequence of layers.
classifier = Sequential()

In [59]:
# Add the input layer together with the first hidden layer.
classifier.add(
    Dense(
        units=6,            # output dimension of this layer
        activation="relu",  # activation function
        input_dim=11,       # number of input features
    )
)






In [0]:
# Adding the second hidden layer (6 units, ReLU activation).
# The closing parenthesis of classifier.add(...) was missing, which made this cell a SyntaxError.
classifier.add(Dense(units= 6, activation = "relu"))

In [0]:
# Adding the final (output) layer: a single sigmoid unit for a binary outcome.
# No input_dim is passed: only the first layer of a Sequential model needs the
# input size, later layers take it from the layer before them.
classifier.add(Dense(units=1, activation="sigmoid"))

# If the output has more than 1 dimension (2 or more outputs),
# then use units = 2 or more and activation = "softmax" (like sigmoid, but for several outputs).

In [62]:
# Compile the ANN.
# binary_crossentropy is the loss for a single-output model;
# use categorical_crossentropy when there are more outputs.
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [63]:
# Train the ANN on the training set, in batches of 10 samples for 100 epochs.
classifier.fit(
    X_train,
    y_train,
    batch_size=10,
    epochs=100,
)




Epoch 1/100





Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


<keras.callbacks.History at 0x7f3bd0788630>

**Part 3 - Making the predictions and evaluating the model**

In [66]:
# Predict on the test set and threshold the model output at 0.5 to get booleans.
y_pred = classifier.predict(X_test) > 0.5
y_pred[:6]

array([[False],
       [False],
       [False],
       [False],
       [False],
       [ True]])

In [0]:
from sklearn.metrics import confusion_matrix

# Tabulate the true test labels against the thresholded predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [68]:
# Display the confusion matrix computed from y_test and y_pred.
cm

array([[1523,   72],
       [ 204,  201]])

In [70]:
# Accuracy = correctly classified samples (the diagonal of the confusion
# matrix) divided by the total number of samples. The total is read from the
# matrix itself instead of being hard-coded as 2000, so the result stays
# correct if the size of the test set changes.
accuracy = np.trace(cm) / np.sum(cm)
print(accuracy)

0.862
