## Customer Churn Predictions- Using ANN 
We are given data sets for customers in a bank and we need to find if the customer will stay with bank or exit.<br>
We are using ANN that is <b>Artificial Neural Networks</b> to solve this problem.

### import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Loading data and selecting inputs and output from dataset

In [2]:
data=pd.read_csv("Churn_Modelling.csv")

In [3]:
data.head(2)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0


In [4]:
data.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [5]:
X=data.iloc[:, 3:-1]
y=data.iloc[:,-1]

### Creating dummies for categorical data- Gender and Geography

In [6]:
geography=pd.get_dummies(X['Geography'],drop_first=True)
gender=pd.get_dummies(X['Gender'], drop_first=True)

### Concatenating dummies created to input data set

In [7]:
X=pd.concat([X,geography,gender],axis=1)
X=X.drop(['Geography','Gender'], axis=1)

In [8]:
X.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


<b> Splitting the data set into training and test data

In [9]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

<b>feature scaling

In [10]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

<b>Import Keras libraries for ANN model

In [11]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout



<b>Creating ANN model

In [12]:
model=Sequential()
model.add(Dense(units=6, kernel_initializer = 'he_uniform', activation='relu', input_dim=11))
model.add(Dense(units=6, kernel_initializer = 'he_uniform', activation='relu'))
model.add(Dense(units=1, kernel_initializer='glorot_uniform', activation='sigmoid'))

model.compile(optimizer='Adamax', loss= 'binary_crossentropy', metrics=['accuracy'])

<b> Fitting training data to the model

In [13]:
model_result=model.fit(X_train,y_train, validation_split=0.33, batch_size=10, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
# list all data in history

print(model_history.history.keys())
# summarize history for accuracy
plt.plot(model_history.history['acc'])
plt.plot(model_history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(model_history.history['loss'])
plt.plot(model_history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


NameError: name 'model_history' is not defined

### Visualizations- visualizing results based on accuracy, loss

In [None]:
## list all data in history
print(model_result.history.keys())

In [None]:
plt.plot(model_result.history['accuracy'])
plt.plot(model_result.history['val_accuracy'])
#plt.title("Model")
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
plt.plot(model_result.history['loss'])
plt.plot(model_result.history['val_loss'])
plt.title=('loss visualization')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train','test'])
plt.show()

<b>evaluation of model with test data

In [None]:
y_pred=model.predict(X_test)
y_pred

In [None]:
y_pred=(y_pred>0.5)

In [None]:
y_pred

<b>Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,y_pred)
cm

<b>Calculating Accuracy

In [None]:
from sklearn.metrics import accuracy_score
score=accuracy_score(y_pred,y_test)

In [None]:
score