# Bank Customer's Churn Classification

Classifying bank customers, based on their previous banking activities, by whether or not they will leave the bank.

Dataset: https://www.superdatascience.com/pages/deep-learning

In [1]:
# Connecting Google Drive with Google Colab.
# NOTE: `google.colab` is only importable inside a Google Colab runtime; in any
# other environment this cell fails with ModuleNotFoundError.
from google.colab import drive
drive.mount('/content/drive/')  # Drive contents become available under /content/drive/

ModuleNotFoundError: No module named 'google.colab'

In [None]:
# Importing libraries
import numpy as np
import pandas as pd

In [None]:
# Loading the dataset
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Datafiles/Churn_Modelling.csv')

In [None]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## Exploring the Dataset

In [None]:
# Returns number of rows and columns of dataset
df.shape

In [None]:
# Returns the first 5 rows
df.head()

In [None]:
# Returns the last 5 rows
df.tail()

In [None]:
# Returns the datatype of each column (float, int, string, bool, etc.)
df.dtypes

In [None]:
# Returns an object with all of the column headers 
df.columns

In [None]:
# Returns number of null values
df.isnull().sum()

In [None]:
# Importing essential libraries for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Visualizing the count of 'exited customers' in the dataset
plt.figure(figsize=(8,8))
sns.set(style="darkgrid")
sns.countplot(x='Exited', data=df)
plt.xlabel('0: Customers still with the bank, 1: Customers exited the bank')
plt.ylabel('Count')
plt.title('Bank Customers Churn Visualization')
plt.show()

## Feature Engineering

In [None]:
# Preview the identifier columns that are removed in the next step. They are
# selected by name so the preview always matches the columns actually dropped
# (a positional slice 1:3 would cover only two of the three columns).
dropped_columns = df[['RowNumber', 'CustomerId', 'Surname']]
dropped_columns

In [None]:
df.drop(['RowNumber','CustomerId','Surname'], axis=1, inplace=True)
df.columns

In [None]:
# One-hot encoding the categorical 'Geography' and 'Gender' columns.
# drop_first=True omits the dummy column for the first category of each
# feature, since it is implied when all remaining dummy columns are 0.
geography = pd.get_dummies(df['Geography'], drop_first=True)
gender = pd.get_dummies(df['Gender'], drop_first=True)

In [None]:
# Appending columns to original dataframe
df = pd.concat([df, geography, gender], axis=1)
df.columns

In [None]:
df.drop(['Geography', 'Gender'], axis=1, inplace=True)

In [None]:
df.head()

## Data Preprocessing

In [None]:
X = df.drop('Exited', axis=1)
y = df['Exited']

In [None]:
# Splitting the dataset into the train and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [None]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training set only, then reuse those statistics for the
# test set so no information from the test data influences the scaling.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Building Artificial Neural Network (ANN)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, Flatten, LeakyReLU, BatchNormalization, Dropout
from keras.activations import relu, sigmoid
from keras.layers import LeakyReLU

In [None]:
#KerasClassifier will help to find out how many layer I need
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
def create_model(layers, activation):
    """Build and compile a fully connected binary classifier.

    Args:
        layers: Sequence of ints; one hidden Dense layer is added per entry,
            with that many units, in the given order.
        activation: Activation applied after every hidden layer
            (e.g. 'relu' or 'sigmoid').

    Returns:
        A compiled Sequential model with a single sigmoid output unit.

    Note:
        The input dimension is read from the module-level ``X_train``.
    """
    model = Sequential()
    for i, nodes in enumerate(layers):
        if i == 0:
            # Only the first layer has to be told the number of input features.
            model.add(Dense(nodes, input_dim=X_train.shape[1]))
        else:
            model.add(Dense(nodes))
        model.add(Activation(activation))

    # The output layer is added once, after ALL hidden layers have been built.
    # Sigmoid squashes the output to a probability, which is what
    # binary_crossentropy expects with its default settings.
    model.add(Dense(1, activation='sigmoid'))

    # binary_crossentropy because this is a binary classification problem
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
    
model = KerasClassifier(build_fn=create_model, verbose=0)
    

In [None]:
model

In [None]:
# Candidate hidden-layer layouts: one layer of 20 neurons; two layers of 40 then
# 20 neurons; three layers of 45, 30 and 15 neurons
layers = [[20],[40, 20],[45, 30, 15]]
activations = ['sigmoid', 'relu']
param_grid = dict(layers=layers, activation=activations, batch_size = [128, 256], epochs= [30])
# GridSearchCV performs hyperparameter optimisation by trying every combination in param_grid
grid = GridSearchCV(estimator=model, param_grid=param_grid)

In [None]:
grid_result = grid.fit(X_train, y_train)

In [None]:
type (layers)

In [None]:
type (activations)

In [None]:
type(param_grid)

In [None]:
type(grid)

In [None]:
# Best mean cross-validated score and the hyper-parameter combination that achieved it
[grid_result.best_score_, grid_result.best_params_]