In [None]:
# Read the bank customer CSV into a DataFrame. The path is relative, so the
# file has to sit in the notebook's working directory.
import pandas as pd

df = pd.read_csv('bank_customer_data.csv')
# Print the frame to inspect the loaded rows and columns.
print(df)

      RowNumber  CustomerId    Surname  ...  IsActiveMember EstimatedSalary Exited
0             1    15634602   Hargrave  ...               1       101348.88      1
1             2    15647311       Hill  ...               1       112542.58      0
2             3    15619304       Onio  ...               0       113931.57      1
3             4    15701354       Boni  ...               0        93826.63      0
4             5    15737888   Mitchell  ...               1        79084.10      0
...         ...         ...        ...  ...             ...             ...    ...
9995       9996    15606229   Obijiaku  ...               0        96270.64      0
9996       9997    15569892  Johnstone  ...               1       101699.77      0
9997       9998    15584532        Liu  ...               1        42085.58      1
9998       9999    15682355  Sabbatini  ...               0        92888.52      1
9999      10000    15628319     Walker  ...               0        38190.78      0

[10

In [None]:
#performing EDA (exploratory data analysis)#
import pandas as pd

def _PrintCategoricalStats(df, attributeNames, typeLabel):
  """Print unique values, value counts and mode for each named column.

  Shared by the nominal and binary sections of PrintDataStats, which differ
  only in the "Attribute Type" label they print.

  Args:
    df: DataFrame holding the columns.
    attributeNames: iterable of column names (strings) to describe.
    typeLabel: text printed after "Attribute Type: ".
  """
  for attributeName in attributeNames:
    column = df[attributeName]
    print("##########Stats of "+attributeName+"##############")
    print("Attribute Type: "+typeLabel)
    print("Unique Values: ")
    print(column.unique())
    print(column.value_counts())
    # str() is required here: adding a str to the raw mode array raises for
    # numeric columns and, for string columns, prepends the prefix to every
    # element instead of producing a single "Mode: [...]" line.
    print("Mode: "+str(column.mode().values))
    print()


def PrintDataStats(df, numericAttributes, nominalAttributes, binaryAttributes):
  """Print descriptive statistics for selected columns of ``df``.

  Numeric columns get mean, max, min, standard deviation and variance.
  Nominal and binary columns get their unique values, value counts and mode.
  Everything is written to stdout; nothing is returned.

  Args:
    df: DataFrame to describe.
    numericAttributes: column names treated as numeric.
    nominalAttributes: column names treated as nominal (categorical).
    binaryAttributes: column names treated as binary.
  """
  for attributeName in numericAttributes:
    column = df[attributeName]
    print("##########Stats of "+attributeName+"##############")
    print("Attribute Type: Numeric")
    print("Mean: "+str(column.mean()))
    print("Max: "+str(column.max()))
    print("Min: "+str(column.min()))
    print("Standard Deviation: "+str(column.std()))
    print("Variance: "+str(column.var()))
    print("")

  _PrintCategoricalStats(df, nominalAttributes, "Nominal")
  _PrintCategoricalStats(df, binaryAttributes, "Binary")

# Describe the columns of interest, grouped by how each one is treated.
PrintDataStats(
    df,
    numericAttributes=["CreditScore", "Age", "Balance", "Tenure", "NumOfProducts", "EstimatedSalary"],
    nominalAttributes=["Surname", "Geography"],
    binaryAttributes=["Gender", "HasCrCard", "IsActiveMember", "Exited"],
)

##########Stats of CreditScore##############
Attribute Type: Numeric
Mean: 650.5288
Max: 850
Min: 350
Standard Deviation: 96.65329873613061
Variance: 9341.860156575705

##########Stats of Age##############
Attribute Type: Numeric
Mean: 38.9218
Max: 92
Min: 18
Standard Deviation: 10.487806451704591
Variance: 109.99408416841645

##########Stats of Balance##############
Attribute Type: Numeric
Mean: 76485.88928799961
Max: 250898.09
Min: 0.0
Standard Deviation: 62397.40520238623
Variance: 3893436175.9907765

##########Stats of Tenure##############
Attribute Type: Numeric
Mean: 5.0128
Max: 10
Min: 0
Standard Deviation: 2.892174377049708
Variance: 8.364672627262866

##########Stats of NumOfProducts##############
Attribute Type: Numeric
Mean: 1.5302
Max: 4
Min: 1
Standard Deviation: 0.5816543579989936
Variance: 0.3383217921792214

##########Stats of EstimatedSalary##############
Attribute Type: Numeric
Mean: 100090.2398809998
Max: 199992.48
Min: 11.58
Standard Deviation: 57510.49281769822
Var

In [None]:
# Drop the identifier-style columns (row number, customer id, surname);
# they are considered unhelpful for finding patterns in the data.
df = df.drop(columns=["RowNumber", "CustomerId", "Surname"])
print(df)

      CreditScore Geography  Gender  ...  IsActiveMember  EstimatedSalary  Exited
0             619    France  Female  ...               1        101348.88       1
1             608     Spain  Female  ...               1        112542.58       0
2             502    France  Female  ...               0        113931.57       1
3             699    France  Female  ...               0         93826.63       0
4             850     Spain  Female  ...               1         79084.10       0
...           ...       ...     ...  ...             ...              ...     ...
9995          771    France    Male  ...               0         96270.64       0
9996          516    France    Male  ...               1        101699.77       0
9997          709    France  Female  ...               1         42085.58       1
9998          772   Germany    Male  ...               0         92888.52       1
9999          792    France  Female  ...               0         38190.78       0

[10000 rows x 1

In [None]:
# Convert the nominal attributes into numeric ones.

# One 0/1 indicator column per country found in "Geography". The comparison is
# vectorised over the whole column, so no zero pre-fill or row-wise lambda is
# needed; astype("int64") keeps the same integer dtype the lambdas produced.
df["GeographyFrance"] = (df["Geography"] == "France").astype("int64")
df["GeographySpain"] = (df["Geography"] == "Spain").astype("int64")
df["GeographyGermany"] = (df["Geography"] == "Germany").astype("int64")

# Encode Gender in place: "Male" -> 1, anything else -> 0.
# Caution: this overwrites the text values, so running the cell a second time
# would compare the 0/1 codes against "Male" and turn every row into 0.
df["Gender"] = (df["Gender"] == "Male").astype("int64")


In [None]:
# The text "Geography" column is redundant now that numeric indicator
# columns exist for it, so remove it.
df = df.drop(columns="Geography")


In [None]:
# Min-max normalisation of every column. Binary attributes that already hold
# 0 and 1 come out unchanged.
def normalize(df):
    """Return a copy of ``df`` with each column rescaled to the range [0, 1].

    Every column is transformed as ``(x - min) / (max - min)``. A column whose
    values are all equal has a zero range; instead of dividing 0 by 0 (which
    would fill the column with NaN) it is mapped to 0.0.

    Args:
        df: DataFrame whose columns are all numeric.

    Returns:
        A new DataFrame with the same index and columns; ``df`` itself is
        left untouched.
    """
    result = df.copy()
    for feature_name in df.columns:
        column = df[feature_name]
        min_value = column.min()
        value_range = column.max() - min_value
        if value_range == 0:
            # Constant column: x - min is 0 everywhere. Cast to float so the
            # dtype matches what the division branch produces.
            result[feature_name] = (column - min_value).astype("float64")
        else:
            result[feature_name] = (column - min_value) / value_range
    return result

# Rescale every column of df and rebind the name to the normalised copy.
# NOTE(review): this runs before the train/test split, so each column's
# min/max are taken over the full dataset (future test rows included) —
# confirm that is intended.
df=normalize(df)

In [None]:
# List the column names currently present in df.
print(df.columns)

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited',
       'GeographyFrance', 'GeographySpain', 'GeographyGermany'],
      dtype='object')


In [None]:
# Divide the dataset into a training set and a test set.

# Shuffle the rows. The fixed seed makes the shuffle, and therefore which rows
# land in each split, identical on every run of the notebook.
split_seed = 42
df_permutated = df.sample(frac=1, random_state=split_seed)

# The first 80% of the shuffled rows are used for training; the rest is held out.
train_size = 0.8
train_end = int(len(df_permutated)*train_size)

# .iloc makes it explicit that the cut is by row position, not by index label.
df_train = df_permutated.iloc[:train_end]

df_test = df_permutated.iloc[train_end:]

In [None]:
# Split each partition into its target column ("Exited") and the remaining
# feature columns.
df_train_labels = df_train.loc[:, "Exited"]
df_train = df_train.drop(columns=["Exited"])

df_test_labels = df_test.loc[:, "Exited"]
df_test = df_test.drop(columns=["Exited"])


In [None]:
# Sanity check: (rows, feature columns) of the training and test partitions.
print(df_train.shape)
print(df_test.shape)

(8000, 12)
(2000, 12)


In [None]:
#Defining the Neural Network Model for Dataset 1

from keras.models import Sequential
from keras.layers import Dense, Dropout

# Define the Keras model: a small fully connected binary classifier.
model = Sequential()
# 1st hidden layer: 12 neurons, ReLU activation. The input width is the number
# of feature columns in the training frame.
model.add(Dense(12, input_dim=df_train.shape[1], activation='relu'))
# 2nd hidden layer: 8 neurons, tanh activation.
model.add(Dense(8, activation='tanh'))
# Output layer: a single sigmoid neuron.
model.add(Dense(1, activation='sigmoid'))

# Compile the model with binary cross-entropy loss, the Adam optimizer and
# accuracy as the reported metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Ellipsis

In [None]:
# Train for 30 epochs in mini-batches of 32 rows. The call is left as the
# cell's last expression, so the returned History object is echoed.
model.fit(
    x=df_train.values,
    y=df_train_labels.values,
    batch_size=32,
    epochs=30,
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fa32cd5c5d0>

In [None]:
# Evaluating the model on the held-out test partition.

# evaluate() is unpacked into two values here; the first is discarded and the
# second is used as the accuracy (presumably loss and the 'accuracy' metric
# the model was compiled with — confirm against the Keras version in use).
_, accuracy = model.evaluate(df_test.values, df_test_labels.values)
# Report the accuracy as a percentage with two decimals.
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 86.10
