In [None]:
#IMPORTS
# NOTE: the original import order is kept on purpose - several later keras
# imports rebind names first imported from tensorflow.keras (e.g. `K`, `Dense`),
# so reordering the lines could change which binding wins.

import numpy as np
import random
import tensorflow as tf
import tensorflow.keras as kr
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.datasets import mnist

from scipy.spatial.distance import euclidean
from sklearn.metrics import confusion_matrix

from time import sleep
from tqdm import tqdm

import copy

import pandas as pd
import matplotlib.pyplot as plt
import math
import seaborn as sns
from numpy.random import RandomState
import scipy as scp
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras import optimizers
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.utils import to_categorical
from keras import backend as K
from itertools import product
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

from sklearn import mixture

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Seed every random number generator the notebook can draw from, so that a
# "Restart & Run All" gives the same split and the same network initialisation.
# Creating a RandomState alone is not enough: it only has an effect where it is
# explicitly passed (e.g. as `random_state=rs`).
SEED = 92
rs = RandomState(SEED)    # dedicated NumPy generator, to be passed as `random_state=`
random.seed(SEED)         # Python's built-in RNG
np.random.seed(SEED)      # NumPy's global RNG (sklearn falls back to it when random_state is None)
tf.random.set_seed(SEED)  # TensorFlow-side randomness (weight initialisation, shuffling)

In [None]:
# Read the bank-marketing CSV into a pandas DataFrame.
# Cells that contain '?' are parsed as missing values (NaN).
bm = pd.read_csv(
    'bank-marketing_csv.csv',
    na_values='?',
)

## Preliminary Data Analysis

In [None]:
col_names = bm.columns

# Turn any remaining literal "?" placeholder into NaN in one vectorised pass
# (read_csv was already asked to do this through na_values='?').
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
bm = bm.replace("?", np.nan)


def _fill_with_mode(column):
    """Return `column` with its missing values replaced by its most frequent value.

    Raises:
        ValueError: if the column has no observed value at all, because there
            is then nothing to impute from.
    """
    counts = column.value_counts()  # sorted by frequency, NaN excluded
    if counts.empty:
        raise ValueError(
            "Column %r contains only missing values; cannot impute its mode" % (column.name,)
        )
    return column.fillna(counts.index[0])


# Mode imputation, applied column by column
bm = bm.apply(_fill_with_mode)

In [None]:
# dropna() returns a new DataFrame, so its result must be assigned back for any
# row to actually be removed. Show only the first rows instead of dumping the
# whole frame into the notebook output.
bm = bm.dropna()
bm.head()

### Unique values of each attribute:

In [None]:
# Number of distinct (non-missing) values in every column
bm.apply(lambda col: col.nunique())

### Bar plot of each attribute:

In [None]:
# One panel per column: a bar chart of the value counts for non-numeric
# columns, a histogram for numeric ones.
fig = plt.figure(figsize=(20, 15))
cols = 5
rows = math.ceil(bm.shape[1] / cols)
for i, column in enumerate(bm.columns):
    ax = fig.add_subplot(rows, cols, i + 1)
    ax.set_title(column)
    # The dtype is tested through pandas instead of `np.object`, an alias that
    # was removed in NumPy 1.24.
    if not pd.api.types.is_numeric_dtype(bm[column]):
        # pandas plotting takes the target axes through `ax=`
        bm[column].value_counts().plot(kind="bar", ax=ax)
    else:
        bm[column].hist(ax=ax)
        # Rotate the tick labels of this panel explicitly rather than relying
        # on pyplot's notion of the "current" axes.
        ax.tick_params(axis="x", labelrotation=90)
plt.subplots_adjust(hspace=0.9, wspace=0.2)

In [None]:
# Recode the target to 0/1: the value 2 becomes 0, the value 1 is kept as is.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on a column selection, which acts on a temporary
# object and does not reliably update `bm` (it has no effect under pandas
# copy-on-write). The former replace(1, 1) was a no-op and is dropped.
bm['Class'] = bm['Class'].replace({2: 0})

### Plotting 'Marital status' according to term deposit class:

In [None]:
# Number of clients of each target class within every marital-status group (V3)
class_counts_by_marital = bm.groupby('V3')['Class'].value_counts().unstack('Class')

# `data` keeps the matplotlib Axes returned by the plotting call, as before
data = class_counts_by_marital.plot.bar(color=['#deb887', '#8b4513'], width=1)
data.set_xlabel('V3 (marital status)')
data.set_ylabel('Number of clients')

# Print the underlying counts; printing `data` would only show the Axes repr
print(class_counts_by_marital)

In [None]:
# Binary-encode marital status: divorced -> 0, married / single -> 1.
# The mapping is applied to the marital-status column (V3) only, so a matching
# string in any other column cannot be rewritten by accident, and the result is
# assigned back instead of mutating the frame with inplace=True.
marital_codes = {'divorced': 0, 'married': 1, 'single': 1}
# infer_objects() turns the column into an integer dtype once every value has
# been mapped (newer pandas versions no longer downcast inside replace()).
bm['V3'] = bm['V3'].replace(marital_codes).infer_objects()

## Data Preparation

One-hot encoding is the process of representing multi-class categorical features as binary features, one for each class. Although this process increases the dimensionality of the dataset, classification algorithms tend to work better on this format of data.

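I use one-hot encoding, dropping the first level of each feature (`drop_first=True`) to avoid redundant columns, for the remaining categorical features in the dataset; marital status (`V3`) has already been mapped to a binary 0/1 column above.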


In [None]:
# Columns that are expanded into indicator (dummy) variables
category_col_1 = ['V2', 'V4', 'V5', 'V7', 'V8', 'V9', 'V11', 'V16']

# drop_first=True leaves out the indicator of the first level of every feature
bm1 = pd.get_dummies(
    data=bm,
    columns=category_col_1,
    drop_first=True,
)

In [None]:
# Total number of columns in the encoded frame
m = bm1.shape[1]

In [None]:
# Positions of every column of bm1 except position 8, which is the column
# taken as the target `y` when X and y are built.
# NOTE(review): position 8 is presumably the 'Class' column - confirm.
idx = list(filter(lambda pos: pos != 8, range(m)))

In [None]:
# Feature matrix: every column listed in `idx`; target vector: column 8.
X = bm1.iloc[:, idx].values
y = bm1.iloc[:, 8].values

# Hold out 25% of the rows for testing. Passing the seeded generator `rs`
# makes the split reproducible on a fresh "Restart & Run All" (re-running only
# this cell advances the generator and therefore yields a different split).
# train_test_split is already imported in the imports cell at the top.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=rs
)
print ("Training set has {} samples.".format(X_train.shape[0]))
print ("Testing set has {} samples.".format(X_test.shape[0]))
print(X_train.shape)

#### Normalization:


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature minimum and maximum on the training split only, then
# rescale both splits with those same statistics.
sc = MinMaxScaler()
sc.fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

#### Training with neural nets:

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense

# BASELINE SCENARIO
def build_model(epochs=100, batch_size=10, learning_rate=3e-4):
    """Build, compile and train the baseline feed-forward binary classifier.

    The network has two hidden ReLU layers (45 and 30 units) and a single
    sigmoid output unit. It is trained on the notebook-level `X_train` /
    `y_train` arrays, with 20% of them held out for validation.

    Args:
        epochs: number of passes over the training data.
        batch_size: number of samples per gradient update.
        learning_rate: step size of the Adam optimizer.

    Returns:
        The fitted Keras model.
    """
    # Take the input width from the data instead of hard-coding it, so the
    # model keeps matching the feature matrix when the encoding changes.
    n_features = X_train.shape[1]

    model = Sequential()

    # Input layer and first hidden layer. `units` replaces the Keras-1 keyword
    # `output_dim`, which current Keras versions no longer accept.
    model.add(Dense(units=45, activation='relu', input_dim=n_features))
    # Second hidden layer
    model.add(Dense(units=30, activation='relu'))
    # Output layer: one sigmoid unit for the binary target
    model.add(Dense(units=1, activation='sigmoid'))

    opt = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    # `epochs` replaces the Keras-1 keyword `nb_epoch`
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2)
    return model

model = build_model()

#### Confusion matrix for Divorced clients:

In [None]:
# Restrict the test set to divorced clients. Marital status is column 'V3'
# (divorced was encoded as 0; 0/1 values are left unchanged by min-max scaling
# as long as both occur in the training split). Its position inside X is
# looked up by name: X was built from the bm1 columns listed in `idx`, so a
# hard-coded position silently selects another feature.
marital_col = list(bm1.columns[idx]).index('V3')
filter_rows = X_test[:, marital_col] == 0
X_test_div = X_test[filter_rows, :]
y_test_div = y_test[filter_rows]
y_pred_div = model.predict(X_test_div)        # sigmoid outputs of the network
y_pred_d = np.where(y_pred_div >= 0.5, 1, 0)  # hard 0/1 labels at a 0.5 threshold

print(classification_report(y_test_div, y_pred_d))
# Compare against this subgroup's own predictions (`y_pred_d`). Fixing the
# label order guarantees a 2x2 matrix, so the unpacking below also works when
# one of the classes is absent from the subgroup.
cm_div = confusion_matrix(y_test_div, y_pred_d, labels=[0, 1])
tn,fp,fn,tp = cm_div.ravel()

#### Performance measures:

In [None]:
# Scores for the divorced subgroup: compute everything first, then report.
accuracy = accuracy_score(y_test_div, y_pred_d)    # (tp + tn) / (tp + tn + fp + fn)
precision = precision_score(y_test_div, y_pred_d)  # tp / (tp + fp)
recall = recall_score(y_test_div, y_pred_d)        # tp / (tp + fn)
fnr = 1 - recall                                   # false-negative rate
fpr = fp / (fp + tn)                               # false-positive rate, from the confusion-matrix counts
f1 = f1_score(y_test_div, y_pred_d)                # 2 tp / (2 tp + fp + fn)

for template, score in (
    ('Accuracy: %f', accuracy),
    ('Precision: %f', precision),
    ('Recall: %f', recall),
    ('FNR: %f', fnr),
    ('FPR: %f', fpr),
    ('F1 score: %f', f1),
):
    print(template % score)

#### ROC AUC score and Gini coefficient:

In [None]:
# The ROC curve must be built from the continuous model scores: feeding it the
# thresholded 0/1 labels collapses the curve to a single operating point and
# misstates the AUC. `y_pred_div` holds the sigmoid outputs for the subgroup.
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test_div, y_pred_div.ravel())
roc_auc = auc(false_positive_rate, true_positive_rate)


plt.title('Receiver Operating Characteristic Curve')
plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')  # chance-level diagonal
plt.xlim([-0.1,1.1])
plt.ylim([-0.1,1.1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# Gini coefficient derived from the AUC: 2 * AUC - 1
Gini_coefficient=2*roc_auc - 1
print ("Gini_coefficient =",Gini_coefficient)

#### Confusion matrix for non-divorced (married or single, encoded as 1) clients:

In [None]:
# Restrict the test set to non-divorced clients (married or single, encoded as
# 1 in column 'V3'; 0/1 values are left unchanged by min-max scaling as long as
# both occur in the training split). The position of 'V3' inside X is looked up
# by name: X was built from the bm1 columns listed in `idx`, so a hard-coded
# position silently selects another feature.
marital_col = list(bm1.columns[idx]).index('V3')
filter_rows = X_test[:, marital_col] == 1
X_test_nd = X_test[filter_rows, :]
y_test_nd = y_test[filter_rows]
y_pred_ndiv = model.predict(X_test_nd)          # sigmoid outputs of the network
y_pred_nd = np.where(y_pred_ndiv >= 0.5, 1, 0)  # hard 0/1 labels at a 0.5 threshold

print(classification_report(y_test_nd, y_pred_nd))
# Fixing the label order guarantees a 2x2 matrix, so the unpacking below also
# works when one of the classes is absent from the subgroup.
cmnd = confusion_matrix(y_test_nd, y_pred_nd, labels=[0, 1])
tn,fp,fn,tp = cmnd.ravel()

#### Performance measures: 

In [None]:
# Scores for the non-divorced subgroup: compute everything first, then report.
accuracy = accuracy_score(y_test_nd, y_pred_nd)    # (tp + tn) / (tp + tn + fp + fn)
precision = precision_score(y_test_nd, y_pred_nd)  # tp / (tp + fp)
recall = recall_score(y_test_nd, y_pred_nd)        # tp / (tp + fn)
fnr = 1 - recall                                   # false-negative rate
fpr = fp / (fp + tn)                               # false-positive rate, from the confusion-matrix counts
f1 = f1_score(y_test_nd, y_pred_nd)                # 2 tp / (2 tp + fp + fn)

for template, score in (
    ('Accuracy: %f', accuracy),
    ('Precision: %f', precision),
    ('Recall: %f', recall),
    ('FNR: %f', fnr),
    ('FPR: %f', fpr),
    ('F1 score: %f', f1),
):
    print(template % score)

#### ROC AUC score and Gini coefficient:

In [None]:
# The ROC curve must be built from the continuous model scores: feeding it the
# thresholded 0/1 labels collapses the curve to a single operating point and
# misstates the AUC. `y_pred_ndiv` holds the sigmoid outputs for the subgroup.
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test_nd, y_pred_ndiv.ravel())
roc_auc = auc(false_positive_rate, true_positive_rate)


plt.title('Receiver Operating Characteristic Curve')
plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')  # chance-level diagonal
plt.xlim([-0.1,1.1])
plt.ylim([-0.1,1.1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# Gini coefficient derived from the AUC: 2 * AUC - 1
Gini_coefficient=2*roc_auc - 1
print ("Gini_coefficient =",Gini_coefficient)