In [None]:
#importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# Read the semicolon-delimited bank marketing CSV into a DataFrame.
data = pd.read_csv("/content/bank-full.csv", delimiter=';')


In [None]:
data.shape #Raw dataset as loaded: 45211 rows and 17 columns

(45211, 17)

In [None]:
# Preview the first rows to check the column names and the raw value formats.
data.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


# Data Preprocessing

## Removing NULL Values

In [None]:
# The placeholder strings 'unknown' and 'other' stand for missing information;
# turn both into NaN in a single pass so pandas can count and drop them.
placeholder_values = ['unknown', 'other']
data = data.replace(placeholder_values, np.nan)
data.isna().sum() #There are null values

age              0
job            288
marital          0
education     1857
default          0
balance          0
housing          0
loan             0
contact      13020
day              0
month            0
duration         0
campaign         0
pdays            0
previous         0
poutcome     38799
y                0
dtype: int64

In [None]:
# Columns removed before modelling:
#   - 'contact' and 'poutcome' are dropped entirely.
#   - 'duration' is only known once the call has ended, at which point the
#     outcome of the call is known as well, so it cannot be used for prediction.
columns_to_remove = ['contact', 'poutcome', 'duration']
data = (
    data
    .drop(columns=columns_to_remove)
    .dropna()            # discard rows that still contain a missing value
    .drop_duplicates()   # discard exact duplicate rows
)

In [None]:
data.shape #After dropping three columns, rows with missing values and duplicate rows: 43172 rows and 14 columns


(43172, 14)

SyntaxError: ignored

**Correlation Plot**

In [None]:
# Correlation is only defined for numeric columns. Selecting them explicitly
# keeps this cell working on pandas versions where DataFrame.corr() no longer
# silently skips string columns and raises instead.
corrdata = data.select_dtypes(include='number').corr()

# plt.subplots returns (figure, axes) in that order; draw the heatmap on the
# axes we just created so the requested figure size is actually used.
fig, ax = plt.subplots(figsize=(15,8))
sns.heatmap(corrdata, annot=True, ax=ax)

In [None]:
# Numeric features whose distributions we want to inspect.
numerical_variables = [
    'age',
    'balance',
    'day',
    'campaign',
    'pdays',
    'previous',
]

# One histogram per numeric feature, with extra spacing so titles and tick
# labels of neighbouring panels do not overlap.
data.hist(figsize=(10, 10), column=numerical_variables)
plt.subplots_adjust(hspace=0.5, wspace=0.5)
plt.show()

In [None]:
categorical_variables = ['job','marital', 'education', 'default', 'housing','loan', 'month', 'day','y']
for col in categorical_variables:
    # Count each category once and reuse the result for both axes.
    counts = data[col].value_counts()
    plt.figure(figsize=(10,4))
    # x/y are passed by keyword: recent seaborn releases no longer accept them
    # positionally. orient='h' forces horizontal bars even when the category
    # labels are numeric (the 'day' column), where the orientation would
    # otherwise be ambiguous.
    sns.barplot(x=counts.values, y=counts.index, orient='h')
    plt.title(col)
    plt.tight_layout()

In [None]:
# Share of contacts that ended with y == 'yes', per calendar month.
# NOTE(review): the original line was `data.groupby('y')[month].mean()`, which
# fails because `month` is an undefined name and, even when quoted, 'month' is a
# string column that has no mean. The per-month mean of the target is presumably
# what was intended - confirm.
dataset = data['y'].eq('yes').groupby(data['month']).mean()

## Handling Categorical Variables

In [None]:
# Encode the three yes/no columns with one shared mapping so that 'yes' means 1
# everywhere ('default' previously mapped 'yes' to -1, unlike 'housing'/'loan').
yes_no_codes = {'yes': 1, 'no': 0}
binary_columns = ['default', 'housing', 'loan']
# assign() returns a new frame instead of writing into the existing one.
data = data.assign(**{col: data[col].map(yes_no_codes) for col in binary_columns})

In [None]:
# Nominal (unordered) columns are one-hot encoded.
nominal = ['job','marital','education','month',]
# The dummy dtype is pinned to an integer type: newer pandas releases default to
# boolean dummies, and a frame mixing bool and int columns converts to an
# object array, which the numeric normalisation applied later cannot handle.
dataProcessed = pd.get_dummies(data, columns=nominal, dtype=np.uint8)
# Target: 1 for 'yes', 0 for 'no'.
dataProcessed['y']=dataProcessed['y'].map({'yes': 1,'no': 0})

In [None]:
# Rebind `data` to the encoded frame so the following cells operate on it.
data=dataProcessed
data.head() #all values have been converted to numerical values

In [None]:
data.shape #Expected after one-hot encoding: 43172 rows and 39 columns

## Downsampling the Dataset (Undersampling the Majority Class)

In [None]:
# Separate the rows by target value: data1 holds y == 0, data2 holds y == 1.
target = data['y']
data1 = data.loc[target.eq(0)]
data2 = data.loc[target.eq(1)]
data1.shape

In [None]:
# Draw as many y == 0 rows as there are y == 1 rows so both classes end up the
# same size. The sample size is derived from data2 instead of the previously
# hard-coded 5021 (presumably the y == 1 row count - confirm), and the draw is
# seeded so the balanced dataset is the same on every run.
data3 = data1.sample(n=len(data2), random_state=1)
data3.shape

In [None]:
# Stack the y == 1 rows on top of the sampled y == 0 rows.
balanced_parts = [data2, data3]
data4 = pd.concat(balanced_parts, axis=0)
data4.shape

# Model Building 


Splitting into training and testing data 

In [None]:
# Reorder the columns so that the target 'y' is the last one; the split below
# relies on that position.
Y = data4["y"]
feature_columns = [name for name in data4.columns if name != 'y']
data4 = pd.concat([data4[feature_columns], Y], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

# Every column except the last is a feature; the last column is the target.
features = data4.iloc[:, :-1]
labels = data4.iloc[:, -1]

# 80 % of the rows go to training, the rest to testing; the split is seeded.
trainX, testX, trainY, testY = train_test_split(
    features,
    labels,
    train_size=0.8,
    random_state=1,
)

In [None]:
# Preview the training features produced by the split.
trainX.head()

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forests are randomised; fixing the seed (the same value used for the
# train/test split) makes the reported metrics reproducible between runs.
model=RandomForestClassifier(random_state=1)
model.fit(trainX,trainY)
# Hard class predictions for the held-out rows.
pred=model.predict(testX)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Rows are true classes and columns are predicted classes, in the label order
# given below; class 1 is the positive class.
cm1 = confusion_matrix(testY, pred, labels=[0, 1])
print('Confusion Matrix : \n', cm1)

# Flattening the 2x2 matrix row by row yields TN, FP, FN, TP.
tn, fp, fn, tp = cm1.ravel()
total1 = cm1.sum()

accuracy1 = (tn + tp) / total1
print ('Accuracy : ', accuracy1)

# Sensitivity = TP / (TP + FN): share of actual positives that were found.
# (The previous formula used the class-0 row and therefore reported specificity.)
sensitivity1 = tp / (tp + fn)
print('Sensitivity : ', sensitivity1 )

# Specificity = TN / (TN + FP): share of actual negatives that were found.
specificity1 = tn / (tn + fp)
print('Specificity : ', specificity1)

In [None]:
# Per-class report for the predictions in `pred`; true labels are passed first,
# predictions second.
print(classification_report(testY, pred))

In [None]:
import sklearn.metrics as metrics
#AUC
# ROC AUC is meant to be computed from a continuous score, not from hard 0/1
# predictions (which collapse the ROC curve to a single threshold). Use the
# predicted probability of class 1, i.e. the second column of predict_proba.
positive_scores = model.predict_proba(testX)[:, 1]
randomforestauc = metrics.roc_auc_score(testY, positive_scores)
randomforestauc

## Support Vector Classifier

In [None]:
from sklearn.svm import SVC

# `degree` was removed: it only applies to the polynomial kernel (the kernel is
# left at its default here), and the previous value 0.8 is not an integer, which
# newer scikit-learn releases reject during parameter validation.
# NOTE(review): the features are passed in unscaled; SVMs are usually sensitive
# to feature scale - consider standardising before fitting.
model=SVC(gamma=0.6,max_iter=10000)
model.fit(trainX,trainY)
# Hard class predictions for the held-out rows.
pred=model.predict(testX)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Rows are true classes and columns are predicted classes, in the label order
# given below; class 1 is the positive class.
cm1 = confusion_matrix(testY, pred, labels=[0, 1])
print('Confusion Matrix : \n', cm1)

# Flattening the 2x2 matrix row by row yields TN, FP, FN, TP.
tn, fp, fn, tp = cm1.ravel()
total1 = cm1.sum()

accuracy1 = (tn + tp) / total1
print ('Accuracy : ', accuracy1)

# Sensitivity = TP / (TP + FN): share of actual positives that were found.
# (The previous formula used the class-0 row and therefore reported specificity.)
sensitivity1 = tp / (tp + fn)
print('Sensitivity : ', sensitivity1 )

# Specificity = TN / (TN + FP): share of actual negatives that were found.
specificity1 = tn / (tn + fp)
print('Specificity : ', specificity1)

In [None]:
# classification_report expects (y_true, y_pred). The arguments were previously
# passed the other way round, which swaps precision and recall in the report.
print(classification_report(testY, pred))

In [None]:
import sklearn.metrics as metrics
#AUC
# ROC AUC is meant to be computed from a continuous score, not from hard 0/1
# predictions. For the SVC the signed distance to the decision boundary is
# used; larger values mean "more likely class 1".
decision_scores = model.decision_function(testX)
supportVectorMachineauc = metrics.roc_auc_score(testY, decision_scores)
supportVectorMachineauc

## Artificial Neural Network

In [None]:
pip install tensorflow==2.2

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.utils import normalize

In [None]:
# Normalise the training and test features with keras' `normalize` helper,
# called with its default arguments.
from keras.utils import normalize
Xtrain, Xtest = normalize(trainX), normalize(testX)

In [None]:
# Network definition: two fully connected ReLU layers of 39 units each,
# followed by a 2-unit softmax layer that outputs one probability per class.
tf.keras.backend.set_floatx('float64')  # run Keras in double precision
model = keras.models.Sequential([
    keras.layers.Dense(39, activation=tf.nn.relu),
    keras.layers.Dense(39, activation=tf.nn.relu),
    keras.layers.Dense(2, activation=tf.nn.softmax),
])

# The sparse variant of categorical cross-entropy takes integer class labels
# directly, so the 0/1 target does not have to be one-hot encoded.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Train the network on the normalized training features for 100 epochs.
model.fit(Xtrain,trainY,epochs=100)

In [None]:
# Class probabilities for the normalized test rows (one column per class) ...
predY = model.predict(Xtest)
# ... and the predicted class: the column index with the highest probability.
y_pred_class = predY.argmax(axis=1)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Rows are true classes and columns are predicted classes, in the label order
# given below; class 1 is the positive class.
cm1 = confusion_matrix(testY, y_pred_class, labels=[0, 1])
print('Confusion Matrix : \n', cm1)

# Flattening the 2x2 matrix row by row yields TN, FP, FN, TP.
tn, fp, fn, tp = cm1.ravel()
total1 = cm1.sum()

accuracy1 = (tn + tp) / total1
print ('Accuracy : ', accuracy1)

# Sensitivity = TP / (TP + FN): share of actual positives that were found.
# (The previous formula used the class-0 row and therefore reported specificity.)
sensitivity1 = tp / (tp + fn)
print('Sensitivity : ', sensitivity1 )

# Specificity = TN / (TN + FP): share of actual negatives that were found.
specificity1 = tn / (tn + fp)
print('Specificity : ', specificity1)

In [None]:
# Per-class report for the network's predicted classes; true labels are passed
# first, predictions second.
print(classification_report(testY,y_pred_class))

In [None]:
import sklearn.metrics as metrics
#AUC
# ROC AUC is meant to be computed from a continuous score, not from hard 0/1
# predictions. Use the softmax output for class 1 (second column of predY).
artificialNeuralNetworkauc = metrics.roc_auc_score(testY, predY[:, 1])
artificialNeuralNetworkauc

## Probabilistic Neural Network

In [None]:
pip install neupy

In [None]:
from neupy import algorithms
from sklearn import metrics

In [None]:
# Create a neupy probabilistic neural network with std=10 and verbose output,
# then fit it on the normalized training features.
# NOTE(review): std=10 is hard-coded with no stated justification; presumably
# it should be tuned for the scale of the normalized inputs - confirm.
pnn = algorithms.PNN(std=10,verbose=True)
pnn.train(Xtrain,trainY)

In [None]:
# Predicted labels and class probabilities for the normalized test features.
# `pred` was already used for the earlier models' predictions and is
# overwritten here.
pred=pnn.predict(Xtest)
prob=pnn.predict_proba(Xtest)
print(pred)

In [None]:
# Predict on the normalized test features: the network was trained on the
# normalized training set (Xtrain), so feeding it the raw testX frame would
# evaluate it on inputs with a different scale than it was fitted on.
y_pred = pnn.predict(Xtest)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Rows are true classes and columns are predicted classes, in the label order
# given below; class 1 is the positive class.
cm1 = confusion_matrix(testY, y_pred, labels=[0, 1])
print('Confusion Matrix : \n', cm1)

# Flattening the 2x2 matrix row by row yields TN, FP, FN, TP.
tn, fp, fn, tp = cm1.ravel()
total1 = cm1.sum()

accuracy1 = (tn + tp) / total1
print ('Accuracy : ', accuracy1)

# Sensitivity = TP / (TP + FN): share of actual positives that were found.
# (The previous formula used the class-0 row and therefore reported specificity.)
sensitivity1 = tp / (tp + fn)
print('Sensitivity : ', sensitivity1 )

# Specificity = TN / (TN + FP): share of actual negatives that were found.
specificity1 = tn / (tn + fp)
print('Specificity : ', specificity1)

In [None]:
# Per-class report for the predictions in `y_pred`; true labels are passed
# first, predictions second.
print(classification_report(testY,y_pred))

In [None]:
import sklearn.metrics as metrics
#AUC
# ROC AUC is meant to be computed from a continuous score, not from hard 0/1
# predictions. Use the network's probability for class 1 on the normalized
# test features.
# NOTE(review): assumes predict_proba returns one column per class in sorted
# label order, so column 1 belongs to class 1 - confirm against neupy's docs.
pnn_positive_scores = pnn.predict_proba(Xtest)[:, 1]
probablisticNeuralNetworkauc = metrics.roc_auc_score(testY, pnn_positive_scores)
probablisticNeuralNetworkauc

**Comparing all the auc scores:**

In [None]:
# Print one "<label> <AUC>" line per model, in the order the models were built.
auc_by_label = (
    ('random forest:', randomforestauc),
    ('Support Vector Machines:', supportVectorMachineauc),
    ('Artificial Neural Network:', artificialNeuralNetworkauc),
    ('Probablistic Neural Network:', probablisticNeuralNetworkauc),
)
for label, auc_value in auc_by_label:
    print(label, auc_value)

In [None]:
# Highest AUC among the four models.
best_auc = max(
    randomforestauc,
    supportVectorMachineauc,
    artificialNeuralNetworkauc,
    probablisticNeuralNetworkauc,
)
print('best result is:' )
print(best_auc)