In [1]:
import pandas as pd
import numpy as np    
from sklearn.preprocessing import LabelEncoder

In [6]:
df = pd.read_csv('bank-full.csv',sep=";")


In [7]:
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [8]:
df.shape


(45211, 17)

In [19]:
df['month'].value_counts()

may    13766
jul     6895
aug     6247
jun     5341
nov     3970
apr     2932
feb     2649
jan     1403
oct      738
sep      579
mar      477
dec      214
Name: month, dtype: int64

In [22]:
col=['job','marital','education','default','housing','loan','contact','poutcome']

In [20]:
look_up = { 'jan':1,  'feb':2,  'mar':3,  'apr':4, 'may':5,
             'jun':6,  'jul':7,  'aug':8,  'sep':9, 'oct':10, 'nov':11, 'dec':12}

df['month'] = df['month'].apply(lambda x: look_up[x])

In [23]:
le=LabelEncoder()
for c in col:
    df[c]=le.fit_transform(df[c])
df    

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,4,1,2,0,2143,1,0,2,5,5,261,1,-1,0,3,no
1,44,9,2,1,0,29,1,0,2,5,5,151,1,-1,0,3,no
2,33,2,1,1,0,2,1,1,2,5,5,76,1,-1,0,3,no
3,47,1,1,3,0,1506,1,0,2,5,5,92,1,-1,0,3,no
4,33,11,2,3,0,1,0,0,2,5,5,198,1,-1,0,3,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,9,1,2,0,825,0,0,0,17,11,977,3,-1,0,3,yes
45207,71,5,0,0,0,1729,0,0,0,17,11,456,2,-1,0,3,yes
45208,72,5,1,1,0,5715,0,0,0,17,11,1127,5,184,3,2,yes
45209,57,1,1,1,0,668,0,0,1,17,11,508,4,-1,0,3,no


In [24]:
X=df.drop(columns=['y'])

In [26]:
X=np.array(X)

In [28]:
from sklearn.preprocessing import OneHotEncoder
one_hot_encoder = OneHotEncoder(sparse=False)

Y = df.y
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))
Y[:5]

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.]])

In [29]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1)

In [30]:
def NeuralNetwork(X_train, Y_train, X_val=None, Y_val=None, epochs=10, nodes=[], lr=0.15):
    hidden_layers = len(nodes) - 1
    weights = InitializeWeights(nodes)

    for epoch in range(1, epochs+1):
        weights = Train(X_train, Y_train, lr, weights)

        if(epoch % 20 == 0):
            print("Epoch {}".format(epoch))
            print("Training Accuracy:{}".format(Accuracy(X_train, Y_train, weights)))
            if X_val.any():
                print("Validation Accuracy:{}".format(Accuracy(X_val, Y_val, weights)))
            
    return weights

In [31]:
def InitializeWeights(nodes):
    """Initialize weights with random values in [-1, 1] (including bias)"""
    layers, weights = len(nodes), []
    
    for i in range(1, layers):
        w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)]
              for j in range(nodes[i])]
        weights.append(np.matrix(w))
    
    return weights

In [32]:
def ForwardPropagation(x, weights, layers):
    activations, layer_input = [x], x
    for j in range(layers):
        activation = Sigmoid(np.dot(layer_input, weights[j].T))
        activations.append(activation)
        layer_input = np.append(1, activation) # Augment with bias
    
    return activations

In [33]:
def BackPropagation(y, activations, weights, layers):
    outputFinal = activations[-1]
    error = np.matrix(y - outputFinal) # Error at output
    
    for j in range(layers, 0, -1):
        currActivation = activations[j]
        
        if(j > 1):
            # Augment previous activation
            prevActivation = np.append(1, activations[j-1])
        else:
            # First hidden layer, prevActivation is input (without bias)
            prevActivation = activations[0]
        
        delta = np.multiply(error, SigmoidDerivative(currActivation))
        weights[j-1] += lr * np.multiply(delta.T, prevActivation)

        w = np.delete(weights[j-1], [0], axis=1)         
        error = np.dot(delta, w) # Calculate error for current layer
    
    return weights



In [34]:
def Train(X, Y, lr, weights):
    layers = len(weights)
    for i in range(len(X)):
        x, y = X[i], Y[i]
        x = np.matrix(np.append(1, x)) # Augment feature vector
        
        activations = ForwardPropagation(x, weights, layers)
        weights = BackPropagation(y, activations, weights, layers)

    return weights

In [35]:
def Sigmoid(x):
    return 1 / (1 + np.exp(-x))

def SigmoidDerivative(x):
    return np.multiply(x, 1-x)

In [36]:
def Predict(item, weights):
    layers = len(weights)
    item = np.append(1, item) # Augment feature vector
    
    ##_Forward Propagation_##
    activations = ForwardPropagation(item, weights, layers)
    
    outputFinal = activations[-1].A1
    index = FindMaxActivation(outputFinal)

    # Initialize prediction vector to zeros
    y = [0 for i in range(len(outputFinal))]
    y[index] = 1  # Set guessed class to 1

    return y # Return prediction vector


def FindMaxActivation(output):
    """Find max activation in output"""
    m, index = output[0], 0
    for i in range(1, len(output)):
        if(output[i] > m):
            m, index = output[i], i
    
    return index

In [37]:
def Accuracy(X, Y, weights):
    """Run set through network, find overall accuracy"""
    correct = 0

    for i in range(len(X)):
        x, y = X[i], list(Y[i])
        guess = Predict(x, weights)

        if(y == guess):
            # Guessed correctly
            correct += 1

    return correct / len(X)

In [38]:
f = len(X[0]) # Number of features
o = len(Y[0]) # Number of outputs / classes

layers = [f, 5, 10, o] # Number of nodes in layers
lr, epochs = 0.15, 100

weights = NeuralNetwork(X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr);

Epoch 20
Training Accuracy:0.8843751807089574
Validation Accuracy:0.8810824876398647
Epoch 40
Training Accuracy:0.8843751807089574
Validation Accuracy:0.8810824876398647
Epoch 60
Training Accuracy:0.8843751807089574
Validation Accuracy:0.8810824876398647
Epoch 80
Training Accuracy:0.8843751807089574
Validation Accuracy:0.8810824876398647
Epoch 100
Training Accuracy:0.8843751807089574
Validation Accuracy:0.8810824876398647


In [39]:
print("Testing Accuracy: {}".format(Accuracy(X_test, Y_test, weights)))

Testing Accuracy: 0.8771748746682395


In [40]:
y_pred=[]
for i in range(len(X_test)):
    x= X_test[i]
    guess = Predict(x, weights)
    y_pred.append(guess)

In [41]:
def fun(ls):
    if ls[0]==0:
        return "No"
    else :
        return "Yes"

In [42]:
y_final_pred=[]
y_test_final=[]
for i in range(len(X_test)):
    y_final_pred.append(fun(y_pred[i]))
    y_test_final.append(fun(Y_test[i]))

In [43]:
from sklearn import metrics
# Print the confusion matrix

print(metrics.confusion_matrix(y_test_final,y_final_pred))

# Print the precision and recall, among other metrics
print(metrics.classification_report(y_test_final,y_final_pred, digits=3))

[[   0  833]
 [   0 5949]]
              precision    recall  f1-score   support

          No      0.000     0.000     0.000       833
         Yes      0.877     1.000     0.935      5949

    accuracy                          0.877      6782
   macro avg      0.439     0.500     0.467      6782
weighted avg      0.769     0.877     0.820      6782



  'precision', 'predicted', average, warn_for)
