In [36]:
# The data given in the url is related with direct marketing campaigns of a banking
# institution. The marketing campaigns were based on phone calls. Often, more
# than one contact to the same client was required, in order to access if the product
# (bank term deposit) would be ('yes') or not ('no') subscribed. Build a model to
# predict whether client will subscribe to term deposit

import pandas as pd
import numpy as np

In [37]:
dataset=pd.read_csv("bank.csv",sep=';')
# dataset=dataset.head(50)
dataset=dataset[['age','contact','education','day','duration','campaign','pdays','previous','y']]
dataset.head()

Unnamed: 0,age,contact,education,day,duration,campaign,pdays,previous,y
0,30,cellular,primary,19,79,1,-1,0,no
1,33,cellular,secondary,11,220,1,339,4,no
2,35,cellular,tertiary,16,185,1,330,1,no
3,30,unknown,tertiary,3,199,4,-1,0,no
4,59,unknown,secondary,5,226,1,-1,0,no


In [38]:
dataset['y'].replace(['no','yes'],[0,1],inplace=True)
dataset['contact'].replace(['unknown','cellular','telephone'],[0,1,2],inplace=True)
dataset['education'].replace(['primary','secondary','tertiary','unknown'],[0,1,2,3],inplace=True)
X=dataset[['age','contact']]
Y=dataset['y']
len(X)

4521

In [39]:
Z = np.zeros((X.shape))
for i in range(len(X)):
    Z[i, Y[i]] = 1

In [40]:
X.shape

(4521, 2)

In [41]:
Wi_1 = np.random.randn(3, 2)
Bi_1 = np.random.randn(3)
Wi_2 = np.random.randn(3, 2)
Bi_2 = np.random.randn(2)

In [42]:
Wi_1.T

array([[ 0.35464839,  0.59283329,  0.49862868],
       [ 1.27528312,  1.22095997, -0.02394479]])

In [43]:
X=np.array(X)
X

array([[30,  1],
       [33,  1],
       [35,  1],
       ...,
       [57,  1],
       [28,  1],
       [44,  1]], dtype=int64)

In [44]:
X.dot(Wi_1.T)

array([[11.91473484, 19.00595876, 14.93491575],
       [12.97868002, 20.78445864, 16.4308018 ],
       [13.6879768 , 21.97012523, 17.42805917],
       ...,
       [21.49024139, 35.01245767, 28.39789024],
       [11.20543806, 17.82029218, 13.93765838],
       [16.87981231, 27.30562486, 21.91571733]])

In [45]:
def forward_prop(X, Wi_1, Bi_1, Wi_2, Bi_2):
    #first layer 
    M = 1 / (1 + np.exp(-(X.dot(Wi_1.T) + Bi_1)))
    # second layer
    A = M.dot(Wi_2) + Bi_2
    expA = np.exp(A)
    Y = expA / expA.sum(axis=1, keepdims=True)
    return Y, M

In [46]:
forward_prop(X, Wi_1, Bi_1, Wi_2, Bi_2)

(array([[0.97690362, 0.02309638],
        [0.97690372, 0.02309628],
        [0.97690375, 0.02309625],
        ...,
        [0.97690377, 0.02309623],
        [0.97690346, 0.02309654],
        [0.97690377, 0.02309623]]),
 array([[0.99999452, 0.99999999, 0.99999981],
        [0.99999811, 1.        , 0.99999996],
        [0.99999907, 1.        , 0.99999998],
        ...,
        [1.        , 1.        , 1.        ],
        [0.99998885, 0.99999998, 0.99999949],
        [0.99999996, 1.        , 1.        ]]))

In [47]:
#Returns gradient for Weight_2
def diff_Wi_2(H, Z, Y):
    return H.T.dot(Z - Y)

In [48]:
#Returns gradient for Weight_2
def diff_Wi_1(X, H, Z, output, Wi_2):
    dZ = (Z - output).dot(Wi_2.T) * H * (1 - H)
    return X.T.dot(dZ)

In [49]:
# Returns derivative for both bias
def diff_B2(Z, Y):
    return (Z - Y).sum(axis=0)

def diff_B1(Z, Y, Wi_2, H):
    return ((Z - Y).dot(Wi_2.T) * H * (1 - H)).sum(axis=0)

In [50]:
learning_rate = 1e-3
for epoch in range(5000):
    output, hidden = forward_prop(X, Wi_1, Bi_1, Wi_2, Bi_2)
    Wi_2 += learning_rate * diff_Wi_2(hidden, Z, output)
    Bi_2 += learning_rate * diff_B2(Z, output)
    Wi_1 += learning_rate * diff_Wi_1(X, hidden, Z, output, Wi_2).T
    Bi_1 += learning_rate * diff_B1(Z, output, Wi_2, hidden)

In [51]:
X_test =np.array([5,5])

hidden_output = 1 / (1 + np.exp(-X_test.dot(Wi_1.T) - Bi_1))
Outer_layer_output = hidden_output.dot(Wi_2) + Bi_2
expA = np.exp(Outer_layer_output)
Y = expA / expA.sum()
print(" Accuracy of class No :  {} \n Accuracy of class Yes : {}".format(Y[0],Y[1]))

 Accuracy of class No :  0.9674681255825134 
 Accuracy of class Yes : 0.03253187441748661
