In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
data = pd.read_csv('Xor_Dataset.csv')
data.head()

Unnamed: 0,X,Y,Z
0,0,0,0
1,0,1,1
2,1,1,0
3,1,1,0
4,0,0,0


In [3]:
_input = data.drop('Z', axis=1)

_input = np.array(_input)


output = np.array(data['Z'])

print (output)
print(_input)


[0 1 0 ... 0 0 0]
[[0 0]
 [0 1]
 [1 1]
 ...
 [1 1]
 [1 1]
 [1 1]]


In [4]:
row, column  = data.shape
print("row:", row)
print("column:", column)

row: 10000
column: 3


In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(_input, output, test_size=0.3)

In [8]:
def generate():

    W1 = (np.random.rand(2,2)-0.5)*1.1
    W2 = (np.random.rand(2)-0.5)*1.1

    B1 = np.array([0 , 0])
    B2 = np.array([0])

    return W1, W2, B1, B2


#relu function
def relu(z1):
    return np.maximum(z1, 0)

#sigmoid function
def sigmoid(z2):
    return 1/(1+math.e**-(z2))

#forward propogation of input A through W1, B1, W2, B2
def forward_propagation(W1, W2, B1, B2, A):
    z1 = W1.dot(A) + B1
    h = relu(z1)
    z2 = W2.dot(h) + B2
    y = sigmoid(z2)
    
    return y, z1, z2, h


def dLoss(y, epoch):
    #loss = 0.5*(H - Y_train[epoch])**2
    d_loss = (y - Y_train[epoch])
    return d_loss



# derivative of output with respect to sigmoid(Z2)
def dY(Z2):
    return ( sigmoid(Z2) * (1 - (sigmoid(Z2))) )


# Z2 = W20 * h0 + W21 * h1 + b
# derivative of Z2 with respect to h
def update_dZ2_h(W2):
    dZ2_h0 = W2[0]
    dZ2_h1 = W2[1]
    return dZ2_h0, dZ2_h1

# Z2 = W20 * h0 + W21 * h1 + b
# derivative of Z2 with respect to w
def update_dZ2_w(H):
    return H[0], H[1]

# Z10 = Wa * a0 + Wc * a1 + b
# Z11 = Wb * a0 + Wd * a1 + b
# derivative of Z1 with respect to w
def update_dZ1_w(A):
    dZ1_wa = A[0]
    dZ1_wb = A[0]
    dZ1_wc = A[1]
    dZ1_wd = A[1]
    return dZ1_wa, dZ1_wc, dZ1_wb, dZ1_wd
    

# derivative of h with respect to relu(Z1)
def dH(Z1):
    return (Z1 > 0 ) * 1
    
#calculate partial derivatives with respect to W2
def dW2(y, epoch, Z2, W2, h):
    dZ2_w0, dZ2_w1 = update_dZ2_w(h)
    
    dL_dW20 = dLoss(y, epoch)*dY(Z2)*dZ2_w0
    dL_dw21 = dLoss(y, epoch)*dY(Z2)*dZ2_w1
    
    d_W2 = np.array([dL_dW20, dL_dw21])
    d_W2.shape = (2,)
    
    return d_W2

#calculate partial derivatives with respect to W1
def dW1(y, epoch, Z2, Z1, A, W2):
    
    dZ1_wa, dZ1_wc, dZ1_wb, dZ1_wd = update_dZ1_w(A)
    dZ2_h0, dZ2_h1 = update_dZ2_h(W2)
    
    dL_dW1a = dLoss(y, epoch)*dY(Z2)*dZ2_h0*dH(Z1[0])*dZ1_wa
    dL_dW1c = dLoss(y, epoch)*dY(Z2)*dZ2_h0*dH(Z1[0])*dZ1_wc
    
    dL_dW1b = dLoss(y, epoch)*dY(Z2)*dZ2_h1*dH(Z1[1])*dZ1_wb
    dL_dW1d = dLoss(y, epoch)*dY(Z2)*dZ2_h1*dH(Z1[1])*dZ1_wd
    
    d_W1 = np.array([dL_dW1a, dL_dW1c, dL_dW1b, dL_dW1d])
    
    d_W1.shape = (2,2)
    
    return d_W1


#calculate derivatives with respect to B1
def dB1(y, epoch, Z2, Z1, W2):
    
    dZ2_h0, dZ2_h1 = update_dZ2_h(W2)
    
    dB1_0 = dLoss(y, epoch)*dY(Z2)*dZ2_h0*dH(Z1[0])*1
    dB1_1 =  dLoss(y, epoch)*dY(Z2)*dZ2_h0*dH(Z1[1])*1
    
    d_B1 = (np.array([ dB1_0,  dB1_1]))
    d_B1.shape = (2,)
    
    return d_B1


#calculate derivatives with respect to B2
def dB2(y, epoch, Z2):
    d_B2 = dLoss(y, epoch)*dY(Z2)*(1)
    return d_B2


#calculate all partial derivatives
def backward_propagation(y, epoch, Z2, Z1, A, W2, h):
    
    d_W2 = -1*dW2(y, epoch, Z2, W2, h)
    d_W1 = -1*dW1(y, epoch, Z2, Z1, A, W2)
    d_B2 = -1*dB2(y, epoch, Z2)
    d_B1 = -1*dB1(y, epoch, Z2, Z1, W2)
    return d_W2, d_W1, d_B2, d_B1

    
def update(W1, W2, d_W2, d_W1, B1, B2, d_B1, d_B2, pd_W2, pd_W1, pd_B2, pd_B1, LEARN_RATE, MF ):
    
    
    W2 = W2 + (d_W2 * LEARN_RATE) + (pd_W2 * MF)
    W1 = W1 + (d_W1 * LEARN_RATE) + (pd_W1 * MF)
    B2 = B2 + (d_B2 * LEARN_RATE) + (pd_B2 * MF)
    B1 = B1 + (d_B1 * LEARN_RATE) + (pd_B1 * MF)
    
    return W1, W2, B1, B2


def loss(W1, W2, B1, B2):
    total = 0
    count = 0
    for i in X_train:
        A = i
        y, Z1, Z2, h = forward_propagation(W1, W2, B1, B2, A)
        total = total + 0.5*(y - Y_train[count])**2
        count = count + 1
                    
    return total

def run(W1, W2, B1, B2, times, LEARN_RATE, MF):
    count = 0
    pd_W2, pd_W1, pd_B2, pd_B1 = 0, 0, 0, 0
    x = 0
    
    while x < times:
        count = 0
        pd_W2, pd_W1, pd_B2, pd_B1 = 0, 0, 0, 0
        for i in X_train:
            A = i
            y, Z1, Z2, h = forward_propagation(W1, W2, B1, B2, A)
            d_W2, d_W1, d_B2, d_B1 = backward_propagation(y, count, Z2, Z1, A, W2, h)                
            W1, W2, B1, B2 = update(W1, W2, d_W2, d_W1, B1, B2, d_B1, d_B2, pd_W2, pd_W1, pd_B2, pd_B1, LEARN_RATE, MF)

            if count % 6999 == 0:
                print(" Loss: ", loss(W1, W2, B1, B2))

            count = count + 1

            pd_W2, pd_W1, pd_B2, pd_B1 = d_W2, d_W1, d_B2, d_B1
            
            
        x = x + 1
        
    print('~~~~~~~~~')
    
    
    return W1, W2, B1, B2


def optimum(times, LEARN_RATE, MF):
    
    W1, W2, B1, B2 = generate()
    
    W1m, W2m, B1m, B2m = run(W1, W2, B1, B2, times, LEARN_RATE, MF)
    
    while loss(W1m, W2m, B1m, B2m) > 20:
    
        W1, W2, B1, B2 = generate()
    
        W1, W2, B1, B2 = run(W1, W2, B1, B2, times, LEARN_RATE, MF)
    
        if loss(W1, W2, B1, B2) < loss(W1m, W2m, B1m, B2m):
            W1m, W2m, B1m, B2m = W1, W2, B1, B2
            
    return W1m, W2m, B1m, B2m


def predict(W1, W2, B1, B2, Input):
    y, Z1, Z2, h = forward_propagation(W1, W2, B1, B2, Input)
    
    if y > 0.5:
        return 1
    else: 
        return 0


In [9]:
#optimum(times, LEARN_RATE, MF):
W1, W2, B1, B2 = optimum(2, 0.1, 0)


 Loss:  [877.52262566]
 Loss:  [573.65926361]
 Loss:  [574.63038667]
 Loss:  [572.97768146]
~~~~~~~~~
 Loss:  [882.86402286]
 Loss:  [597.94842336]
 Loss:  [598.23788453]
 Loss:  [597.48064035]
~~~~~~~~~
 Loss:  [854.67173743]
 Loss:  [7.26253099]
 Loss:  [7.26053843]
 Loss:  [2.9458505]
~~~~~~~~~


In [10]:
print(loss(W1, W2, B1, B2))

[2.9458505]


In [11]:
print(W1)
print(W2)
print(B1)
print(B2)

[[-2.26414965  2.26422467]
 [ 2.25680539 -2.25693643]]
[3.17602469 3.19518574]
[-2.35325195e-04 -3.14935300e-05]
[-3.27572977]


In [29]:
predictions = []
for i in X_test:
    predictions.append(predict(W1, W2, B1, B2, i))
    
print(predictions)


[0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 

In [51]:
amt = 0
correct = 0
for i in Y_test:
    if i == predictions[amt]:
        correct = correct + 1
        
    amt = amt + 1
    
    
print('Accuracy = ', (correct/amt)*100, '%')

Accuracy =  100.0 %


In [87]:
#debugging

A = X_train[3]
LEARN_RATE = 0.1
MF = 0.1

W1, W2, B1, B2 = generate()

print(W1, 'W1 shape: ', W1.shape)
print(W2, 'W2 shape: ', W2.shape)
print(B1, 'B1 shape: ', B1.shape)
print(B2, 'B2 shape: ', B2.shape)
print('~~~~~~~~~~ Initial W and B')

y, Z1, Z2, h = forward_propagation(W1, W2, B1, B2, A)
d_W2, d_W1, d_B2, d_B1 = backward_propagation(y, 3, Z2, Z1, A, W2, h)

pd_W2, pd_W1, pd_B2, pd_B1 = d_W2, d_W1, d_B2, d_B1


print(d_W1)
print(d_W2)
print(d_B1)
print(d_B2)

print('~~~~~~~~~~ first gradient values')



    
W2 = W2 + (d_W2 * LEARN_RATE) + (0)
W1 = W1 + (d_W1 * LEARN_RATE) + (0)
B2 = B2 + (d_B2 * LEARN_RATE) + (0)
B1 = B1 + (d_B1 * LEARN_RATE) + (0)

print(W1, 'W1 shape: ', W1.shape)
print(W2, 'W2 shape: ', W2.shape)
print(B1, 'B1 shape: ', B1.shape)
print(B2, 'B2 shape: ', B2.shape)

print('~~~~~~~~~~ W and B step 1 (inital + first gradients)')

y, z1, z2, h = forward_propagation(W1, W2, B1, B2, A)
d_W2, d_W1, d_B2, d_B1 = backward_propagation(y, 3, Z2, Z1, A, W2, h)

print(d_W1)
print(d_W2)
print(d_B1)
print(d_B2)

print('~~~~~~~~~~ second gradient values')

W2 = W2 + (d_W2 * LEARN_RATE) + (pd_W2 * MF)
W1 = W1 + (d_W1 * LEARN_RATE) + (pd_W1 * MF)
B2 = B2 + (d_B2 * LEARN_RATE) + (pd_B2 * MF)
B1 = B1 + (d_B1 * LEARN_RATE) + (pd_B1 * MF)

print(W1, 'W1 shape: ', W1.shape)
print(W2, 'W2 shape: ', W2.shape)
print(B1, 'B1 shape: ', B1.shape)
print(B2, 'B2 shape: ', B2.shape)

print('~~~~~~~~~~ Momentums (first gradients)')


print(W1, 'W1 shape: ', W1.shape)
print(W2, 'W2 shape: ', W2.shape)
print(B1, 'B1 shape: ', B1.shape)
print(B2, 'B2 shape: ', B2.shape)
print('~~~~~~~~~~ W and B step 2 (step1 + second gradients + MF*firstgradients)')
    
    


[[ 0.21447805 -0.49434935]
 [ 0.06571821  0.04965848]] W1 shape:  (2, 2)
[-0.01860815 -0.19868254] W2 shape:  (2,)
[0 0] B1 shape:  (2,)
[0] B2 shape:  (1,)
~~~~~~~~~~ Initial W and B
[[0.         0.        ]
 [0.02454745 0.02454745]]
[-0.         -0.01425492]
[0.         0.00229906]
[-0.12355112]
~~~~~~~~~~ first gradient values
[[ 0.21447805 -0.49434935]
 [ 0.06817295  0.05211323]] W1 shape:  (2, 2)
[-0.01860815 -0.20010803] W2 shape:  (2,)
[0.         0.00022991] B1 shape:  (2,)
[-0.01235511] B2 shape:  (1,)
~~~~~~~~~~ W and B step 1 (inital + first gradients)
[[0.         0.        ]
 [0.02455419 0.02455419]]
[-0.         -0.01478789]
[0.         0.00228331]
[-0.12270467]
~~~~~~~~~~ second gradient values
[[ 0.21447805 -0.49434935]
 [ 0.07308312  0.05702339]] W1 shape:  (2, 2)
[-0.01860815 -0.20301231] W2 shape:  (2,)
[0.         0.00068814] B1 shape:  (2,)
[-0.03698069] B2 shape:  (1,)
~~~~~~~~~~ Momentums (first gradients)
[[ 0.21447805 -0.49434935]
 [ 0.07308312  0.05702339]] W1