In [28]:
import sklearn.datasets as datasets
import sklearn
import numpy as np

# Implement an XOR gate from scratch with a small neural network trained by
# backpropagation, using the sigmoid activation function.

xt = np.array([[0,0],[0,1],[1,0],[1,1]])
yt = np.array([[0],[1],[1],[0]])

# initialize weights
w1 = np.random.rand(2,2)
w2 = np.random.rand(2,1)

# initialize bias
b1 = np.random.rand(2)
b2 = np.random.rand(1)

# define sigmoid function
def sigmoid(x):
    return 1/(1+np.exp(-x))

# define derivative of sigmoid function
def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))
 
# define forward propagation
def forward_propagation(x):
    z1 = np.dot(x,w1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1,w2) + b2
    a2 = sigmoid(z2)
    return z1,a1,z2,a2
 
# define backward propagation
def backward_propagation(x,y,z1,a1,z2,a2):
    dz2 = a2 - y
    dw2 = np.dot(a1.T,dz2)
    db2 = np.sum(dz2,axis=0,keepdims=True)
    dz1 = np.dot(dz2,w2.T)*sigmoid_derivative(z1)
    dw1 = np.dot(x.T,dz1)
    db1 = np.sum(dz1,axis=0)
    return dz2,dw2,db2,dz1,dw1,db1


# define gradient descent
def gradient_descent(x,y,epochs,lr, w1, w2, b1, b2):
    for i in range(epochs):
        z1,a1,z2,a2 = forward_propagation(x)
        dz2,dw2,db2,dz1,dw1,db1 = backward_propagation(x,y,z1,a1,z2,a2)
        w1 = w1 - lr*dw1
        b1 = b1 - lr*db1
        w2 = w2 - lr*dw2
        b2 = b2 - lr*db2
        if i%1000==0:
            print("cost: ",np.mean(np.square(y-a2)))
    return w1,b1,w2,b2

# define predict function
def predict(x):
    z1,a1,z2,a2 = forward_propagation(x)
    return a2

# train the model
w1,b1,w2,b2 = gradient_descent(xt,yt,1000,0.01, w1, w2, b1, b2)
 
# predict
print("prediction: ",predict(xt))
  
# print accuracy

# print("accuracy: ",sklearn.metrics.accuracy_score(yt,np.round(predict(xt))))

cost:  0.3292953156205685
prediction:  [[4.99116965e-08]
 [7.67757434e-09]
 [1.43295916e-08]
 [2.61755184e-09]]
