* We're going to implement a neural network composed of 2 layers (one hidden layer and one output layer)
* We're going to use the sigmoid as the activation function
* MSE (mean squared error) as the cost function
* Our goal in training is to find the set of weights and biases that minimizes the loss function.

* X1   |    X2  |    X3  |       Y
* 0    |     0  |      1 |       0                          
* 0    |     1  |      1 |       1
* 1    |     0  |      1 |       1
* 1    |     1  |      1 |       0

In [5]:
import numpy as np

def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x) of x.

    Works element-wise on NumPy arrays and also accepts plain scalars.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

In [13]:
def sigmoid_derivative(x):
    """Return the derivative of the sigmoid, evaluated at the pre-activation x.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), so x must be
    the value fed INTO the sigmoid, not a value that already came out of it.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

In [19]:
class Neural_Network:
    """Two-layer, fully connected network with sigmoid activations.

    The network has one hidden layer (``weights1``) and a single output unit
    (``weights2``). There are no separate bias terms; a constant input column
    can play that role for the hidden layer. Training is full-batch gradient
    descent on the sum-of-squares loss.

    Args:
        x: 2-D array of inputs, one row per sample.
        y: 2-D column array of targets with shape (n_samples, 1).
        hidden_units: number of units in the hidden layer.
        learning_rate: step size applied to every weight update.
    """

    def __init__(self, x, y, hidden_units=4, learning_rate=0.1):
        self.input = x
        # Weights start uniformly in [0, 1); weights1 is drawn before weights2.
        self.weights1 = np.random.rand(self.input.shape[1], hidden_units)
        self.weights2 = np.random.rand(hidden_units, 1)
        self.y = y
        self.output = np.zeros(self.y.shape)
        self.learning_rate = learning_rate

    def feed_forward(self):
        """Run the inputs through both layers, storing ``layer1`` and ``output``."""
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    # The loss is not written directly in terms of the weights, so its
    # derivative with respect to each weight matrix is obtained with the chain
    # rule. Gradient descent then nudges every weight a small step in the
    # direction that reduces the loss.

    def back_prop(self):
        """Apply one gradient-descent update to ``weights1`` and ``weights2``.

        Must be called after ``feed_forward`` so that ``layer1`` and ``output``
        hold the activations for the current weights.

            Loss(y, y_pred) = sum((y - y_pred)**2)
            -dLoss/dW = 2 * (y - y_pred) * a * (1 - a) * layer_input

        where ``a`` is the sigmoid activation of the layer. Because ``a`` is
        already sigmoid(z), the derivative sigmoid'(z) is ``a * (1 - a)``;
        passing ``a`` through the sigmoid a second time would give the wrong
        slope.
        """
        # Negative loss gradient at the output pre-activation.
        output_delta = 2 * (self.y - self.output) * self.output * (1 - self.output)
        # Push the error signal back through weights2 and the hidden sigmoid.
        hidden_delta = np.dot(output_delta, self.weights2.T) * self.layer1 * (1 - self.layer1)

        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # The deltas are negative gradients, so adding them descends the loss.
        self.weights1 += self.learning_rate * d_weights1
        self.weights2 += self.learning_rate * d_weights2
        
        
        

In [15]:
# Input
# Training inputs: one row per sample, columns X1, X2 and a constant X3 = 1.
x = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])
print(x.shape)

(4, 3)


In [16]:
# Target label for each input row, still one-dimensional at this point.
targets = [0, 1, 1, 0]
y = np.array(targets)
del targets
print(y.shape)

(4,)


In [17]:
# Turn the targets into a (4, 1) column so they line up with the network output.
y = y.reshape(4, 1)
y

array([[0],
       [1],
       [1],
       [0]])

In [20]:
model = Neural_Network(x, y)

# Train with full-batch gradient descent until the mean squared error between
# the predictions and the targets drops below the tolerance. The epoch cap
# keeps the cell from hanging forever when a random initialisation fails to
# converge.
MAX_EPOCHS = 1000000
MSE_TOLERANCE = 0.001

for epoch in range(MAX_EPOCHS):

    model.feed_forward()
    model.back_prop()

    if np.mean((model.output - model.y)**2) < MSE_TOLERANCE:
        break
else:
    # Only reached when the loop ran out of epochs without hitting `break`.
    print("Stopped after", MAX_EPOCHS, "epochs without reaching MSE <", MSE_TOLERANCE)

In [21]:
# Display the learned input-to-hidden weights (one column per hidden unit).
model.weights1

array([[  3.41599845, -16.34692106,  18.71483868,  -0.32510728],
       [  3.37981656,  18.91333796, -16.1422151 ,  -0.4266523 ],
       [ -2.18086639,  -0.80654112,  -0.80586001,   2.43175318]])

In [22]:
# Display the learned hidden-to-output weights (one row per hidden unit).
model.weights2

array([[ 19.3539246 ],
       [-15.24254566],
       [-15.4107224 ],
       [  4.18546488]])

In [23]:
# Display the predictions stored by the most recent feed_forward call; compare with y.
model.output

array([[0.02530387],
       [0.96499   ],
       [0.96492279],
       [0.03005382]])

In [24]:
# Score the fit with average precision, using the network's sigmoid outputs as
# scores against the binary targets. This is computed on the same four rows the
# network was trained on, so it measures fit, not generalisation.
from sklearn.metrics import average_precision_score

average_precision = average_precision_score(model.y, model.output)

In [25]:
# Display the average precision computed in the previous cell.
average_precision

1.0