## Imports

In [3]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline
#plt.style.use("Sepehr")

## Data

In [4]:
# Inputs: every combination of two binary features, in row order
# (0,0), (0,1), (1,0), (1,1), followed by a third column that is always 1.
X = np.array([[a, b, 1] for a in (0, 1) for b in (0, 1)])

# Targets as a column vector, one row per row of X; each target is the
# XOR of the first two input columns.
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

## Activation Function

In [5]:
def sigmoid(z):
    """
    Logistic sigmoid 1 / (1 + exp(-z)), evaluated element-wise.

    The naive formula overflows in ``np.exp(-z)`` for large negative ``z``.
    This version only ever evaluates ``exp(-|z|)``, which lies in [0, 1],
    so no overflow can occur.

    Parameters
    ----------
    z : scalar or array-like of real numbers
        Converted to a float array before evaluation.

    Returns
    -------
    Sigmoid of ``z``, with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    e = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))        (e == exp(-z))
    # z <  0:  exp(z) / (1 + exp(z))    (e == exp(z)) -- algebraically the same value
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

def sigmoid_derivative(z):
    """
    Derivative of the sigmoid with respect to its input, evaluated at ``z``.

    Computed as ``s * (1 - s)`` where ``s = sigmoid(z)``. Note that ``z`` is
    the pre-activation value, not an already-activated output.
    """
    s = sigmoid(z)
    return s * (1.0 - s)

## Neural Network

<img src="./2layers.png">

The output is given by:  $\hat{y} = \sigma(W_2\sigma(W_1x+b_1)+b_2)$

Each iteration of the training process consists of the following steps:

* Calculating the predicted output $\hat{y}$, known as **feedforward**.
* Updating the weights and biases using the gradients of the loss, which are computed by **backpropagation**.


<img src="./net.png">


In [6]:
class NeuralNet(object):
    """
    Two-layer (one hidden layer) fully connected network with sigmoid
    activations and a single output unit:

        yhat = sigmoid(sigmoid(X @ w1 + b1) @ w2 + b2)

    Training is full-batch gradient descent on the sum of squared errors
    0.5 * sum((y - yhat) ** 2). Relies on the notebook-level functions
    ``sigmoid`` and ``sigmoid_derivative``.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Training inputs.
    y : array with n_samples elements
        Training targets; stored as a column vector of shape (n_samples, 1).
    n_hidden : int, default 4
        Number of units in the hidden layer.
    learning_rate : float, default 1.0
        Step size applied to every parameter update.
    seed : int or None, default None
        If given, parameters are drawn from ``np.random.default_rng(seed)``;
        otherwise NumPy's global random state is used.

    Attributes
    ----------
    w1, b1 : arrays of shape (n_features, n_hidden) and (1, n_hidden)
    w2, b2 : arrays of shape (n_hidden, 1) and (1, 1)
    yhat   : outputs of the most recent ``feedforward`` call
    loss   : residual ``y - yhat`` from the most recent ``backprop`` call
    """

    def __init__(self, X, y, n_hidden=4, learning_rate=1.0, seed=None):
        # Define the input of the network
        self.input = np.asarray(X)
        # Define the desired output of the network
        self.y = np.asarray(y).reshape(-1, 1)
        # Define the actual output of the network
        self.yhat = np.zeros(self.y.shape)
        # Size of the input: n samples, m features
        self.n, self.m = self.input.shape
        # Number of units in the hidden layer
        self.n_hidden = n_hidden
        # Dimension of the output
        self.n_output = 1
        # Step size of the gradient-descent update
        self.learning_rate = learning_rate

        # Randomly choose the weights and biases, uniformly in [0, 1).
        rng = np.random if seed is None else np.random.default_rng(seed)
        self.w1 = rng.random((self.m, self.n_hidden))
        self.w2 = rng.random((self.n_hidden, self.n_output))
        # One bias per unit (not per sample): a leading axis of length 1
        # broadcasts over any number of input rows.
        self.b1 = rng.random((1, self.n_hidden))
        self.b2 = rng.random((1, self.n_output))

    def _forward(self, X):
        """Run X through both layers; return (z1, a1, z2, yhat)."""
        z1 = np.matmul(X, self.w1) + self.b1
        a1 = sigmoid(z1)
        z2 = np.matmul(a1, self.w2) + self.b2
        return z1, a1, z2, sigmoid(z2)

    def feedforward(self):
        """
        Performs the feedforward operation on the stored training input,
        caching the pre-activations (z1, z2), the hidden activation (a1)
        and the output (yhat) for use by ``backprop``.
        """
        self.z1, self.a1, self.z2, self.yhat = self._forward(self.input)

    def backprop(self):
        """
        Performs the backpropagation: computes the descent direction of
        0.5 * sum((y - yhat) ** 2) for every parameter from the values
        cached by ``feedforward`` and applies one update step.
        """
        # Residual; kept under the historical attribute name `loss`.
        self.loss = self.y - self.yhat

        # Error signals at the output and hidden pre-activations.
        delta2 = self.loss * sigmoid_derivative(self.z2)                      # (n, 1)
        delta1 = np.matmul(delta2, self.w2.T) * sigmoid_derivative(self.z1)   # (n, n_hidden)

        # Weight steps: (layer input).T @ delta, summing over samples.
        dw2 = np.matmul(self.a1.T, delta2)       # (n_hidden, 1)
        dw1 = np.matmul(self.input.T, delta1)    # (m, n_hidden)

        # Bias steps: sum of the error signal over samples.
        db2 = delta2.sum(axis=0, keepdims=True)  # (1, 1)
        db1 = delta1.sum(axis=0, keepdims=True)  # (1, n_hidden)

        # `loss` is (y - yhat), so these are already descent directions
        # and are added rather than subtracted.
        self.w1 += self.learning_rate * dw1
        self.w2 += self.learning_rate * dw2
        self.b1 += self.learning_rate * db1
        self.b2 += self.learning_rate * db2

    def train(self, X=None, y=None):
        """
        Run one training iteration (feedforward followed by backprop).

        If ``X`` and/or ``y`` are given they replace the stored training
        batch first; passing ``None`` keeps the data supplied earlier.
        """
        if X is not None:
            self.input = np.asarray(X)
            self.n = self.input.shape[0]
        if y is not None:
            self.y = np.asarray(y).reshape(-1, 1)
        self.feedforward()
        self.backprop()

    def predict(self, X):
        """
        Return the network output for ``X`` (shape (k, n_features)) as an
        array of shape (k, 1), without modifying any cached training state.
        """
        return self._forward(np.asarray(X))[-1]

## Training

In [17]:
# Number of training iterations; each one is a feedforward pass followed
# by a backprop update on the full data set.
N_ITERATIONS = 100000

NN = NeuralNet(X, y)
for i in range(N_ITERATIONS):
    NN.train(X, y)

In [18]:
# Outputs stored by the most recent feedforward() call, i.e. computed
# just before the final weight update of the training loop.
NN.yhat

array([[0.00123413],
       [0.99868382],
       [0.99875526],
       [0.00133029]])

In [9]:
# Target values, shown for comparison with NN.yhat.
y

array([[0],
       [1],
       [1],
       [0]])

#### Mean Squared Error

In [19]:
# Mean of the squared errors over the samples (axis 0). Using np.mean
# avoids hard-coding 1/number_of_samples; the result keeps shape (1,).
MSE = np.mean((NN.yhat - y) ** 2, axis=0)
MSE

array([1.64361283e-06])