In [None]:
import numpy as np

### Define the activation functions

def relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``.

    Negative entries are replaced by zero; positive entries pass through.
    """
    rectified = np.maximum(0, z)
    return rectified

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, applied element-wise.

    The value is computed as ``exp(-log(1 + exp(-z)))``. ``np.logaddexp(0, -z)``
    evaluates ``log(1 + exp(-z))`` without ever forming ``exp(-z)`` directly,
    so large negative inputs no longer overflow (the naive formula emits a
    RuntimeWarning once ``-z`` exceeds roughly 709 in float64).

    Parameters
    ----------
    z : scalar or array-like of numbers
        Pre-activation values.

    Returns
    -------
    Values in the closed interval [0, 1], same shape as ``z``.
    """
    return np.exp(-np.logaddexp(0, -z))

## Derivatives of ReLU and sigmoid, needed for backpropagation
def relu_derivative(z):
    """Derivative of ReLU with respect to its input, element-wise.

    Returns 1.0 where ``z`` is strictly positive and 0.0 everywhere else
    (including exactly at zero). ``z`` must support ``>`` returning an object
    with ``astype``, e.g. a NumPy array.
    """
    is_positive = z > 0
    return is_positive.astype(float)

def sigmoid_derivative(z):
    """Derivative of the sigmoid evaluated at the pre-activation ``z``.

    Note that ``z`` is the *input* to the sigmoid, not its output: the
    function applies ``sigmoid`` itself and returns ``s * (1 - s)``.
    """
    s = sigmoid(z)
    complement = 1 - s
    return s * complement


# Now we can define the network: forward propagation, loss and backpropagation
class TinyProp_function:
    """Tiny fully connected network with one hidden layer, trained by gradient descent.

    Architecture: ``X -> linear -> ReLU -> linear -> sigmoid``. The loss is the
    mean squared error between the sigmoid output and the targets.

    Parameters
    ----------
    input_dim : int
        Number of input features (columns of ``X``).
    hidden_dim : int
        Number of hidden units.
    output_dim : int
        Number of output units (columns of ``y``).
    learning_rate : float, default 0.1
        Step size applied to every parameter update in ``backprop``.

    Raises
    ------
    TypeError
        If any of the three dimensions is left as ``None``.
    """

    def __init__(self, input_dim=None, hidden_dim=None, output_dim=None, learning_rate=0.1):
        # The None defaults exist only in the signature; all three sizes are
        # required, so fail with a clear message instead of an opaque NumPy error.
        if input_dim is None or hidden_dim is None or output_dim is None:
            raise TypeError("input_dim, hidden_dim and output_dim must all be integers")

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.learning_rate = learning_rate

        # One hidden layer: weights are drawn from a standard normal
        # distribution (NumPy's global RNG), biases start at zero.
        self.w1 = np.random.randn(input_dim, hidden_dim)
        self.b1 = np.zeros((1, hidden_dim))
        self.w2 = np.random.randn(hidden_dim, output_dim)
        self.b2 = np.zeros((1, output_dim))

    def forward(self, X):
        """Run the forward pass and cache every intermediate for ``backprop``.

        Parameters
        ----------
        X : array whose last dimension matches ``input_dim``
            (it is matrix-multiplied with ``w1``).

        Returns
        -------
        The sigmoid output ``A2``, also stored on the instance together with
        ``X``, ``Z1``, ``A1`` and ``Z2``.
        """
        self.X = X

        # Hidden layer: linear map followed by ReLU.
        self.Z1 = self.X @ self.w1 + self.b1
        self.A1 = relu(self.Z1)

        # Output layer: linear map followed by sigmoid.
        self.Z2 = self.A1 @ self.w2 + self.b2
        self.A2 = sigmoid(self.Z2)
        return self.A2

    def compute_loos(self, y):
        """Return the mean squared error between the cached output ``A2`` and ``y``.

        ``forward`` must have been called first, because the prediction is read
        from ``self.A2``. The mean is taken over every element of the squared
        difference.
        """
        return np.mean((self.A2 - y) ** 2)

    # With the loss in hand, backpropagation walks the chain rule from the
    # output back to the input to obtain the gradient of every parameter.
    def backprop(self, y):
        """Apply one gradient-descent update using the activations cached by ``forward``.

        Parameters
        ----------
        y : array of targets, expected to have the same shape as ``self.A2``.

        All gradients are computed from the current parameters before any of
        them is modified; the four parameter arrays are then updated in place.
        """
        # dL/dA2 for L = mean((A2 - y)**2). The mean runs over every element,
        # so normalise by A2.size (equal to the sample count when there is a
        # single output column).
        dA2 = 2 * (self.A2 - y) / self.A2.size

        # sigmoid'(Z2) = A2 * (1 - A2). A2 already *is* sigmoid(Z2), so it must
        # not be pushed through the sigmoid a second time.
        dZ2 = dA2 * (self.A2 * (1 - self.A2))

        # Z2 = A1 @ w2 + b2
        dw2 = self.A1.T @ dZ2
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        # Propagate to the hidden activations, then through the ReLU, whose
        # gradient is 1 where Z1 > 0 and 0 elsewhere.
        dA1 = dZ2 @ self.w2.T
        dZ1 = dA1 * (self.Z1 > 0)

        # Z1 = X @ w1 + b1
        dw1 = self.X.T @ dZ1
        db1 = np.sum(dZ1, axis=0, keepdims=True)

        # Gradient-descent step; each gradient updates its own parameter.
        lr = self.learning_rate
        self.w2 -= lr * dw2
        self.b2 -= lr * db2
        self.w1 -= lr * dw1
        self.b1 -= lr * db1

    def train_step(self, X, y):
        """Run forward pass, loss evaluation and one parameter update.

        Returns
        -------
        (loss, prediction) : both are computed *before* the parameters are
        updated, i.e. they describe the network as it was on entry.
        """
        first_pred = self.forward(X)
        loss = self.compute_loos(y)
        self.backprop(y)
        return loss, first_pred




### Test code

In [None]:
if __name__ == "__main__":
    # XOR truth table: the two inputs per row, and the expected output per row.
    X = np.array([
        [0.0, 0.0],
        [0.0, 1.0],
        [1.0, 0.0],
        [1.0, 1.0],
    ])

    y = np.array([
        [0.0],
        [1.0],
        [1.0],
        [0.0],
    ])

    net = TinyProp_function(input_dim=2, hidden_dim=3, output_dim=1, learning_rate=0.1)

    for epoch in range(1000):
        # train_step reports the loss and predictions from *before* its update.
        loss, first_pred = net.train_step(X, y)
        if (epoch + 1) % 100 == 0:
            print(f'Epoch {epoch+1}, Loss = {loss:.4f}')

    # Run one more forward pass so the reported predictions reflect the final
    # weights rather than the state before the last update.
    final_pred = net.forward(X)
    print("\nFinal predictions:")
    print(final_pred)


Epoch 100, Loss = 0.1610
Epoch 200, Loss = 0.1137
Epoch 300, Loss = 0.0720
Epoch 400, Loss = 0.0431
Epoch 500, Loss = 0.0262
Epoch 600, Loss = 0.0168
Epoch 700, Loss = 0.0113
Epoch 800, Loss = 0.0079
Epoch 900, Loss = 0.0058
Epoch 1000, Loss = 0.0044

Final predictions:
[[0.08573381]
 [0.96655047]
 [0.95890866]
 [0.08573381]]
