![alt text](formula.png "Title")

In [11]:
import numpy as np

class TwolayerPerceptron:
    """Fully connected network with one hidden layer, trained by batch gradient descent.

    Both layers use the bipolar sigmoid ``2 / (1 + exp(-x)) - 1`` (which equals
    ``tanh(x / 2)``), so every output lies in the open interval (-1, 1).
    Gradients are those of the summed squared error
    ``0.5 * sum((forward(x) - y) ** 2)`` over all samples in the batch.

    Attributes:
        w1: Input-to-hidden weights, shape ``(input_size, hidden_layer_size)``.
        b1: Hidden biases, shape ``(1, hidden_layer_size)``.
        w2: Hidden-to-output weights, shape ``(hidden_layer_size, output_size)``.
        b2: Output biases, shape ``(1, output_size)``.
    """

    def __init__(self, input_size, hidden_layer_size, output_size):
        """Create a network whose parameters are drawn uniformly from [0, 1).

        Args:
            input_size: Number of features per sample.
            hidden_layer_size: Number of hidden units.
            output_size: Number of output units.
        """
        self.input_size = input_size
        self.hidden_layer_size = hidden_layer_size
        self.output_size = output_size
        # Draw order (w1, w2, b1, b2) is kept stable so that seeding the global
        # NumPy RNG reproduces the same initial parameters.
        self.w1 = np.random.random((input_size, hidden_layer_size))
        self.w2 = np.random.random((hidden_layer_size, output_size))
        self.b1 = np.random.random((1, hidden_layer_size))
        self.b2 = np.random.random((1, output_size))

    def sigmoid(self, x):
        """Return the bipolar sigmoid ``2 / (1 + exp(-x)) - 1`` of ``x``.

        Evaluated as ``tanh(x / 2)``: mathematically identical, but it neither
        overflows for large negative ``x`` nor cancels catastrophically near 0.
        """
        return np.tanh(np.asarray(x) / 2)

    def sigmoid_derivative(self, x):
        """Return the derivative of :meth:`sigmoid` evaluated at pre-activation ``x``."""
        return (1 - self.sigmoid(x) ** 2) / 2

    def _forward_pass(self, x):
        """Return the hidden and output activations ``(a1, a2)`` for batch ``x``."""
        a1 = self.sigmoid(x @ self.w1 + self.b1)
        a2 = self.sigmoid(a1 @ self.w2 + self.b2)
        return a1, a2

    def forward(self, x):
        """Return the network output for ``x`` of shape ``(N, input_size)``."""
        _, a2 = self._forward_pass(x)
        return a2

    def gradient_calculation(self, x, y):
        """Compute the batch gradients of the squared-error loss.

        Args:
            x: Inputs of shape ``(N, input_size)``; a single 1-D sample is
                treated as a batch of one.
            y: Targets broadcastable to ``(N, output_size)``. A 1-D array of
                length ``N`` is accepted as a column when ``output_size == 1``.

        Returns:
            Tuple ``(grad_w1, grad_w2, grad_b1, grad_b2)`` whose entries have
            the same shapes as ``w1``, ``w2``, ``b1`` and ``b2``.
        """
        x = np.atleast_2d(x)
        y = np.asarray(y)

        a1, a2 = self._forward_pass(x)

        # Without this, a flat target vector would broadcast against the
        # (N, 1) output into an (N, N) error matrix.
        if y.ndim == 1 and a2.shape[1] == 1 and y.shape[0] == a2.shape[0]:
            y = y.reshape(-1, 1)

        # The activation derivative is (1 - a**2) / 2, so it can be taken from
        # the activations already computed instead of re-evaluating them.
        delta2 = (a2 - y) * ((1 - a2 ** 2) / 2)        # (N, output)
        grad_w2 = a1.T @ delta2                        # (hidden, output)
        grad_b2 = delta2.sum(axis=0, keepdims=True)    # (1, output)

        hidden_error = delta2 @ self.w2.T              # (N, hidden)
        delta1 = hidden_error * ((1 - a1 ** 2) / 2)    # (N, hidden)
        grad_w1 = x.T @ delta1                         # (input, hidden)
        grad_b1 = delta1.sum(axis=0, keepdims=True)    # (1, hidden)

        return grad_w1, grad_w2, grad_b1, grad_b2

    def train(self, x, y, update_step_num=10, learning_rate=0.001):
        """Run ``update_step_num`` full-batch gradient-descent updates in place.

        Args:
            x: Training inputs, see :meth:`gradient_calculation`.
            y: Training targets, see :meth:`gradient_calculation`.
            update_step_num: Number of parameter updates to perform.
            learning_rate: Step size applied to every gradient.
        """
        for _ in range(update_step_num):
            grad_w1, grad_w2, grad_b1, grad_b2 = self.gradient_calculation(x, y)

            self.w1 -= learning_rate * grad_w1
            self.w2 -= learning_rate * grad_w2
            self.b1 -= learning_rate * grad_b1
            self.b2 -= learning_rate * grad_b2

In [12]:
# Demo: fit the network to the XNOR truth table (output is 1 when both
# inputs are equal, 0 otherwise).
hidden_layer_sizes = 5
x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# Alternative three-output target kept from the original experiment:
# y = np.array([[0,0,1],[0,0,0],[0,0,0],[0,0,1]])
y = np.array([[1], [0], [0], [1]])  # one target column, one row per sample

mlp = TwolayerPerceptron(
    input_size=x.shape[1],
    hidden_layer_size=hidden_layer_sizes,
    output_size=y.shape[1],
)
mlp.train(x, y, 10000, 0.1)

# Evaluate the trained model on the same four input patterns.
predictions = mlp.forward(x)
print("Predictions:", predictions, sep="\n")

Predictions:
[[9.85532587e-01]
 [5.46579753e-04]
 [4.98253776e-04]
 [9.81596638e-01]]


In [13]:
# import matplotlib.pyplot as plt
# t = np.linspace(0, 10, 1000).reshape([-1,1])
# mm = TwolayerPerceptron(1,16,1)
# X_train = np.sin(t)
# Y_train = np.cos(t)
# mm.train(X_train, Y_train,1000)

# y_hat = mm.forward(X_train)
# plt.plot(Y_train)
# plt.plot(y_hat)