In [1]:
import numpy as np

In [657]:
class MLP:
    def __init__(self, n_inputs=5, hidden_layers=[4, 3, 3], n_outputs=2):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.hidden_layers = hidden_layers

        self.layers = [n_inputs] + hidden_layers + [n_outputs]

        self.weights = []
        for i in range(len(self.layers) - 1):
            _w = np.random.rand(self.layers[i], self.layers[i + 1])
            self.weights.append(_w)

        self.outputs = []
        for i in range(len(self.layers)):
            _o = np.zeros(self.layers[i])
            self.outputs.append(_o)

        self.weight_gradients = []
        for i in range(len(self.layers) - 1):
            _g = np.zeros((self.layers[i], self.layers[i + 1]))
            self.weight_gradients.append(_g)

    def _sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

    def _sigmoid_derivative(self, X):
        sigmoid = self._sigmoid(X)
        return sigmoid * (1 - sigmoid)

    def forward_propogation(self, X):
        output = X
        self.outputs[0] = output
        for i in range(len(self.weights)):
            output = np.dot(output, self.weights[i])
            output = self._sigmoid(output)
            self.outputs[i + 1] = output

        return output

    def backward_propagation(self, error, learning_rate=0.1):
        for i in reversed(range(len(self.weight_gradients))):
            delta = error * self._sigmoid_derivative(self.outputs[i + 1])
            self.weight_gradients[i] = np.dot(self.outputs[i].T, delta)
            self.weights[i] = self.weights[i] - learning_rate * self.weight_gradients[i]
            error = np.dot(delta, self.weights[i].T)

    def train(self, X, y, n_epochs=10, learning_rate=0.1):
        y_pred = self.forward_propogation(X)
        error = y_pred - y

        # Batched gradient descent.
        for i in range(n_epochs):
            self.backward_propagation(error, learning_rate=learning_rate)

In [658]:
mlp = MLP(n_inputs=2, hidden_layers=[5], n_outputs=2)
mlp.weights

[array([[0.91267081, 0.47186504, 0.28635759, 0.35631882, 0.18479119],
        [0.92570175, 0.67690276, 0.80771062, 0.14464088, 0.28882909]]),
 array([[0.99245521, 0.04281589],
        [0.14403246, 0.55520401],
        [0.69863201, 0.68941947],
        [0.45934191, 0.1310594 ],
        [0.35070261, 0.55300024]])]

In [659]:
mlp.weights

[array([[0.91267081, 0.47186504, 0.28635759, 0.35631882, 0.18479119],
        [0.92570175, 0.67690276, 0.80771062, 0.14464088, 0.28882909]]),
 array([[0.99245521, 0.04281589],
        [0.14403246, 0.55520401],
        [0.69863201, 0.68941947],
        [0.45934191, 0.1310594 ],
        [0.35070261, 0.55300024]])]

In [660]:
mlp.weight_gradients

[array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])]

In [661]:
x = np.concatenate([np.random.rand(5, 2), -np.random.rand(5, 2)])
y = np.array(
    [[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]
)

In [662]:
x.shape, y.shape

((10, 2), (10, 2))

In [701]:
mlp.train(x, y, learning_rate=0.8)

y pred:  [[0.96597292 0.02849641]
 [0.97797005 0.01928219]
 [0.95923184 0.03374905]
 [0.97831102 0.01901407]
 [0.95956871 0.03336097]
 [0.0511274  0.93684936]
 [0.02560733 0.96563041]
 [0.03564553 0.95401418]
 [0.02985503 0.96056726]
 [0.03284808 0.95745555]] y true:  [[1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]]
diff:  [[-0.03402708  0.02849641]
 [-0.02202995  0.01928219]
 [-0.04076816  0.03374905]
 [-0.02168898  0.01901407]
 [-0.04043129  0.03336097]
 [ 0.0511274  -0.06315064]
 [ 0.02560733 -0.03436959]
 [ 0.03564553 -0.04598582]
 [ 0.02985503 -0.03943274]
 [ 0.03284808 -0.04254445]]


In [524]:
mlp.weights

[array([[0.51056273, 0.16882731, 0.77915055, 0.14547723, 0.67026609],
        [0.66879042, 0.90033084, 0.36250304, 0.59518435, 0.16609523]]),
 array([[0.70368879, 0.60926578],
        [0.05396637, 0.24030178],
        [0.73990428, 0.36267883],
        [0.58920265, 0.07766133],
        [0.76043773, 0.26084411]])]

In [525]:
mlp.outputs

[array([[ 0.58111443,  0.81755425],
        [ 0.71014918,  0.02143243],
        [ 0.96887708,  0.92736208],
        [ 0.00637792,  0.82727616],
        [ 0.0284312 ,  0.061791  ],
        [-0.53789905, -0.6586294 ],
        [-0.448355  , -0.04383255],
        [-0.24430612, -0.2800475 ],
        [-0.32013284, -0.04138332],
        [-0.01418263, -0.71927023]]),
 array([[0.69924175, 0.69761121, 0.67850695, 0.63818683, 0.62764209],
        [0.59313456, 0.53495456, 0.63642896, 0.52853423, 0.6172517 ],
        [0.75308716, 0.73123081, 0.74801426, 0.66552623, 0.68978885],
        [0.63569689, 0.67852728, 0.57531081, 0.62035773, 0.53488721],
        [0.51396072, 0.5151324 , 0.51110035, 0.5101678 , 0.50727807],
        [0.32842937, 0.33507839, 0.34164866, 0.38528844, 0.38527615],
        [0.43579293, 0.47109715, 0.40988755, 0.47749215, 0.42392364],
        [0.4226012 , 0.42701499, 0.4277506 , 0.44995176, 0.4479211 ],
        [0.45235854, 0.47708233, 0.43441762, 0.4824314 , 0.44506475],
        

In [485]:
mlp.forward_propogation(x), y

(array([[0.83437951, 0.88727723],
        [0.83460344, 0.88747566],
        [0.8342642 , 0.88717466],
        [0.83441957, 0.88731175],
        [0.8345804 , 0.88745436],
        [0.83347235, 0.88646988],
        [0.83210534, 0.88524091],
        [0.83288533, 0.88594502],
        [0.83230204, 0.88542361],
        [0.8320844 , 0.8852249 ]]),
 array([[1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [0, 1],
        [0, 1],
        [0, 1],
        [0, 1],
        [0, 1]]))