In [1]:
import numpy as np

def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``.

    Args:
        x: A NumPy array, a Python scalar, or any array-like accepted by
            ``np.maximum``.

    Returns:
        The input with negative entries replaced by zero. The shape follows
        ``x``.
    """
    # np.shape() works on scalars and sequences as well as arrays, unlike the
    # ``.shape`` attribute. The zeros stay float64, so integer input still
    # yields a float result.
    return np.maximum(np.zeros(np.shape(x)), x)

def drelu(x):
    """Derivative of ReLU: 1.0 where ``x`` is strictly positive, else 0.0.

    The value at exactly ``x == 0`` is 0.0.
    """
    is_positive = x > 0
    # Multiplying by a float turns the boolean mask into 0.0 / 1.0.
    return 1.0 * is_positive

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise."""
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    """Derivative of the logistic sigmoid evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.

    Args:
        x: Pre-activation value(s); scalar or NumPy array.

    Returns:
        The derivative, with the same shape as ``x``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(x)
    return s * (1 - s)

# Attach each activation's derivative as a `.der` attribute so the backward
# pass can look it up from the activation function itself.
relu.der = drelu
sigmoid.der = dsigmoid

# Network shape: length of the input vector, then the unit count of each layer.
input_dim = 2
layers = [2, 1]
# activations[i] is applied to the pre-activation of layer i.
activations = [relu, sigmoid]

In [2]:
# One weight matrix and one bias column per layer. Layer i's weights have
# shape (layers[i], fan_in), where fan_in is the width of the previous layer,
# or input_dim for the first layer. Weights are drawn uniformly from [0, 1);
# biases start at zero.
fan_ins = [input_dim, *layers[:-1]]
w = [np.random.random((n_out, n_in)) for n_out, n_in in zip(layers, fan_ins)]
b = [np.zeros((n_out, 1)) for n_out in layers]

def forward(x):
    """Run one column-vector sample through every layer of the network.

    Args:
        x: Input array of shape ``(input_dim, 1)``.

    Returns:
        A tuple ``(z, a)`` of lists, each of length ``len(layers) + 1``.
        ``z[k]`` is the pre-activation of layer ``k`` and ``a[k]`` its
        activation, for ``k >= 1``. ``a[0]`` is the input itself and ``z[0]``
        is a placeholder ``0`` so both lists share the same indexing.
    """
    assert x.shape == (input_dim, 1)
    pre_activations = [0]
    outputs = [x]
    for layer_idx in range(len(layers)):
        # Affine transform of the previous layer's output.
        pre = w[layer_idx].dot(outputs[-1]) + b[layer_idx]
        pre_activations.append(pre)
        outputs.append(activations[layer_idx](pre))
    return pre_activations, outputs

def backprop(z, a, y, learning_rate=0.05):
    """Run one gradient-descent step for a single sample and return its loss.

    The loss is half the summed squared error between the network output
    ``a[-1]`` and the target ``y``. Gradients are propagated from the last
    layer back to the first, and the module-level ``w`` and ``b`` arrays are
    updated in place.

    Args:
        z: Pre-activations as returned by ``forward`` (``z[0]`` is unused).
        a: Activations as returned by ``forward`` (``a[0]`` is the input).
        y: Target value(s); must broadcast against ``a[-1]``.
        learning_rate: Step size applied to every weight and bias update.

    Returns:
        The loss measured before this step's parameter update.
    """
    error = a[-1] - y
    loss = np.sum(error ** 2) / 2

    # Gradient of the loss w.r.t. the activation of the layer being processed.
    # For the half squared error above, this is the raw error at the output.
    grad_a = error

    # Walk the layers from last to first. Index ri is 1-based into z / a, so
    # the matching parameters live at w[ri - 1] and b[ri - 1].
    for ri in range(len(layers), 0, -1):
        grad_z = activations[ri - 1].der(z[ri]) * grad_a
        grad_w = np.dot(grad_z, a[ri - 1].T)
        # Propagate through the current weights before they are modified.
        grad_a = np.dot(w[ri - 1].T, grad_z)

        w[ri - 1] -= learning_rate * grad_w
        # The bias gradient equals the pre-activation gradient.
        b[ri - 1] -= learning_rate * grad_z

    return loss

In [3]:
import numpy as np

# Training data: the XOR truth table, stored as 4 samples of shape (1, 2)
# for the inputs and (1, 1) for the targets.
x_train = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.asarray([0, 1, 1, 0]).reshape(4, 1, 1)

# Online gradient descent: cycle through the four samples in order, one
# update per sample.
for i in range(100000):
    k = i % 4
    z, a = forward(x_train[k].reshape(2, 1))
    backprop(z, a, y_train[k].reshape(1, 1))

In [4]:
# Print the network's output for each of the four training inputs.
for i in range(4):
    layer_outputs = forward(x_train[i].reshape(2, 1))[1]
    print(layer_outputs[-1])

[[0.02515469]]
[[0.99300961]]
[[0.99300732]]
[[0.00957893]]
