In [1]:
import numpy as np
import time

In [2]:
n_hidden = 10
n_in = 10
n_out = 10
n_samples = 300

learning_rate = 0.01
momentum = 0.9

np.random.seed(0)

In [3]:
def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

In [4]:
def tanh_prime(x):
    return  1 - np.tanh(x)**2

In [5]:
def train(x, t, V, W, bv, bw):

    # forward
    A = np.dot(x, V) + bv
    Z = np.tanh(A)

    B = np.dot(Z, W) + bw
    Y = sigmoid(B)

    # backward
    Ew = Y - t
    Ev = tanh_prime(A) * np.dot(W, Ew)

    dW = np.outer(Z, Ew)
    dV = np.outer(x, Ev)

    loss = -np.mean ( t * np.log(Y) + (1 - t) * np.log(1 - Y) )

    # Note that we use error for each layer as a gradient
    # for biases

    return  loss, (dV, dW, Ev, Ew)

In [6]:
def predict(x, V, W, bv, bw):
    A = np.dot(x, V) + bv
    B = np.dot(np.tanh(A), W) + bw
    return (sigmoid(B) > 0.5).astype(int)

In [7]:
# Setup initial parameters
# Note that initialization is cruxial for first-order methods!

V = np.random.normal(scale=0.1, size=(n_in, n_hidden))
W = np.random.normal(scale=0.1, size=(n_hidden, n_out))

bv = np.zeros(n_hidden)
bw = np.zeros(n_out)

params = [V,W,bv,bw]

In [8]:
# Generate some data

X = np.random.binomial(1, 0.5, (n_samples, n_in))
T = X ^ 1

In [10]:
# Train
for epoch in range(100):
    err = []
    upd = [0]*len(params)

    t0 = time.clock()
    for i in range(X.shape[0]):
        loss, grad = train(X[i], T[i], *params)

        for j in range(len(params)):
            params[j] -= upd[j]

        for j in range(len(params)):
            upd[j] = learning_rate * grad[j] + momentum * upd[j]

        err.append( loss )

    print("Epoch: %d, Loss: %.8f, Time: %.4fs" % (
                epoch, np.mean( err ), time.clock()-t0 ))

Epoch: 0, Loss: 0.45465070, Time: 0.0637s
Epoch: 1, Loss: 0.13697961, Time: 0.0455s
Epoch: 2, Loss: 0.06206941, Time: 0.0397s
Epoch: 3, Loss: 0.04092746, Time: 0.0399s
Epoch: 4, Loss: 0.03159958, Time: 0.0393s
Epoch: 5, Loss: 0.02592744, Time: 0.0417s
Epoch: 6, Loss: 0.02199575, Time: 0.0418s
Epoch: 7, Loss: 0.01907812, Time: 0.0395s
Epoch: 8, Loss: 0.01682099, Time: 0.0391s
Epoch: 9, Loss: 0.01502363, Time: 0.0392s
Epoch: 10, Loss: 0.01356039, Time: 0.0399s
Epoch: 11, Loss: 0.01234775, Time: 0.0429s
Epoch: 12, Loss: 0.01132776, Time: 0.0393s
Epoch: 13, Loss: 0.01045887, Time: 0.0390s
Epoch: 14, Loss: 0.00971052, Time: 0.0393s
Epoch: 15, Loss: 0.00905971, Time: 0.0394s
Epoch: 16, Loss: 0.00848887, Time: 0.0398s
Epoch: 17, Loss: 0.00798436, Time: 0.0421s
Epoch: 18, Loss: 0.00753542, Time: 0.0394s
Epoch: 19, Loss: 0.00713347, Time: 0.0396s
Epoch: 20, Loss: 0.00677160, Time: 0.0392s
Epoch: 21, Loss: 0.00644415, Time: 0.0390s
Epoch: 22, Loss: 0.00614650, Time: 0.0402s
Epoch: 23, Loss: 0.00

In [12]:
x = np.random.binomial(1, 0.5, n_in)
print("XOR prediction:")
print(x)
print(predict(x, *params))

XOR prediction:
[1 0 1 1 1 1 0 1 0 0]
[0 1 0 0 0 0 1 0 1 1]
