In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import expit

In [2]:
def propagate(X, V, W, b):
    """Forward pass through the one-hidden-layer network.

    The hidden pre-activation is ``V . X^T`` (transposed back) plus ``b``,
    the hidden activation is ``f_act`` of that, and the output is the
    linear map ``W . Z^T`` (transposed back) with no output non-linearity.

    Returns
    -------
    Y, Z, U1, U2
        Network output, hidden activation, hidden pre-activation and
        output pre-activation.  ``Y`` and ``U2`` are the same object.
    """
    hidden_pre = np.dot(V, X.T).transpose() + b
    hidden_act = f_act(hidden_pre)
    # linear output unit: the output equals its pre-activation
    out_pre = np.dot(W, hidden_act.T).transpose()
    return out_pre, hidden_act, hidden_pre, out_pre

def backpropagate(X, YT, V, W, b):
    """Forward pass followed by back-propagation of the squared error.

    The network is the one evaluated by ``propagate``: a hidden layer with
    activation ``f_act`` (derivative ``f_act_prime``) and a linear output,
    scored with ``Cost = 1/2 * mean((Y - YT)**2)``.

    The returned arrays are *descent directions*, i.e. the negative
    gradients of the cost with respect to ``b``, ``V`` and ``W``.  This is
    the sign ``train`` expects, because it applies its updates with ``+=``.

    ``X`` may be a single sample (1-D) or a batch (2-D, one sample per row);
    everything is promoted to 2-D internally.  ``YT`` is reshaped to the
    shape of the network output when it has the same number of elements, so
    a flat target vector does not broadcast against a column of outputs.

    Returns
    -------
    db, dV, dW, cost
        Descent directions with the shapes of ``b``, ``V`` and ``W``, and
        the cost of the forward pass.
    """
    Y, Z, U1, U2 = propagate(X, V, W, b)

    # Align the targets with the output so the error is element-wise.
    target = np.asarray(YT, dtype=float)
    if target.size == np.size(Y):
        target = target.reshape(np.shape(Y))

    # Promote single samples to a batch of one: rows are samples.
    inputs = np.atleast_2d(X)
    hidden_act = np.atleast_2d(Z)
    hidden_pre = np.atleast_2d(U1)

    # output layer: the output is linear, so -dCost/dU2 = (YT - Y) / n,
    # where n comes from the mean inside Cost
    delta_out = np.atleast_2d(target - Y) / np.size(Y)
    dW = np.dot(delta_out.T, hidden_act)

    # hidden layer: push the error back through W and the activation
    delta_hidden = np.dot(delta_out, W) * f_act_prime(hidden_pre)
    dV = np.dot(delta_hidden.T, inputs)
    db = delta_hidden.sum(axis=0)

    return db, dV, dW, Cost(Y, target)

def relu(v):
    """Rectified linear unit: element-wise maximum of ``v`` and 0."""
    return np.maximum(v, 0.0)


def relu_prime(v):
    """Derivative of ``relu``: 1 where ``v > 0`` and 0 elsewhere, in ``v``'s dtype."""
    return np.where(v > 0.0, np.ones_like(v), np.zeros_like(v))


# activation function of the hidden layer and its derivative
f_act, f_act_prime = relu, relu_prime


def Cost(Y, YT):
    """Half of the mean squared difference between ``Y`` and ``YT``."""
    residual = Y - YT
    return 0.5 * np.mean(residual ** 2)


In [3]:
def train(X, YT, V, W, b, niter=10000, base_lr=0.2, mu=0.5, seed=734589):
    """Train the network with stochastic updates and momentum.

    One sample is drawn at random per iteration, ``backpropagate`` is called
    on it, and the returned update directions are low-pass filtered with a
    momentum term before being *added* to the parameters.

    Parameters
    ----------
    X : data, indexed by sample along the first axis
    YT : target values, indexed like ``X``
    V : input->hidden weights
    W : hidden->output weights
    b : hidden biases
    niter : number of training iterations
    base_lr : learning rate; the step size actually used is ``base_lr / len(b)``
    mu : momentum coefficient (weight of the previous filtered update)
    seed : seed passed to ``np.random.seed`` before the samples are drawn

    Returns
    -------
    V, W, b, cost
        The trained parameters and the per-iteration cost (length ``niter``).

    Notes
    -----
    The parameters are trained on float copies: the arrays passed in are
    left untouched, so calling ``train`` twice with the same initial values
    gives the same result, and integer-valued initial arrays are accepted.
    Use the returned arrays to obtain the trained weights.  The global
    numpy random state is re-seeded as a side effect.
    """
    # Work on private float copies so the caller's initial values survive.
    V = np.array(V, dtype=float)
    W = np.array(W, dtype=float)
    b = np.array(b, dtype=float)

    np.random.seed(seed)

    K = len(b)
    eta = base_lr / K
    T = np.random.randint(0, len(X), niter)

    cost = np.zeros(len(T))
    # prepare momentum term variables, one buffer per parameter array
    delta_W = np.zeros_like(W)
    delta_b = np.zeros_like(b)
    delta_V = np.zeros_like(V)

    for run, inp in enumerate(T):
        db, dV, dW, cost[run] = backpropagate(X[inp], YT[inp], V, W, b)

        # calculate weight update with momentum
        # instead of applying the update directions directly
        # this applies a low-pass filtered version
        # which smooths out the updates and helps
        # stabilise the training
        # https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum
        delta_b = (1. - mu) * db + mu * delta_b
        delta_V = (1. - mu) * dV + mu * delta_V
        delta_W = (1. - mu) * dW + mu * delta_W
        # update weights (in place on the local copies)
        b += eta * delta_b
        V += eta * delta_V
        W += eta * delta_W

    return V, W, b, cost

In [4]:
# Load the data set.  The context manager closes the .npz archive once
# both arrays have been read, so no file handle is left open.
with np.load("xor_data.npz") as data:
    X = data['inp']
    YT = data['out']

# Initial parameters: 2 inputs -> 3 hidden units -> 1 output.
# NOTE(review): these look like a hand-picked near-solution for XOR, with
# W[0, 0] set to 1.05 instead of 1.0 -- confirm intent.
V = np.array([[1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])  # input->hidden weights
W = np.array([[1.05, -2.0, 1.0]])                   # hidden->output weights
b = np.array([0.0, -1.0, 0.0])                      # hidden biases