In [1]:
import numpy as np

# Logistic sigmoid activation
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^(-x)) of ``x``.

    ``x`` is negated and passed to ``np.exp``, so a scalar or a NumPy
    array works; for an array the function is applied element-wise.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Derivative of the sigmoid function
def sigmoid_prime(x):
    """Return the derivative of the sigmoid evaluated at ``x``.

    Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)). The
    sigmoid is evaluated once and reused rather than computed twice.
    """
    s = sigmoid(x)
    return s * (1 - s)

def SSE(y, y_hat):
    """Return half the sum of squared errors between ``y`` and ``y_hat``.

    Computes 0.5 * sum((y - y_hat)**2) over every element. Note the 0.5
    factor: the result is half of the plain sum of squared errors.

    ``y`` and ``y_hat`` may be scalars, NumPy arrays, or array-likes such as
    lists; they are combined with ``np.subtract``, so normal NumPy
    broadcasting applies.
    """
    # np.subtract (rather than the `-` operator) also accepts plain Python
    # lists, which do not support element-wise subtraction on their own.
    residual = np.subtract(y, y_hat)
    return 0.5 * np.sum(residual ** 2)

In [2]:
# Training set: each row of `inputs` is one record, paired by position with
# the entry of `targets` at the same index.
inputs = np.array([[2, 4, 5],
                   [6, 7, 8]])
targets = np.array([2.3, 4.9])

# Network dimensions
n_records = inputs.shape[0]
n_inputs = inputs.shape[1]
n_hidden = 4  # number of hidden units

### Weights
We want the initial weights to be small so that the input to the sigmoid falls in the roughly linear region near 0 rather than being squashed at the high and low ends. It's also important to initialize them randomly so that they all start from different values and then diverge during training, breaking symmetry. So, we'll initialize the weights from a normal distribution centered at 0. A good value for the scale (the standard deviation) is $1/\sqrt{n}$, where $n$ is the number of input units. This keeps the input to the sigmoid small as the number of input units grows.

In [55]:
# Initial weights, drawn from a zero-mean normal distribution. Both layers
# use a standard deviation of 1/sqrt(n_inputs).
# NOTE: no random seed is set, so the starting weights (and therefore the
# losses printed below) differ from run to run.
weights = {
    'weights_input_hidden': np.random.normal(scale=1/n_inputs**.5, size=(n_inputs, n_hidden)),
    'weights_hidden_output': np.random.normal(scale=1/n_inputs**.5, size=n_hidden)
}

# Neural Network hyperparameters
epochs = 5000
learnrate = 0.005

# Report the training loss about 50 times over the run. Integer division
# (with a floor of 1) keeps the modulo test below exact for any `epochs`,
# including values below 50 or not divisible by 50.
print_every = max(1, epochs // 50)

last_loss = None

for e in range(epochs):
    # Weight steps accumulated over every record in this epoch
    del_w_input_hidden = np.zeros(weights['weights_input_hidden'].shape)
    del_w_hidden_output = np.zeros(weights['weights_hidden_output'].shape)

    for x, y in zip(inputs, targets):
        ## Forward pass ##
        hidden_output = sigmoid(np.dot(x, weights['weights_input_hidden']))
        # The output unit is linear: no activation is applied to it
        output = np.dot(hidden_output, weights['weights_hidden_output'])

        ## Backward pass ##
        # Output error term; with a linear output unit this is just the residual
        delta_error_output = (y - output)

        # δh = W * δo * f′(h), using f′(h) = hidden_output * (1 - hidden_output)
        delta_hidden_error = np.dot(delta_error_output, weights['weights_hidden_output']) * hidden_output * (1 - hidden_output)

        # Accumulate the change in weights for this record
        del_w_hidden_output += delta_error_output * hidden_output
        # x[:, None] turns x into a column so the product broadcasts to the
        # (n_inputs, n_hidden) shape of the input-to-hidden weight matrix
        del_w_input_hidden += delta_hidden_error * x[:, None]

    # Update weights with the step averaged over all records
    weights['weights_input_hidden'] += learnrate * del_w_input_hidden / n_records
    weights['weights_hidden_output'] += learnrate * del_w_hidden_output / n_records

    # Printing out the mean square error on the training set
    if e % print_every == 0:
        hidden_output = sigmoid(np.dot(inputs, weights['weights_input_hidden']))
        out = np.dot(hidden_output, weights['weights_hidden_output'])
        loss = np.mean((out - targets) ** 2)

        # Compare against None explicitly so that a previous loss of exactly
        # 0.0 is still treated as a valid value to compare with
        if last_loss is not None and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

Train loss:  14.2665527607
Train loss:  4.073877638
Train loss:  1.62061147507
Train loss:  1.42540319315
Train loss:  1.33267688656
Train loss:  1.23749133537
Train loss:  1.1329340063
Train loss:  1.02146032001
Train loss:  0.897418505767
Train loss:  0.762365125298
Train loss:  0.622354600346
Train loss:  0.487108663416
Train loss:  0.366860694159
Train loss:  0.267304889849
Train loss:  0.189174937881
Train loss:  0.130553737406
Train loss:  0.0882503510278
Train loss:  0.0586821949159
Train loss:  0.038525719334
Train loss:  0.0250453112499
Train loss:  0.0161593999047
Train loss:  0.0103658843461
Train loss:  0.0066198464816
Train loss:  0.00421298657373
Train loss:  0.002674046179
Train loss:  0.00169372163746
Train loss:  0.00107104535341
Train loss:  0.000676425263728
Train loss:  0.000426773129487
Train loss:  0.000269049675339
Train loss:  0.000169511292759
Train loss:  0.000106746210417
Train loss:  6.71952642047e-05
Train loss:  4.22855919137e-05
Train loss:  2.66036628798