In [2]:
import math


def sigmoid(t):
    """Return the logistic sigmoid of t, i.e. 1 / (1 + e**-t).

    Args:
        t: A real number (the neuron's pre-activation value).

    Returns:
        A float in the closed interval [0.0, 1.0].

    For strongly negative t, math.exp(-t) exceeds the float range and
    raises OverflowError.  That case is handled explicitly so the
    function returns a value for every finite input; all other inputs
    produce exactly the same result as the plain formula.
    """
    try:
        return 1 / (1 + math.exp(-t))
    except OverflowError:
        # exp(-t) overflowed, so t is hugely negative.  There
        # sigmoid(t) = e**t / (1 + e**t), and 1 + e**t rounds to 1.0,
        # so e**t is the answer (it underflows harmlessly to 0.0).
        return math.exp(t)


def neuron_output(weights, inputs):
    """Return the activation of one neuron for the given inputs.

    The bias is folded into weights, so inputs must already carry the
    matching constant bias input.  The weighted sum of the two vectors
    is squashed through sigmoid.
    """
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)


def dot(v, w):
    """Return the dot product of the vectors v and w.

    Elements are paired with zip, so if the vectors differ in length
    the extra trailing elements of the longer one are ignored.
    """
    total = 0
    for left, right in zip(v, w):
        total = total + left * right
    return total


def feed_forward(neural_network, input_vector):
    """Propagate input_vector through every layer of neural_network.

    neural_network is iterated as a sequence of layers, and each layer
    as a sequence of neurons (weight vectors).  Returns a list holding
    one output vector per layer, in layer order; the last entry is the
    network's final output.
    """
    layer_outputs = []
    current = input_vector

    for layer in neural_network:
        # Append the constant bias input (1) before feeding the layer
        biased = current + [1]
        # One activation per neuron; this becomes the next layer's input
        current = [neuron_output(neuron, biased) for neuron in layer]
        layer_outputs.append(current)

    return layer_outputs


# Hand-picked weights for a two-layer network that computes XOR on 0/1
# inputs.  Each neuron is [weight_1, weight_2, bias_weight]; feed_forward
# appends the constant bias input 1 to every layer's input.
xor_network = [  # hidden layer
    [[20.0, 20.0, -30.0],      # 'and' neuron: weighted sum > 0 only for (1, 1)
     [20.0, 20.0, -10.0]],     # 'or'  neuron: weighted sum > 0 if any input is 1
    # output layer: its two inputs are the 'and' and 'or' outputs above
    [[-60.0, 60.0, -30.0]]]    # 'or but not and' (2nd input but not 1st input) neuron

# feed_forward returns the outputs of all layers, so the [-1] gets the
# final layer's output, and the [0] gets the single value out of that vector
print("XOR Manual Output:", feed_forward(xor_network, [0, 1])[-1][0])


def sqerror_gradients(network, input_vector, target_vector):
    """Return the weight gradients of the squared-error loss.

    Runs a forward pass for input_vector, then backpropagates the error
    against target_vector.  The network must have exactly two layers
    (hidden, output) because the forward-pass result is unpacked into
    two values.  The deltas omit the constant factor 2, so this is the
    gradient of 0.5 * sum((output - target) ** 2).

    Returns [hidden_grads, output_grads], where each entry holds one
    gradient vector per neuron (one value per weight, bias last).
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)
    output_layer = network[-1]

    # gradients with respect to the output neurons' pre-activation values
    output_deltas = []
    for predicted, expected in zip(outputs, target_vector):
        output_deltas.append(
            predicted * (1 - predicted) * (predicted - expected))

    # gradients with respect to the output neurons' weights
    hidden_with_bias = hidden_outputs + [1]
    output_grads = []
    for k, _ in enumerate(output_layer):
        output_grads.append(
            [output_deltas[k] * activation
             for activation in hidden_with_bias])

    # gradients with respect to the hidden neurons' pre-activation values
    hidden_deltas = []
    for j, activation in enumerate(hidden_outputs):
        # weights that connect hidden neuron j to each output neuron
        outgoing = [neuron[j] for neuron in output_layer]
        hidden_deltas.append(
            activation * (1 - activation) * dot(output_deltas, outgoing))

    # gradients with respect to the hidden neurons' weights
    input_with_bias = input_vector + [1]
    hidden_grads = []
    for j, _ in enumerate(network[0]):
        hidden_grads.append(
            [hidden_deltas[j] * value for value in input_with_bias])

    return [hidden_grads, output_grads]


def scalar_multiply(c, v):
    """Return a new list with every element of vector v scaled by c."""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled


def add(v, w):
    """Return the element-wise sum of vectors v and w as a new list.

    Elements are paired with zip, so the result is as long as the
    shorter of the two inputs.
    """
    summed = []
    for left, right in zip(v, w):
        summed.append(left + right)
    return summed


def gradient_step(v, gradient, step_size):
    """Return v moved by step_size along the gradient direction.

    The result is v + step_size * gradient, computed element-wise; pass
    a negative step_size to move against the gradient.
    """
    return add(v, scalar_multiply(step_size, gradient))


# Script entry point: train a 2-2-1 network on XOR with per-example
# gradient descent, then print its predictions and learned weights.
if __name__ == "__main__":
    import random
    # Fix the seed so the random initial weights below are repeatable
    random.seed(0)

    # training data: the four XOR input pairs and their one-element targets
    xs = [[0.0, 0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
    ys = [[0.0], [1.0], [1.0], [0.0]]

    # start with random weights; every neuron gets 2 input weights plus
    # 1 bias weight, hence range(2 + 1)
    network = [  # hidden layer: 2 inputs -> 2 outputs
        [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
         [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
        # output layer: 2 inputs -> 1 output
        [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
    ]

    # tqdm (third-party) is only used to show progress over the epoch loop
    import tqdm

    # The model's learning rate (i.e. the gradient step size)
    learning_rate = 1.0

    # Run 20000 training epochs
    for epoch in tqdm.trange(20000, desc="Training XOR NN"):
        # within an epoch, update once per (input, target) training pair
        for x, y in zip(xs, ys):
            # Calculate the network gradients for this single example
            gradients = sqerror_gradients(network, x, y)

            # Take a gradient step for each neuron in each layer; the step
            # size is negated so the weights move against the gradient
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                       for layer, layer_grad in zip(network, gradients)]

    # check that it learned XOR: [-1] selects the output layer and [0] its
    # single neuron's value
    print("\nXOR Trained Output [0, 0]:", feed_forward(network, [0, 0])[-1][0])
    print("XOR Trained Output [0, 1]:", feed_forward(network, [0, 1])[-1][0])
    print("XOR Trained Output [1, 0]:", feed_forward(network, [1, 0])[-1][0])
    print("XOR Trained Output [1, 1]:", feed_forward(network, [1, 1])[-1][0])

    # Print the network weights
    print("\nNetwork Learned Weights:\n", network)


XOR Manual Output: 0.9999999999999059


Training XOR NN: 100%|██████████| 20000/20000 [00:00<00:00, 22847.85it/s]


XOR Trained Output [0, 0]: 0.009033699537611711
XOR Trained Output [0, 1]: 0.9923292625479107
XOR Trained Output [1, 0]: 0.9923280275107558
XOR Trained Output [1, 1]: 0.007855695468228051

Network Learned Weights:
 [[[6.953505610104288, 6.952785792366961, -3.148476196504664], [5.115899442661919, 5.115407875835948, -7.839603434415658]], [[10.961705832630566, -11.63060534664317, -5.144229056613083]]]



