In [None]:
# http://iamtrask.github.io/2015/11/15/anyone-can-code-lstm/

In [None]:
import copy, numpy as np

In [None]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))`` without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` (0-d for a scalar input) with every
        element mapped into the interval [0, 1].
    """
    x = np.asarray(x)
    # np.where computes BOTH candidate arrays in full before selecting, so
    # each branch has to be safe for every element.  exp(-|x|) always lies in
    # (0, 1], hence neither branch can overflow:
    #   x >= 0:  1 / (1 + exp(-x))       == 1 / (1 + decay)
    #   x <  0:  exp(x) / (1 + exp(x))   == decay / (1 + decay)
    decay = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def sigmoid_derivative(d_output):
    """Return ``d_output * (1 - d_output)``, element-wise.

    For ``s = sigmoid(z)`` this is the derivative of the logistic function
    with respect to ``z``, written in terms of the already-computed output
    ``s`` rather than the pre-activation ``z``.
    """
    complement = 1 - d_output
    return d_output * complement

In [None]:
# Number of bits used to represent each integer.
binary_dim = 8

# Exclusive upper bound of the integers representable in binary_dim bits.
largest_number = pow(2, binary_dim)

# Lookup table: int2binary[v] is the binary_dim-bit representation of v as a
# uint8 array of 0/1 values, most-significant bit first.  Built with shifts
# instead of np.unpackbits on uint8 so the row width follows binary_dim rather
# than being fixed at 8 bits.
bit_shifts = np.arange(binary_dim - 1, -1, -1)
int2binary = ((np.arange(largest_number)[:, np.newaxis] >> bit_shifts) & 1).astype(np.uint8)

In [None]:
# Seed NumPy's global RNG before the weights and training samples are drawn,
# so that a fresh-kernel run of this cell always produces the same output.
np.random.seed(2)

learning_rate = 0.1
input_dim = 2  # one bit of `a` and one bit of `b` per time step
hidden_dim = 16
output_dim = 1  # one predicted bit of the sum per time step
epochs = 10000

# Weights are initialised uniformly in [-1, 1).
w_ih = 2 * np.random.random((input_dim, hidden_dim)) - 1  # input_layer to hidden_layer parameters
w_hh = 2 * np.random.random((hidden_dim, hidden_dim)) - 1  # hidden_layer to next_hidden_layer parameters
w_ho = 2 * np.random.random((hidden_dim, output_dim)) - 1  # hidden_layer to output_layer parameters

# Accumulators for the weight updates of one training sample; they are summed
# over all time steps, applied once, and then reset.
w_ih_update = np.zeros_like(w_ih)
w_hh_update = np.zeros_like(w_hh)
w_ho_update = np.zeros_like(w_ho)

for epoch in range(epochs):
    # Generate one addition problem a + b = c.  Both operands are drawn from
    # [0, largest_number // 2), so the sum is always below largest_number and
    # is a valid index into int2binary.
    a_int = np.random.randint(0, largest_number // 2)
    a = int2binary[a_int]
    b_int = np.random.randint(0, largest_number // 2)
    b = int2binary[b_int]
    c_int = a_int + b_int
    c = int2binary[c_int]

    # Bits predicted by the network for this sample.
    d = np.zeros_like(c)

    # ---- forward pass -------------------------------------------------------
    # The bit arrays are walked from the last index down to index 0; the decode
    # step at the bottom of the loop treats the last index as the lowest bit.
    layer_output_deltas = []
    # Hidden state before the first time step.  Kept 2-D, shape (1, hidden_dim),
    # like every later hidden state.
    layer_hidden_values = [np.zeros((1, hidden_dim))]
    for i in range(binary_dim):
        position_binary = binary_dim - i - 1
        xi = np.array([[a[position_binary], b[position_binary]]])
        yi = np.array([[c[position_binary]]]).T

        layer_hidden = sigmoid(np.dot(xi, w_ih) + np.dot(layer_hidden_values[-1], w_hh))
        layer_output = sigmoid(np.dot(layer_hidden, w_ho))

        # Error is (target - output); the matching weight update is therefore
        # ADDED to the weights below.
        layer_output_error = yi - layer_output
        layer_output_deltas.append(layer_output_error * sigmoid_derivative(layer_output))

        layer_hidden_values.append(layer_hidden.copy())

        d[position_binary] = np.round(layer_output[0][0])

    # ---- backward pass (through time) ---------------------------------------
    future_layer_hidden_delta = np.zeros((1, hidden_dim))
    for i in range(binary_dim):
        # Negative indices revisit the recorded time steps from last to first.
        layer_output_delta = layer_output_deltas[-i - 1]
        layer_hidden = layer_hidden_values[-i - 1]
        pre_layer_hidden = layer_hidden_values[-i - 2]

        # Hidden delta combines the error flowing back from the following
        # time step with the error from this step's output.
        layer_hidden_delta = (
            future_layer_hidden_delta.dot(w_hh.T) + layer_output_delta.dot(w_ho.T)
        ) * sigmoid_derivative(layer_hidden)

        w_ho_update += layer_hidden.T.dot(layer_output_delta)
        w_hh_update += pre_layer_hidden.T.dot(layer_hidden_delta)

        # Forward step (binary_dim - 1 - i) consumed bit index i, so the input
        # belonging to the time step revisited here is a[i], b[i].
        step_input = np.array([[a[i], b[i]]])
        w_ih_update += step_input.T.dot(layer_hidden_delta)

        future_layer_hidden_delta = layer_hidden_delta

    # ---- apply and reset the accumulated updates ----------------------------
    w_ho += learning_rate * w_ho_update
    w_hh += learning_rate * w_hh_update
    w_ih += learning_rate * w_ih_update

    w_ho_update.fill(0)
    w_hh_update.fill(0)
    w_ih_update.fill(0)

    # ---- progress report ----------------------------------------------------
    if epoch % 1000 == 0:
        print("Pred:" + str(d))
        print("True:" + str(c))
        # Decode the predicted bits with Python ints (last index = lowest bit)
        # so the result does not depend on uint8 scalar arithmetic.
        out = sum(int(bit) << index for index, bit in enumerate(reversed(d)))
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")

import copy, numpy as np


# NOTE(review): this call runs after the weight initialisation and training
# loop earlier in this cell, so it has no effect on the random numbers they
# draw during the same execution; it sets the global RNG state for whatever
# runs next (including a re-run of this cell in the same kernel).
np.random.seed(2)