In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [12]:
class LSTM:
    """End-to-end many-to-many LSTM network implemented with numpy.

    A single scalar LSTM cell is unrolled over ``units`` steps.  Each step
    computes four gates, always kept in the order ``[a, i, f, o]``:

    * ``a`` - candidate activation (tanh)
    * ``i`` - input gate (sigmoid)
    * ``f`` - forget gate (sigmoid)
    * ``o`` - output gate (sigmoid)
    """

    def __init__(self, input_size, units):
        """
        Specify the Network's architecture.

        Initializes weights and biases of the Network.

        Parameters
        ----------
        input_size (int) : size of the input vector X
        units (int) : Number of steps the cell is unrolled over (one row of X
            and one label of y is consumed per step)
        """
        self.input_size = input_size
        self.units = units
        # The parameters are pinned to fixed values (instead of a random
        # initialisation such as np.random.randn(4, input_size)) so that the
        # forward/backward passes are reproducible.  W has two columns, so
        # inputs are expected to carry two features per step.
        # Rows are ordered [a, i, f, o].
        self.W = np.array([[0.45, 0.25], [0.95, 0.8], [0.7, 0.45], [0.6, 0.4]])
        self.U = np.array([0.15, 0.8, 0.1, 0.25])  # recurrent weights [Ua, Ui, Uf, Uo]
        self.b = np.array([0.2, 0.65, 0.15, 0.1])  # biases [ba, bi, bf, bo]

    def sigmoid(self, x):
        """Calculate the sigmoid of x."""
        return 1 / (1 + np.exp(-x))

    def _preactivation(self, gate, x, prev_out):
        """Return the weighted input of gate number `gate` (0=a, 1=i, 2=f, 3=o)."""
        return np.dot(x, self.W[gate]) + prev_out * self.U[gate] + self.b[gate]

    def forwardprop(self, X, y=None):
        """
        Forward propagation of the Network.

        Parameters
        ----------
        X (np ndarray) : Array with one input row per step (X[j] feeds step j)
        y (np array) : Vector containing the label for each step. It is not
            used by the forward pass and may be omitted.

        Returns
        -------
        ops (list) : Contains a dictionary with the keys
            ('a', 'i', 'f', 'o', 'state', 'out') for each step. The same list
            is also stored on the instance as ``self.ops``.
        """
        prev_state = 0
        prev_out = 0
        ops = []
        for j in range(self.units):
            a = np.tanh(self._preactivation(0, X[j], prev_out))
            i = self.sigmoid(self._preactivation(1, X[j], prev_out))
            f = self.sigmoid(self._preactivation(2, X[j], prev_out))
            o = self.sigmoid(self._preactivation(3, X[j], prev_out))

            state = prev_state * f + i * a
            out = o * np.tanh(state)
            print("out = ", out)
            ops.append({'a': a, 'i': i, 'f': f, 'o': o, 'state': state, 'out': out})
            prev_state = state
            prev_out = out
        self.ops = ops
        return ops

    def backprop(self, ops, y):
        """
        Backpropagation (through time) of the Network.

        The per-step loss gradient is ``out - y``, i.e. the derivative of the
        squared error ``0.5 * (out - y) ** 2`` with respect to ``out``.

        Parameters
        ----------
        ops (list) : Contains a dictionary with the keys
            ('a', 'i', 'f', 'o', 'state', 'out') for each step
            (calculated during forwardprop)
        y (np array) : Vector containing the label for each step

        Returns
        -------
        deltas (list) : One dictionary per step (same order as `ops`) holding
            the gradient of the loss with respect to each forward quantity:
            keys 'a', 'i', 'f', 'o' are the gradients w.r.t. the gate
            pre-activations, 'state' and 'out' the gradients w.r.t. the cell
            state and output, and 'x' the gradient w.r.t. the step's input.
            The list is also stored on the instance as ``self.deltas``.
        """
        deltas = [None] * self.units
        # Gradients flowing back from step j + 1 into step j; there is no
        # later step behind the last one, so both start at zero.
        delta_next_out = 0
        delta_next_state = 0  # already multiplied by the forget gate of step j + 1
        for j in range(self.units - 1, -1, -1):
            op = ops[j]
            tanh_state = np.tanh(op['state'])

            delta_loss = op['out'] - y[j]
            delta_out = delta_loss + delta_next_out
            delta_state = delta_out * op['o'] * (1 - tanh_state ** 2) + delta_next_state

            delta_a = delta_state * op['i'] * (1 - op['a'] ** 2)
            delta_i = delta_state * op['a'] * op['i'] * (1 - op['i'])
            # The first step starts from a zero cell state (see forwardprop),
            # so its forget gate receives no gradient.
            prev_state = ops[j - 1]['state'] if j > 0 else 0
            delta_f = delta_state * prev_state * op['f'] * (1 - op['f'])
            delta_o = delta_out * tanh_state * op['o'] * (1 - op['o'])

            gates = np.array([delta_a, delta_i, delta_f, delta_o])
            deltas[j] = {
                'a': delta_a,
                'i': delta_i,
                'f': delta_f,
                'o': delta_o,
                'state': delta_state,
                'out': delta_out,
                'x': np.dot(gates, self.W),
            }

            # Carry the recurrent gradients over to the previous step.
            delta_next_out = np.dot(self.U, gates)
            delta_next_state = delta_state * op['f']
        self.deltas = deltas
        return deltas

In [31]:
a = [1, 2]  # two-element list; indexed by the following cell

In [None]:
# NOTE(review): unfinished cell - the `if` has no body, and `a` (defined in the
# previous cell) has only two elements, so a[3] would raise IndexError.
if a[3]:

In [13]:
# Sample inputs (one row per step) and their labels, passed to the network below.
x0, x1 = np.array([1, 2]), np.array([0.5, 3])
y0, y1 = 0.5, 1.25
X = np.array([x0, x1])
y = np.array([y0, y1])

In [14]:
# Two input features per step, unrolled over two steps.
net = LSTM(input_size=2, units=2)

In [15]:
# Run the forward pass; one dictionary of gate values is returned per step.
ops = net.forwardprop(X=X, y=y)


out =  0.5363133978820118

out =  0.7719811057588907


In [23]:
# forwardprop stores a dict per step, so the output is looked up by key
# (an integer index such as ops[1][5] raises KeyError on a dict).
ops[1]['out']

0.7719811057588907