# LSTM from Scratch

LSTM was designed to avoid the long-term dependency problem and to help mitigate the problem of vanishing and exploding gradients.

In [13]:
import numpy as np

class LSTM:
    """A single LSTM cell with a linear output layer, implemented in NumPy.

    Every gate has a weight matrix of shape
    ``(hidden_size, input_size + hidden_size)`` that acts on the vertical
    concatenation of the current input and the previous hidden state, plus a
    bias column of shape ``(hidden_size, 1)``.  The output layer maps the
    hidden state to ``output_size`` values.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the cell and randomly initialise its parameters.

        Args:
            input_size: Number of rows in each input column vector.
            hidden_size: Number of rows in the hidden and cell states.
            output_size: Number of rows in the produced output.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Gate weights are drawn from a standard normal and scaled by
        # 1/sqrt(fan_in); biases start at zero.  The draw order
        # (Wf, Wi, Wc, Wo, Wy) is kept fixed so seeded runs stay reproducible.
        concat_size = self.input_size + self.hidden_size
        gate_scale = np.sqrt(concat_size)

        self.Wf = np.random.randn(self.hidden_size, concat_size) / gate_scale
        self.Wf_bias = np.zeros((self.hidden_size, 1))
        self.Wi = np.random.randn(self.hidden_size, concat_size) / gate_scale
        self.Wi_bias = np.zeros((self.hidden_size, 1))
        self.Wc = np.random.randn(self.hidden_size, concat_size) / gate_scale
        self.Wc_bias = np.zeros((self.hidden_size, 1))
        self.Wo = np.random.randn(self.hidden_size, concat_size) / gate_scale
        self.Wo_bias = np.zeros((self.hidden_size, 1))
        self.Wy = np.random.randn(self.output_size, self.hidden_size) / np.sqrt(self.hidden_size)
        self.Wy_bias = np.zeros((self.output_size, 1))

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        The naive formula overflows in ``exp(-x)`` for large negative ``x``.
        Here the exponential is only ever taken of a non-positive number, so
        it stays within ``(0, 1]`` and no overflow warning is raised.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x)  (same value).
        return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

    def tanh(self, x):
        """Return the hyperbolic tangent of ``x`` element-wise."""
        return np.tanh(x)

    def forward(self, x, h_prev, c_prev, *, verbose=True):
        """Run one time step of the cell.

        Args:
            x: Input array with ``input_size`` rows (e.g. ``(input_size, 1)``).
            h_prev: Previous hidden state with ``hidden_size`` rows and the
                same number of columns as ``x``.
            c_prev: Previous cell state, same shape as ``h_prev``.
            verbose: When true (the default, matching the historical
                behaviour) print the inputs, every gate activation and the
                resulting states.  Pass ``verbose=False`` to run silently.

        Returns:
            A tuple ``(y, h, c)``: the output-layer result, the new hidden
            state and the new cell state.
        """
        # Stack the input on top of the previous hidden state so each gate
        # needs a single matrix product.
        X = np.concatenate((x, h_prev), axis=0)

        # Gate activations
        f = self.sigmoid(np.dot(self.Wf, X) + self.Wf_bias)
        i = self.sigmoid(np.dot(self.Wi, X) + self.Wi_bias)
        c_tilde = self.tanh(np.dot(self.Wc, X) + self.Wc_bias)
        o = self.sigmoid(np.dot(self.Wo, X) + self.Wo_bias)

        # Keep part of the old cell state (f) and add the gated candidate (i).
        c = f * c_prev + i * c_tilde
        h = o * self.tanh(c)

        # Linear read-out of the hidden state
        y = np.dot(self.Wy, h) + self.Wy_bias

        if verbose:
            print("Input:", x)
            print("Previous hidden state:", h_prev)
            print("Previous cell state:", c_prev)
            print("Forget gate:", f)
            print("Input gate:", i)
            print("Cell state tilde:", c_tilde)
            print("Output gate:", o)
            print("New cell state:", c)
            print("New hidden state:", h)
            print("Output:", y)

        return y, h, c


# Demo: push one random input column through a freshly initialised cell,
# starting from all-zero hidden and cell states.
input_size, hidden_size, output_size = 10, 20, 5

lstm = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
x = np.random.randn(input_size, 1)
h_prev = np.zeros((hidden_size, 1))
c_prev = np.zeros((hidden_size, 1))
output, h, c = lstm.forward(x, h_prev, c_prev)

Input: [[-1.25268429]
 [-0.25470724]
 [-0.61463076]
 [-1.64321899]
 [-0.8393397 ]
 [ 1.40533579]
 [-0.84721974]
 [ 0.96660431]
 [ 1.65118187]
 [ 0.40161896]]
Previous hidden state: [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
Previous cell state: [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
Forget gate: [[0.23417042]
 [0.28545063]
 [0.72423615]
 [0.69403207]
 [0.59440683]
 [0.30828485]
 [0.32288096]
 [0.74509935]
 [0.57685966]
 [0.39100936]
 [0.39000469]
 [0.68624131]
 [0.58646868]
 [0.59453955]
 [0.56729291]
 [0.42682636]
 [0.63956397]
 [0.81826426]
 [0.72493558]
 [0.70558134]]
Input gate: [[0.34557983]
 [0.74779949]
 [0.62143434]
 [0.66990709]
 [0.50760378]
 [0.39159774]
 [0.35813195]
 [0.59965289]
 [0.37327754]
 [0.61967989]
 [0.21401868]
 [0.51062713]
 [0.48227024]
 [0.43621414]
 [0.57126409]
 [0.4885616 ]
 [0.5148492 ]
 [0.47799347