# LSTM example

Forget Gate $f_t$: decides what information should be discarded from the cell state

$f_t=\sigma(W_f \cdot [h_{t-1}, x_t]+b_f)$


Input Gate $i_t$: decides what new information should be added to the cell state

$i_t=\sigma(W_i \cdot [h_{t-1},x_t ]+b_i)$


Candidate Cell state $\tilde{C}_t$: creates a candidate vector to add to the cell state

$\tilde{C}_t = \tanh(W_c\cdot [ h_{t-1}, x_t ] + b_c)$


Cell State Update $C_t$: updates the old cell state by forgetting the things decided to forget and adding the new candidate values

$C_t=f_t * C_{t-1}+ i_t * \tilde{C}_t$


Output Gate $o_t$: decides what parts of the cell state we're going to output.

$o_t=\sigma(W_o \cdot [h_{t-1}, x_t]+b_o)$


Hidden State Update $h_t$: updates the hidden state which is also the output of the LSTM unit at time step $t$.

$h_t=o_t * \tanh(C_t)$


In [9]:
import numpy as np

class LSTM:
    """Minimal single-layer LSTM with a linear read-out (forward pass only).

    Every gate operates on the concatenation ``[h_{t-1}, x_t]``, so each gate
    weight matrix has shape ``(hidden_dim, hidden_dim + input_dim)``. The
    read-out ``Wy`` maps the hidden state to an ``output_dim``-sized vector.
    Weights are drawn from a standard normal via NumPy's global RNG; biases
    start at zero.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the parameters.

        Args:
            input_dim: Length of each input vector ``x_t``.
            hidden_dim: Length of the hidden state ``h_t`` and cell state ``c_t``.
            output_dim: Length of each per-step output ``y_t``.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        # Initialize weights.
        # NOTE: the draw order (Wf, Wi, Wc, Wo, Wy) determines the values
        # obtained under a fixed np.random.seed; keep it stable.
        self.Wf = np.random.randn(hidden_dim, hidden_dim + input_dim)
        self.Wi = np.random.randn(hidden_dim, hidden_dim + input_dim)
        self.Wc = np.random.randn(hidden_dim, hidden_dim + input_dim)
        self.Wo = np.random.randn(hidden_dim, hidden_dim + input_dim)
        self.Wy = np.random.randn(output_dim, hidden_dim)

        # Initialize biases
        self.bf = np.zeros(hidden_dim)
        self.bi = np.zeros(hidden_dim)
        self.bc = np.zeros(hidden_dim)
        self.bo = np.zeros(hidden_dim)
        self.by = np.zeros(output_dim)

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        """Return the element-wise hyperbolic tangent of ``x``."""
        return np.tanh(x)

    def forward_pass(self, inputs):
        """Run the LSTM over a sequence, starting from zero hidden/cell state.

        Args:
            inputs: Iterable of 1-D arrays, each of length ``input_dim``
                (one per time step). May be empty.

        Returns:
            A tuple ``(outputs, h, c)`` where ``outputs`` is a list with one
            ``output_dim``-sized array per time step, and ``h`` / ``c`` are
            the hidden and cell states after the last step. For an empty
            sequence ``outputs`` is ``[]`` and ``h`` / ``c`` are the zero
            initial states.
        """
        h_prev = np.zeros(self.hidden_dim)
        c_prev = np.zeros(self.hidden_dim)

        outputs = []

        for x_t in inputs:
            # All four gates share the same input: [h_{t-1}, x_t].
            combined = np.concatenate((h_prev, x_t), axis=0)

            # Forget gate
            f_t = self.sigmoid(np.dot(self.Wf, combined) + self.bf)

            # Input gate and candidate cell state
            i_t = self.sigmoid(np.dot(self.Wi, combined) + self.bi)
            C_tilde = self.tanh(np.dot(self.Wc, combined) + self.bc)

            # Update cell state
            c_t = f_t * c_prev + i_t * C_tilde

            # Output gate
            o_t = self.sigmoid(np.dot(self.Wo, combined) + self.bo)

            # Update hidden state
            h_t = o_t * self.tanh(c_t)

            # Compute output
            y_t = np.dot(self.Wy, h_t) + self.by
            outputs.append(y_t)

            h_prev, c_prev = h_t, c_t

        # Return the carried state rather than the loop locals: after a
        # non-empty loop they are the same objects, and for an empty sequence
        # this yields the initial state instead of raising UnboundLocalError.
        return outputs, h_prev, c_prev

# Example usage
if __name__ == "__main__":
    # Fix the global RNG so the demo prints the same numbers on every run.
    np.random.seed(0)

    seq_len = 5
    n_features = 10

    # Build a dummy sequence: one random feature vector per time step.
    inputs = []
    for _ in range(seq_len):
        inputs.append(np.random.randn(n_features))

    # Construct the network after the inputs so the RNG draw order is fixed.
    lstm = LSTM(input_dim=n_features, hidden_dim=20, output_dim=5)

    # Run the whole sequence through the network.
    outputs, h, c = lstm.forward_pass(inputs)

    final_output = outputs[-1]
    print("Output of the last time step:", final_output.reshape(-1))


Output of the last time step: [0.79092756 0.38927816 0.07842032 0.59984175 2.35535945]
