In [1]:
import numpy as np

In [2]:
class LSTM:
    """Minimal single-layer LSTM (forward pass only) with a linear read-out.

    Every gate weight matrix has shape ``(hidden_dim, input_dim + hidden_dim)``
    and is applied to the stacked column vector ``[x_t; h_{t-1}]``; every gate
    bias is a ``(hidden_dim, 1)`` column.  ``Wy`` / ``by`` map a hidden state to
    the ``output_dim``-sized output.  No training or back-propagation is
    implemented in this class.
    """

    # Parameter names accepted by ``set_manual_parameters``.
    _GATE_WEIGHTS = ("Wf", "Wi", "Wc", "Wo")
    _GATE_BIASES = ("bf", "bi", "bc", "bo")

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Store the layer sizes and draw the initial random parameters.

        Args:
            input_dim: Number of features in each time step's input.
            hidden_dim: Size of the hidden state and of the cell state.
            output_dim: Size of the linear read-out applied by ``predict``.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.initialize_weights()

    def initialize_weights(self):
        """(Re)initialise all parameters: small Gaussian weights, zero biases.

        Weights are drawn from the global ``np.random`` state scaled by 0.01,
        so call ``np.random.seed`` beforehand if reproducibility is needed.
        Also resets ``manual_params`` to ``False``.
        """
        self.Wf, self.Wi, self.Wc, self.Wo = [
            np.random.randn(self.hidden_dim, self.input_dim + self.hidden_dim) * 0.01
            for _ in range(4)
        ]
        self.bf, self.bi, self.bc, self.bo = [np.zeros((self.hidden_dim, 1)) for _ in range(4)]
        self.Wy = np.random.randn(self.output_dim, self.hidden_dim) * 0.01
        self.by = np.zeros((self.output_dim, 1))
        self.manual_params = False

    @staticmethod
    def sigmoid(x):
        """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

        ``exp`` is only ever applied to ``-|x|`` (a value <= 0), so large
        negative inputs no longer overflow ``np.exp`` and trigger a
        RuntimeWarning; for ``x >= 0`` the arithmetic is the textbook formula.
        """
        x = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(x))  # lies in (0, 1]; cannot overflow
        out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Hand back a NumPy scalar (not a 0-d array) for scalar input.
        return out[()] if out.ndim == 0 else out

    @staticmethod
    def tanh(x):
        """Hyperbolic tangent (thin wrapper around ``np.tanh``)."""
        return np.tanh(x)

    @staticmethod
    def _as_sequence(x_sequence):
        """Return the input as an array whose first axis indexes time steps.

        Scalars and 1-D inputs are treated as a sequence of scalar steps and
        reshaped to ``(T, 1)``; inputs with two or more axes are left as is.
        """
        x_sequence = np.asarray(x_sequence)
        return x_sequence.reshape(-1, 1) if x_sequence.ndim < 2 else x_sequence

    def _parameter_shapes(self):
        """Map every settable parameter name to the shape the model requires."""
        gate_weight = (self.hidden_dim, self.input_dim + self.hidden_dim)
        gate_bias = (self.hidden_dim, 1)
        shapes = {name: gate_weight for name in self._GATE_WEIGHTS}
        shapes.update({name: gate_bias for name in self._GATE_BIASES})
        shapes["Wy"] = (self.output_dim, self.hidden_dim)
        shapes["by"] = (self.output_dim, 1)
        return shapes

    def _step(self, x, h, c):
        """Advance the cell by one time step.

        Args:
            x: Input for this step; flattened into a column vector.
            h: Previous hidden state, shape ``(hidden_dim, 1)``.
            c: Previous cell state, shape ``(hidden_dim, 1)``.

        Returns:
            ``(h_next, c_next, gates)`` where ``gates`` is a dict holding the
            forget/input/output gate activations, the candidate cell state and
            the new cell and hidden states.
        """
        concat = np.vstack((x.reshape(-1, 1), h))

        f = self.sigmoid(np.dot(self.Wf, concat) + self.bf)
        i = self.sigmoid(np.dot(self.Wi, concat) + self.bi)
        c_tilde = self.tanh(np.dot(self.Wc, concat) + self.bc)
        o = self.sigmoid(np.dot(self.Wo, concat) + self.bo)

        c_next = f * c + i * c_tilde
        h_next = o * self.tanh(c_next)

        gates = {
            'forget_gate': f,
            'input_gate': i,
            'candidate_cell': c_tilde,
            'cell_state': c_next,
            'output_gate': o,
            'hidden_state': h_next,
        }
        return h_next, c_next, gates

    def forward(self, x_sequence, h_prev=None, c_prev=None, return_sequences=False):
        """Run the LSTM over a whole sequence.

        Args:
            x_sequence: Array-like; 1-D input is a sequence of scalar steps,
                otherwise the first axis indexes time steps.
            h_prev: Initial hidden state ``(hidden_dim, 1)``; zeros if ``None``.
            c_prev: Initial cell state ``(hidden_dim, 1)``; zeros if ``None``.
            return_sequences: If true, return the per-step lists of hidden and
                cell states instead of only the final ones.

        Returns:
            ``(h, c, gate_values)``.  ``h`` and ``c`` are the final states, or
            lists with one entry per step when ``return_sequences`` is true.
            ``gate_values`` is always a list with one gate dict per step.  For
            an empty sequence the final states are the initial states.
        """
        x_sequence = self._as_sequence(x_sequence)
        h = h_prev if h_prev is not None else np.zeros((self.hidden_dim, 1))
        c = c_prev if c_prev is not None else np.zeros((self.hidden_dim, 1))

        h_states, c_states, gate_values = [], [], []
        for x in x_sequence:
            h, c, gates = self._step(x, h, c)
            h_states.append(h)
            c_states.append(c)
            gate_values.append(gates)

        if return_sequences:
            return h_states, c_states, gate_values
        return h, c, gate_values

    def predict(self, x_sequence):
        """Run the sequence from zero states and apply the linear read-out.

        Returns:
            ``(y, h, c)`` where ``y`` is the first component of
            ``Wy @ h + by`` and ``h`` / ``c`` are the final states.
        """
        h, c, _ = self.forward(x_sequence)
        return (np.dot(self.Wy, h) + self.by).flatten()[0], h, c

    def set_manual_parameters(self, params):
        """Overwrite model parameters with caller-supplied values.

        Args:
            params: Mapping from parameter name (``Wf``, ``Wi``, ``Wc``, ``Wo``,
                ``bf``, ``bi``, ``bc``, ``bo``, ``Wy``, ``by``) to an array-like.
                Any subset may be given.  Biases with the right number of
                elements are reshaped to columns, because a 1-D bias would
                otherwise broadcast ``(H, 1) + (H,)`` into an ``(H, H)`` matrix.

        Raises:
            ValueError: On an unknown parameter name or a shape that does not
                match the model dimensions.  Nothing is modified in that case.
        """
        shapes = self._parameter_shapes()
        validated = {}
        for name in params:
            if name not in shapes:
                raise ValueError(
                    f"Unknown parameter {name!r}; expected one of {sorted(shapes)}"
                )
            value = np.asarray(params[name], dtype=float)
            expected = shapes[name]
            if value.shape != expected:
                is_bias = name.startswith("b")
                if is_bias and value.size == expected[0]:
                    value = value.reshape(expected)
                else:
                    raise ValueError(
                        f"Parameter {name!r} has shape {value.shape}, expected {expected}"
                    )
            validated[name] = value

        # Assign only after everything validated so a bad entry leaves the
        # model untouched.
        for name, value in validated.items():
            setattr(self, name, value)
        self.manual_params = True

    def print_step_details(self, x_sequence):
        """Print every gate activation and state for each step of a sequence.

        The recurrence starts from zero hidden and cell states and uses the
        same per-step computation as ``forward``.  Returns ``None``.
        """
        x_sequence = self._as_sequence(x_sequence)
        h, c = np.zeros((self.hidden_dim, 1)), np.zeros((self.hidden_dim, 1))

        print("Initial states:", f"h0 = {h.flatten()}, c0 = {c.flatten()}\n")
        for t, x in enumerate(x_sequence, start=1):
            h, c, gates = self._step(x, h, c)

            # Show a bare scalar for single-feature inputs, the whole vector otherwise.
            x_flat = x.flatten()
            shown = x_flat[0] if x_flat.size == 1 else x_flat

            print(f"Time Step {t}, Input: {shown}")
            print(f"  Forget gate: {gates['forget_gate'].flatten()}")
            print(f"  Input gate: {gates['input_gate'].flatten()}")
            print(f"  Candidate cell state: {gates['candidate_cell'].flatten()}")
            print(f"  Cell state: {gates['cell_state'].flatten()}")
            print(f"  Output gate: {gates['output_gate'].flatten()}")
            print(f"  Hidden state: {gates['hidden_state'].flatten()}\n")

In [3]:
def run_numerical_example():
    """Trace a hand-parameterised LSTM over the sequence [1, 2, 3].

    Builds a 1-input / 2-hidden / 1-output ``LSTM``, replaces its random
    parameters with fixed values, prints the per-step gate activations and
    finally prints the read-out next to the reference value 4.  No training
    takes place in this function; the parameters are used exactly as given.
    """
    model = LSTM(input_dim=1, hidden_dim=2, output_dim=1)

    # Gate matrices: one row per hidden unit, columns ordered as [x, h1, h2].
    gate_weights = {
        'Wf': np.array([[0.7, 0.2, 0.3], [0.6, 0.3, 0.1]]),
        'Wi': np.array([[0.2, 0.4, 0.1], [0.1, 0.5, 0.2]]),
        'Wc': np.array([[0.5, 0.3, 0.2], [0.2, 0.4, 0.1]]),
        'Wo': np.array([[0.1, 0.5, 0.3], [0.2, 0.3, 0.4]]),
    }
    # Gate biases as column vectors, one entry per hidden unit.
    gate_biases = {
        'bf': np.array([[0.5], [0.4]]),
        'bi': np.array([[0.1], [0.2]]),
        'bc': np.array([[0.3], [0.1]]),
        'bo': np.array([[0.2], [0.1]]),
    }
    # Linear read-out applied to the final hidden state.
    readout = {
        'Wy': np.array([[0.6, 0.4]]),
        'by': np.array([[0.1]]),
    }
    model.set_manual_parameters({**gate_weights, **gate_biases, **readout})

    inputs = np.array([1, 2, 3])
    model.print_step_details(inputs)

    # predict returns (value, h, c); only the scalar value is reported.
    prediction = model.predict(inputs)[0]
    print(f"Final Prediction: {prediction}, Expected Value: 4, Difference: {abs(prediction - 4)}")

In [4]:
# Entry point: run the worked example when this code executes as the main
# module (which is also the case when the cell is run in a notebook kernel).
if __name__ == "__main__":
    run_numerical_example()

Initial states: h0 = [0. 0.], c0 = [0. 0.]

Time Step 1, Input: 1
  Forget gate: [0.76852478 0.73105858]
  Input gate: [0.57444252 0.57444252]
  Candidate cell state: [0.66403677 0.29131261]
  Cell state: [0.38145095 0.16734235]
  Output gate: [0.57444252 0.57444252]
  Hidden state: [0.20907805 0.09524119]

Time Step 2, Input: 2
  Forget gate: [0.87765278 0.84187525]
  Input gate: [0.6440888 0.6279862]
  Candidate cell state: [0.88134737 0.53216116]
  Cell state: [0.90244746 0.47507125]
  Output gate: [0.6302085  0.64584386]
  Hidden state: [0.45216671 0.28564878]

Time Step 3, Input: 3
  Forget gate: [0.94137211 0.91405749]
  Input gate: [0.71288383 0.6863722 ]
  Candidate cell state: [0.9635139  0.72085931]
  Cell state: [1.53641234 0.92902023]
  Output gate: [0.69248825 0.72109698]
  Hidden state: [0.6312138  0.52649942]

Final Prediction: 0.6893280487995376, Expected Value: 4, Difference: 3.3106719512004625
