In [2]:
import numpy as np

def initialize_params(input_size, hidden_size, output_size):
    """
    Build the parameter dictionary for a single LSTM layer with a linear readout.

    The returned dict stores the three sizes under 'input_size',
    'hidden_size' and 'output_size', plus:

    * 'W_f', 'W_i', 'W_c', 'W_o' -- gate weights of shape
      (hidden_size, input_size + hidden_size), drawn from a standard normal
      and scaled by 0.01.
    * 'b_f', 'b_i', 'b_c', 'b_o' -- zero biases of shape (hidden_size, 1).
    * 'W_y' -- readout weights of shape (output_size, hidden_size), same
      scaled-normal initialisation.
    * 'b_y' -- zero readout bias of shape (output_size, 1).
    """
    gate_names = ('f', 'i', 'c', 'o')
    concat_size = input_size + hidden_size

    params = {
        'input_size': input_size,
        'hidden_size': hidden_size,
        'output_size': output_size,
    }

    # All gate weights are drawn before any bias is created: the draw order
    # f, i, c, o decides which random numbers each matrix receives when the
    # global NumPy RNG has been seeded.
    for gate in gate_names:
        params[f'W_{gate}'] = np.random.randn(hidden_size, concat_size) * 0.01
    for gate in gate_names:
        params[f'b_{gate}'] = np.zeros((hidden_size, 1))

    # Linear readout from the hidden state to the output.
    params['W_y'] = np.random.randn(output_size, hidden_size) * 0.01
    params['b_y'] = np.zeros((output_size, 1))

    return params

def sigmoid(z):
    """
    Numerically stable logistic sigmoid, 1 / (1 + exp(-z)), applied elementwise.

    The naive formula evaluates exp(-z), which overflows (and emits a
    RuntimeWarning) for large negative z. Here only exp(-|z|) is evaluated,
    which always lies in (0, 1], and the algebraically equivalent branch is
    selected per element:

        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))

    Accepts a scalar or an array; a scalar input yields a NumPy scalar and
    an array input yields an array of the same shape.
    """
    z = np.asarray(z)
    # exp of a non-positive number can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays with one or more dimensions untouched.
    return result[()]

def lstm_cell_forward(x, h_prev, c_prev, params):
    """
    Advance the LSTM by a single time step.

    `x` is stacked vertically on top of `h_prev`, and every gate applies its
    own weight matrix and bias from `params` ('W_f'/'b_f', 'W_i'/'b_i',
    'W_c'/'b_c', 'W_o'/'b_o') to that stacked vector.

    Returns a tuple (h_next, c_next, cache) where `cache` holds the gate
    activations and both new states under the keys 'f_gate', 'i_gate',
    'c_candidate', 'o_gate', 'c_next' and 'h_next'.
    """
    stacked = np.vstack((x, h_prev))

    def pre_activation(gate):
        # Affine transform of the stacked [x; h_prev] vector for one gate.
        return np.dot(params['W_' + gate], stacked) + params['b_' + gate]

    forget = sigmoid(pre_activation('f'))
    write = sigmoid(pre_activation('i'))
    proposal = np.tanh(pre_activation('c'))
    expose = sigmoid(pre_activation('o'))

    # Keep part of the old cell state and blend in the gated candidate.
    new_cell = forget * c_prev + write * proposal
    # The hidden state is the squashed cell state seen through the output gate.
    new_hidden = expose * np.tanh(new_cell)

    cache = {
        'f_gate': forget,
        'i_gate': write,
        'c_candidate': proposal,
        'o_gate': expose,
        'c_next': new_cell,
        'h_next': new_hidden,
    }
    return new_hidden, new_cell, cache

def lstm_forward_sequence(x_seq, params, h0=None, c0=None):
    """
    Run the LSTM cell over every row of a 2-D input sequence.

    `x_seq.shape` is unpacked as (seq_len, input_size); each row is reshaped
    to a column vector and fed to `lstm_cell_forward` together with the
    states produced by the previous step. When `h0` / `c0` are None the
    corresponding state starts as zeros of shape (params['hidden_size'], 1).

    Returns three lists with one entry per time step: the hidden states,
    the cell states, and the per-step caches.
    """
    _, n_features = x_seq.shape
    n_hidden = params['hidden_size']

    hidden = h0 if h0 is not None else np.zeros((n_hidden, 1))
    cell = c0 if c0 is not None else np.zeros((n_hidden, 1))

    h_outputs, c_outputs, caches = [], [], []
    for row in x_seq:
        column = row.reshape(n_features, 1)
        hidden, cell, step_cache = lstm_cell_forward(column, hidden, cell, params)
        h_outputs.append(hidden)
        c_outputs.append(cell)
        caches.append(step_cache)

    return h_outputs, c_outputs, caches

def predict_lstm(x_seq, params):
    """
    Predict an output from the hidden state at the last time step.

    Runs `lstm_forward_sequence` with its default initial states, then
    applies the linear readout params['W_y'] / params['b_y'] to the last
    hidden state.

    Returns (y_pred, h_list): the readout result and the list of hidden
    states for every time step.
    """
    hidden_states, _cells, _caches = lstm_forward_sequence(x_seq, params)
    last_hidden = hidden_states[-1]
    readout = np.dot(params['W_y'], last_hidden) + params['b_y']
    return readout, hidden_states

def update_with_manual_params(params, manual_dict):
    """
    Overwrite entries of `params` with the values from `manual_dict`.

    `params` is modified in place and also returned, so the call can be
    used either for its side effect or as an expression. Keys that are not
    yet present in `params` are added.
    """
    params.update(manual_dict.items())
    return params

def debug_lstm_steps(x_seq, params):
    """
    Print a step-by-step trace of the LSTM forward pass over `x_seq`.

    Starting from zero hidden and cell states of shape
    (params['hidden_size'], 1), each row of `x_seq` is reshaped to a column
    vector. For the forget gate, input gate, candidate cell state and output
    gate the pre-activation and the activated value are printed, rounded to
    four decimals, followed by the updated cell and hidden states.

    Nothing is returned; the function only writes to standard output.
    """
    # (section heading, parameter suffix, activation, label for the activated value)
    stages = (
        ("Forget Gate:", 'f', sigmoid, "  After sigmoid:"),
        ("Input Gate:", 'i', sigmoid, "  After sigmoid:"),
        ("Candidate Cell State:", 'c', np.tanh, "  After tanh:"),
        ("Output Gate:", 'o', sigmoid, "  After sigmoid:"),
    )

    def show(label, values):
        # Flatten column vectors so they print on a single line.
        print(label, np.round(values.flatten(), 4))

    n_hidden = params['hidden_size']
    n_steps = x_seq.shape[0]

    hidden = np.zeros((n_hidden, 1))
    cell = np.zeros((n_hidden, 1))

    print("=== LSTM Forward Pass Debug Details ===")
    print(f"Initial hidden state (h0): {hidden.flatten()}")
    print(f"Initial cell state (c0): {cell.flatten()}")
    print("=" * 50)

    for t in range(n_steps):
        x_t = x_seq[t].reshape(-1, 1)
        print(f"\n[Time Step {t+1}]")
        print(f"Input x[{t}]: {x_t.flatten()}")

        stacked = np.vstack((x_t, hidden))

        activated = {}
        for heading, gate, activation, label in stages:
            raw = np.dot(params['W_' + gate], stacked) + params['b_' + gate]
            activated[gate] = activation(raw)
            print(heading)
            show("  Raw computation:", raw)
            show(label, activated[gate])

        # The new cell state uses the previous one, so update it before the
        # hidden state, which depends on the new cell state.
        cell = activated['f'] * cell + activated['i'] * activated['c']
        hidden = activated['o'] * np.tanh(cell)

        print("Updated Cell State:")
        show("  c_new:", cell)
        print("Updated Hidden State:")
        show("  h_new:", hidden)
        print("-" * 50)

def main():
    """
    Run a small LSTM numerical example with hand-picked parameters.

    Builds a parameter dict for a 1-input, 2-hidden-unit, 1-output LSTM,
    replaces every weight and bias with fixed values so the run is
    reproducible, prints the detailed forward-pass trace for a three-step
    sequence, and finally prints the prediction next to the expected value
    and their absolute difference.
    """
    sequence = np.array([[0.5], [1.5], [2.5]])
    input_size = 1
    hidden_size = 2
    output_size = 1
    # Target the prediction is compared against at the end of the demo.
    expected_value = 4.0

    # Initialize parameters, then overwrite every weight and bias with
    # fixed values so the printed numbers do not depend on the RNG.
    params = initialize_params(input_size, hidden_size, output_size)
    manual_values = {
        'W_f': np.array([[0.65, -0.15, 0.25], [0.55, 0.35, -0.05]]),
        'W_i': np.array([[0.25, 0.35, -0.15], [0.15, 0.45, 0.05]]),
        'W_c': np.array([[0.45, 0.25, 0.15], [0.25, 0.35, 0.05]]),
        'W_o': np.array([[0.15, 0.45, 0.35], [0.25, 0.25, 0.3]]),
        'b_f': np.array([[0.55], [0.35]]),
        'b_i': np.array([[0.15], [0.25]]),
        'b_c': np.array([[0.35], [0.15]]),
        'b_o': np.array([[0.25], [0.15]]),
        'W_y': np.array([[0.55, 0.45]]),
        'b_y': np.array([[0.15]])
    }
    params = update_with_manual_params(params, manual_values)

    print("=" * 60)
    print("LSTM Numerical Example")
    print("Input sequence:", sequence.flatten())
    print("=" * 60, "\n")

    debug_lstm_steps(sequence, params)

    prediction, _ = predict_lstm(sequence, params)
    # The readout is a (1, 1) array; pull out the scalar once so the
    # prediction and the difference are both printed as plain numbers
    # rather than one scalar and one nested array.
    predicted_value = prediction.flatten()[0]
    print("\nFinal Prediction:", np.round(predicted_value, 4))
    print(f"Expected Value: {expected_value:.4f}")
    print("Difference:", np.round(abs(predicted_value - expected_value), 4))

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


LSTM Numerical Example
Input sequence: [0.5 1.5 2.5]

=== LSTM Forward Pass Debug Details ===
Initial hidden state (h0): [0. 0.]
Initial cell state (c0): [0. 0.]

[Time Step 1]
Input x[0]: [0.5]
Forget Gate:
  Raw computation: [0.875 0.625]
  After sigmoid: [0.7058 0.6514]
Input Gate:
  Raw computation: [0.275 0.325]
  After sigmoid: [0.5683 0.5805]
Candidate Cell State:
  Raw computation: [0.575 0.275]
  After tanh: [0.519  0.2683]
Output Gate:
  Raw computation: [0.325 0.275]
  After sigmoid: [0.5805 0.5683]
Updated Cell State:
  c_new: [0.295  0.1557]
Updated Hidden State:
  h_new: [0.1664 0.0878]
--------------------------------------------------

[Time Step 2]
Input x[1]: [1.5]
Forget Gate:
  Raw computation: [1.522  1.2289]
  After sigmoid: [0.8208 0.7736]
Input Gate:
  Raw computation: [0.5701 0.5543]
  After sigmoid: [0.6388 0.6351]
Candidate Cell State:
  Raw computation: [1.0798 0.5876]
  After tanh: [0.7931 0.5282]
Output Gate:
  Raw computation: [0.5806 0.593 ]
  After sigm