In [2]:
import numpy as np

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Hyperbolic tangent activation function
def tanh(x):
    return np.tanh(x)

# Initialize LSTM parameters
W_f, W_hf, b_f = 0.5, 0.1, 0
W_i, W_hi, b_i = 0.6, 0.2, 0
W_c, W_hc, b_c = 0.7, 0.3, 0
W_o, W_ho, b_o = 0.8, 0.4, 0

# Initialize states
h_prev, C_prev = 0, 0

# Input sequence
X = [1, 2, 3]

# LSTM computation for each time step
for t in range(len(X)):
    x_t = X[t]
    
    # Forget gate
    f_t = sigmoid(W_f * x_t + W_hf * h_prev + b_f)
    
    # Input gate
    i_t = sigmoid(W_i * x_t + W_hi * h_prev + b_i)
    
    # Candidate cell state
    C_tilde_t = tanh(W_c * x_t + W_hc * h_prev + b_c)
    
    # Cell state update
    C_t = f_t * C_prev + i_t * C_tilde_t
    
    # Output gate
    o_t = sigmoid(W_o * x_t + W_ho * h_prev + b_o)
    
    # Hidden state update
    h_t = o_t * tanh(C_t)
    
    # Update previous states
    h_prev, C_prev = h_t, C_t
    
    print(f"Time Step {t+1}:")
    print(f"  Forget gate (f_t): {f_t:.3f}")
    print(f"  Input gate (i_t): {i_t:.3f}")
    print(f"  Candidate cell state (C_tilde_t): {C_tilde_t:.3f}")
    print(f"  Cell state (C_t): {C_t:.3f}")
    print(f"  Output gate (o_t): {o_t:.3f}")
    print(f"  Hidden state (h_t): {h_t:.3f}")
    print()

# Predict the next value
W_y, b_y = 4, 0
y_pred = W_y * h_t + b_y

print(f"Predicted next value: {y_pred:.3f}")

Time Step 1:
  Forget gate (f_t): 0.622
  Input gate (i_t): 0.646
  Candidate cell state (C_tilde_t): 0.604
  Cell state (C_t): 0.390
  Output gate (o_t): 0.690
  Hidden state (h_t): 0.256

Time Step 2:
  Forget gate (f_t): 0.736
  Input gate (i_t): 0.778
  Candidate cell state (C_tilde_t): 0.901
  Cell state (C_t): 0.988
  Output gate (o_t): 0.846
  Hidden state (h_t): 0.640

Time Step 3:
  Forget gate (f_t): 0.827
  Input gate (i_t): 0.873
  Candidate cell state (C_tilde_t): 0.980
  Cell state (C_t): 1.672
  Output gate (o_t): 0.934
  Hidden state (h_t): 0.871

Predicted next value: 3.483


In [8]:
import numpy as np

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Hyperbolic tangent activation function
def tanh(x):
    return np.tanh(x)

# Initialize LSTM parameters (adjusted to reach the target)
W_f, W_hf, b_f = 0.6, 0.2, 0.1  # Adjusted forget gate weights
W_i, W_hi, b_i = 0.7, 0.3, 0.1  # Adjusted input gate weights
W_c, W_hc, b_c = 0.8, 0.4, 0.1  # Adjusted candidate cell state weights
W_o, W_ho, b_o = 0.9, 0.5, 0.1  # Adjusted output gate weights

# Initialize states
h_prev, C_prev = 0, 0

# Input sequence
X = [1, 2, 3]

# Target value (ground truth)
target = 4

# LSTM computation for each time step
for t in range(len(X)):
    x_t = X[t]
    
    # Forget gate
    f_t = sigmoid(W_f * x_t + W_hf * h_prev + b_f)
    
    # Input gate
    i_t = sigmoid(W_i * x_t + W_hi * h_prev + b_i)
    
    # Candidate cell state
    C_tilde_t = tanh(W_c * x_t + W_hc * h_prev + b_c)
    
    # Cell state update
    C_t = f_t * C_prev + i_t * C_tilde_t
    
    # Output gate
    o_t = sigmoid(W_o * x_t + W_ho * h_prev + b_o)
    
    # Hidden state update
    h_t = o_t * tanh(C_t)
    
    # Update previous states
    h_prev, C_prev = h_t, C_t
    
    print(f"Time Step {t+1}:")
    print(f"  Forget gate (f_t): {f_t:.3f}")
    print(f"  Input gate (i_t): {i_t:.3f}")
    print(f"  Candidate cell state (C_tilde_t): {C_tilde_t:.3f}")
    print(f"  Cell state (C_t): {C_t:.3f}")
    print(f"  Output gate (o_t): {o_t:.3f}")
    print(f"  Hidden state (h_t): {h_t:.3f}")
    print()

# Predict the next value (adjusted linear transformation)
W_y, b_y = 4.2, 0  # Adjusted to reach the target
y_pred = W_y * h_t + b_y

print(f"Predicted next value: {y_pred:.3f}")

# Calculate loss (Mean Squared Error)
mse_loss = (y_pred - target) ** 2
print(f"Mean Squared Error (Loss): {mse_loss:.3f}")

# Calculate accuracy (Mean Absolute Error)
mae_accuracy = abs(y_pred - target)
print(f"Mean Absolute Error (Accuracy): {mae_accuracy:.3f}")

Time Step 1:
  Forget gate (f_t): 0.668
  Input gate (i_t): 0.690
  Candidate cell state (C_tilde_t): 0.716
  Cell state (C_t): 0.494
  Output gate (o_t): 0.731
  Hidden state (h_t): 0.335

Time Step 2:
  Forget gate (f_t): 0.797
  Input gate (i_t): 0.832
  Candidate cell state (C_tilde_t): 0.950
  Cell state (C_t): 1.184
  Output gate (o_t): 0.888
  Hidden state (h_t): 0.736

Time Step 3:
  Forget gate (f_t): 0.886
  Input gate (i_t): 0.918
  Candidate cell state (C_tilde_t): 0.993
  Cell state (C_t): 1.961
  Output gate (o_t): 0.960
  Hidden state (h_t): 0.922

Predicted next value: 3.874
Mean Squared Error (Loss): 0.016
Mean Absolute Error (Accuracy): 0.126


In [66]:
import numpy as np

# Sigmoid activation function
def sigmoid(x, derivative=False):
    if derivative:
        return sigmoid(x) * (1 - sigmoid(x))
    return 1 / (1 + np.exp(-x))

# Hyperbolic tangent activation function
def tanh(x, derivative=False):
    if derivative:
        return 1 - np.tanh(x) ** 2
    return np.tanh(x)

# Initialize LSTM parameters
W_f, W_hf, b_f = 0.5, 0.1, 0
W_i, W_hi, b_i = 0.6, 0.2, 0
W_c, W_hc, b_c = 0.7, 0.3, 0
W_o, W_ho, b_o = 0.8, 0.4, 0
W_y, b_y = 4.0, 0  # Linear transformation weights

# Learning rate for gradient descent
learning_rate = 0.01

# Target value
target = 4

# Initialize states
h_prev, C_prev = 0, 0

# Input sequence
X = [1, 2, 3]

# Function to compute LSTM forward pass
def lstm_forward(x, h_prev, C_prev):
    # Forget gate
    f_t = sigmoid(W_f * x + W_hf * h_prev + b_f)
    # Input gate
    i_t = sigmoid(W_i * x + W_hi * h_prev + b_i)
    # Candidate cell state
    C_tilde_t = tanh(W_c * x + W_hc * h_prev + b_c)
    # Cell state update
    C_t = f_t * C_prev + i_t * C_tilde_t
    # Output gate
    o_t = sigmoid(W_o * x + W_ho * h_prev + b_o)
    # Hidden state update
    h_t = o_t * tanh(C_t)
    # Output prediction
    y_pred = W_y * h_t + b_y
    return f_t, i_t, C_tilde_t, C_t, o_t, h_t, y_pred

# Function to compute LSTM backward pass and update weights
def lstm_backward(x, h_prev, C_prev, f_t, i_t, C_tilde_t, C_t, o_t, h_t, y_pred, target):
    global W_f, W_hf, b_f, W_i, W_hi, b_i, W_c, W_hc, b_c, W_o, W_ho, b_o, W_y, b_y

    # Compute error (Mean Squared Error)
    error = y_pred - target

    # Gradient of the loss with respect to y_pred
    d_y_pred = 2 * error

    # Gradients for the linear transformation weights
    d_W_y = d_y_pred * h_t
    d_b_y = d_y_pred

    # Gradient of the loss with respect to h_t
    d_h_t = d_y_pred * W_y

    # Gradients for the output gate
    d_o_t = d_h_t * tanh(C_t)
    d_W_o = d_o_t * sigmoid(o_t, derivative=True) * x
    d_W_ho = d_o_t * sigmoid(o_t, derivative=True) * h_prev
    d_b_o = d_o_t * sigmoid(o_t, derivative=True)

    # Gradients for the cell state
    d_C_t = d_h_t * o_t * tanh(C_t, derivative=True)

    # Gradients for the forget gate
    d_f_t = d_C_t * C_prev
    d_W_f = d_f_t * sigmoid(f_t, derivative=True) * x
    d_W_hf = d_f_t * sigmoid(f_t, derivative=True) * h_prev
    d_b_f = d_f_t * sigmoid(f_t, derivative=True)

    # Gradients for the input gate
    d_i_t = d_C_t * C_tilde_t
    d_W_i = d_i_t * sigmoid(i_t, derivative=True) * x
    d_W_hi = d_i_t * sigmoid(i_t, derivative=True) * h_prev
    d_b_i = d_i_t * sigmoid(i_t, derivative=True)

    # Gradients for the candidate cell state
    d_C_tilde_t = d_C_t * i_t
    d_W_c = d_C_tilde_t * tanh(C_tilde_t, derivative=True) * x
    d_W_hc = d_C_tilde_t * tanh(C_tilde_t, derivative=True) * h_prev
    d_b_c = d_C_tilde_t * tanh(C_tilde_t, derivative=True)

    # Update weights using gradient descent
    W_f -= learning_rate * d_W_f
    W_hf -= learning_rate * d_W_hf
    b_f -= learning_rate * d_b_f

    W_i -= learning_rate * d_W_i
    W_hi -= learning_rate * d_W_hi
    b_i -= learning_rate * d_b_i

    W_c -= learning_rate * d_W_c
    W_hc -= learning_rate * d_W_hc
    b_c -= learning_rate * d_b_c

    W_o -= learning_rate * d_W_o
    W_ho -= learning_rate * d_W_ho
    b_o -= learning_rate * d_b_o

    W_y -= learning_rate * d_W_y
    b_y -= learning_rate * d_b_y

# Training loop
num_epochs = 9000
for epoch in range(num_epochs):
    h_prev, C_prev = 0, 0  # Reset hidden and cell states for each epoch
    for t in range(len(X)):
        x_t = X[t]
        f_t, i_t, C_tilde_t, C_t, o_t, h_t, y_pred = lstm_forward(x_t, h_prev, C_prev)
        lstm_backward(x_t, h_prev, C_prev, f_t, i_t, C_tilde_t, C_t, o_t, h_t, y_pred, target)
        h_prev, C_prev = h_t, C_t  # Update hidden and cell states

    # Print loss every 100 epochs
    if epoch % 100 == 0:
        loss = (y_pred - target) ** 2
        print(f"Epoch {epoch}, Loss: {loss:.4f}, Predicted: {y_pred:.4f}")

# Final prediction
h_prev, C_prev = 0, 0
for t in range(len(X)):
    x_t = X[t]
    f_t, i_t, C_tilde_t, C_t, o_t, h_t, y_pred = lstm_forward(x_t, h_prev, C_prev)
    h_prev, C_prev = h_t, C_t

print(f"Final Predicted Value: {y_pred:.4f}")

# Calculate accuracy (Mean Absolute Error)
mae = abs(y_pred - target)
print(f"Mean Absolute Error (Accuracy): {mae:.4f}")

Epoch 0, Loss: 0.1118, Predicted: 3.6656
Epoch 100, Loss: 0.0383, Predicted: 4.1956
Epoch 200, Loss: 0.0001, Predicted: 3.9881
Epoch 300, Loss: 0.0022, Predicted: 3.9531
Epoch 400, Loss: 0.0025, Predicted: 3.9502
Epoch 500, Loss: 0.0022, Predicted: 3.9529
Epoch 600, Loss: 0.0019, Predicted: 3.9563
Epoch 700, Loss: 0.0016, Predicted: 3.9597
Epoch 800, Loss: 0.0014, Predicted: 3.9629
Epoch 900, Loss: 0.0012, Predicted: 3.9658
Epoch 1000, Loss: 0.0010, Predicted: 3.9685
Epoch 1100, Loss: 0.0008, Predicted: 3.9710
Epoch 1200, Loss: 0.0007, Predicted: 3.9734
Epoch 1300, Loss: 0.0006, Predicted: 3.9755
Epoch 1400, Loss: 0.0005, Predicted: 3.9774
Epoch 1500, Loss: 0.0004, Predicted: 3.9792
Epoch 1600, Loss: 0.0004, Predicted: 3.9809
Epoch 1700, Loss: 0.0003, Predicted: 3.9824
Epoch 1800, Loss: 0.0003, Predicted: 3.9839
Epoch 1900, Loss: 0.0002, Predicted: 3.9852
Epoch 2000, Loss: 0.0002, Predicted: 3.9864
Epoch 2100, Loss: 0.0002, Predicted: 3.9875
Epoch 2200, Loss: 0.0001, Predicted: 3.9885
