In [13]:
import numpy as np

# Toy input sequence: one scalar value per time step.
x = np.array([0.1, 0.2, 0.3])

# Hidden state fed into the first time step.
h0 = 0.0

# Scalar RNN parameters (every weight and bias is a single float).
W_x = 0.5  # multiplies the current input x[t]
W_h = 0.5  # multiplies the previous hidden state
b_h = 0.0  # bias added before the tanh
W_y = 1.0  # multiplies the hidden state to form the output
b_y = 0.0  # bias added to the output

# Desired output sequence, one target per time step of x.
y_true = np.array([0.2, 0.4, 0.6])
print(x)
# Show the targets defined in this cell. `y` is only created later, inside the
# forward-pass loop, so printing it here raised NameError on a fresh kernel
# (or silently showed a stale value left over from a previous run).
print(y_true)

[0.1 0.2 0.3]
0.20903903096074486


In [3]:
# Forward pass of the scalar RNN.
#   h_t = tanh(W_x * x_t + W_h * h_{t-1} + b_h)   (tanh = hyperbolic tangent)
#   y_t = W_y * h_t + b_y
# The recurrence starts from the initial hidden state h0.
h = h0

h_trace = []  # hidden state after each time step
y_trace = []  # output after each time step
for t, x_t in enumerate(x):
    pre_activation = W_x * x_t + W_h * h + b_h
    h = np.tanh(pre_activation)
    y = W_y * h + b_y
    h_trace.append(h)
    y_trace.append(y)

# Later cells index these as arrays, so convert the per-step lists.
hidden_states = np.array(h_trace)
outputs = np.array(y_trace)

print("Hidden states:", hidden_states)
print("Outputs:", outputs)

Hidden states: [0.04995837 0.12433251 0.20903903]
Outputs: [0.04995837 0.12433251 0.20903903]


In [5]:
# Mean squared error between the predicted and the target sequences.
residuals = outputs - y_true
squared_errors = residuals ** 2
loss = np.mean(squared_errors)
print("Loss:", loss)

Loss: 0.08378517767647224


In [19]:
# Backpropagation through time for the scalar RNN.
# Every parameter gradient is accumulated over all time steps, starting at zero.
dL_dWy = dL_dby = dL_dWx = dL_dWh = dL_dbh = 0.0

# Derivative of the mean-squared-error loss w.r.t. each output y_t.
n_targets = len(y_true)
dL_dy = 2 * (outputs - y_true) / n_targets
print(dL_dy)

# Walk the sequence from the last step to the first, carrying the gradient
# that flows back into h_t from step t + 1 (zero for the final step).
dL_dh_next = 0.0
for t in range(len(x) - 1, -1, -1):
    h_t = hidden_states[t]
    # Hidden state that fed step t: h0 for the first step, otherwise h_{t-1}.
    h_prev = h0 if t == 0 else hidden_states[t - 1]

    # Total gradient reaching h_t: via the output y_t plus via the next step.
    dL_dh = dL_dy[t] * W_y + dL_dh_next
    # Push it through the tanh: d tanh(a)/da = 1 - tanh(a)^2 = 1 - h_t^2.
    dL_dh_raw = dL_dh * (1 - h_t ** 2)

    # Output-layer parameters.
    dL_dWy += dL_dy[t] * h_t
    dL_dby += dL_dy[t]

    # Recurrent-layer parameters.
    dL_dWx += dL_dh_raw * x[t]
    dL_dWh += dL_dh_raw * h_prev
    dL_dbh += dL_dh_raw

    # Gradient handed back to the previous hidden state.
    dL_dh_next = dL_dh_raw * W_h

print("Gradients:")
for label, grad in (
    ("dL_dWy:", dL_dWy),
    ("dL_dby:", dL_dby),
    ("dL_dWx:", dL_dWx),
    ("dL_dWh:", dL_dWh),
    ("dL_dbh:", dL_dbh),
):
    print(label, grad)

[-0.10002775 -0.18377833 -0.26064065]
Gradients:
dL_dWy: -0.08233091265892165
dL_dby: -0.5444467220306203
dL_dWx: -0.16062445174417353
dL_dWh: -0.04615923298359283
dL_dbh: -0.8041053042318597


In [9]:
# Step size for the gradient-descent update.
learning_rate = 0.01

# One gradient-descent step: move each parameter against its gradient.
# NOTE: the parameters are overwritten from their current values, so running
# this cell again applies another step with the same gradients.
W_x = W_x - learning_rate * dL_dWx
W_h = W_h - learning_rate * dL_dWh
b_h = b_h - learning_rate * dL_dbh
W_y = W_y - learning_rate * dL_dWy
b_y = b_y - learning_rate * dL_dby

print("Updated parameters:")
for label, value in (
    ("W_x:", W_x),
    ("W_h:", W_h),
    ("b_h:", b_h),
    ("W_y:", W_y),
    ("b_y:", b_y),
):
    print(label, value)

Updated parameters:
W_x: 0.5016062445174417
W_h: 0.5004615923298359
b_h: 0.008041053042318596
W_y: 1.0008233091265892
b_y: 0.005444467220306203
