In [1]:
import numpy as np

In [2]:
# Initial parameters, inputs and targets for the two-step worked example.

# Input weights: one 2-element vector per gate (a, i, f, o), stacked row-wise
# into a (4, 2) matrix.
Wa, Wi, Wf, Wo = (
    np.array(row)
    for row in ([0.45, 0.25], [0.95, 0.8], [0.7, 0.45], [0.6, 0.4])
)
W = np.stack([Wa, Wi, Wf, Wo])

# Recurrent weights: one scalar per gate, collected in the same gate order.
Ua, Ui, Uf, Uo = 0.15, 0.8, 0.1, 0.25
U = np.array([Ua, Ui, Uf, Uo])

# Biases: one scalar per gate, collected in the same gate order.
ba, bi, bf, bo = 0.2, 0.65, 0.15, 0.1
b = np.array([ba, bi, bf, bo])

# Inputs (x) and targets (y) for time steps 0 and 1.
x0, x1 = np.array([1, 2]), np.array([0.5, 3])
y0, y1 = 0.5, 1.25

# Output and state that precede the first time step.
out_minus_one = state_minus_one = 0

In [3]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` is negated and handed to ``np.exp``, so it may be a plain number or
    a NumPy array (in which case the result is computed element-wise).
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [4]:
# forward @ t = 0
# Every pre-activation has the form
#   x0 . W_gate + U_gate * previous_output + b_gate
# where the previous output is out_minus_one for this first step.
a0 = np.tanh(x0.dot(Wa) + out_minus_one * Ua + ba)  # tanh-squashed candidate
i0 = sigmoid(x0.dot(Wi) + out_minus_one * Ui + bi)  # scales the candidate a0
f0 = sigmoid(x0.dot(Wf) + out_minus_one * Uf + bf)  # scales the previous state
o0 = sigmoid(x0.dot(Wo) + out_minus_one * Uo + bo)  # scales the emitted output
# New state = gated candidate + gated previous state.
state_0 = a0 * i0 + f0 * state_minus_one
# Output = tanh of the new state, scaled by o0.
out_0 = o0 * np.tanh(state_0)

In [5]:
# Show every forward-pass quantity for t = 0, one "label value" pair per line.
print("\n".join(
    f"{label} {value!s}"
    for label, value in (
        ("a0      ", a0),
        ("i0      ", i0),
        ("f0      ", f0),
        ("o0      ", o0),
        ("state_0 ", state_0),
        ("out_0   ", out_0),
    )
))

a0       0.8177540779702877
i0       0.9608342772032357
f0       0.8519528019683106
o0       0.8175744761936437
state_0  0.7857261484365797
out_0    0.5363133978820118


In [6]:
# forward @ t = 1
# Same equations as the first step, now fed with x1 and with the output and
# state produced at t = 0 (out_0, state_0) as the "previous" values.
a1 = np.tanh(x1.dot(Wa) + out_0 * Ua + ba)  # tanh-squashed candidate
i1 = sigmoid(x1.dot(Wi) + out_0 * Ui + bi)  # scales the candidate a1
f1 = sigmoid(x1.dot(Wf) + out_0 * Uf + bf)  # scales the previous state
o1 = sigmoid(x1.dot(Wo) + out_0 * Uo + bo)  # scales the emitted output
# New state = gated candidate + gated previous state.
state_1 = a1 * i1 + f1 * state_0
# Output = tanh of the new state, scaled by o1.
out_1 = o1 * np.tanh(state_1)

In [7]:
# Show every forward-pass quantity for t = 1, one "label value" pair per line.
print("\n".join(
    f"{label} {value!s}"
    for label, value in (
        ("a1      ", a1),
        ("i1      ", i1),
        ("f1      ", f1),
        ("o1      ", o1),
        ("state_1 ", state_1),
        ("out_1   ", out_1),
    )
))

a1       0.8498040223194213
i1       0.9811839683254171
f1       0.8703019698552491
o1       0.8499333428022842
state_1  1.5176330976694041
out_1    0.7719811057588907


In [8]:
# backward @ t = 1
# Error at the output. This equals the gradient of 0.5 * (out_1 - y1)**2.
# NOTE(review): the loss is not written down in this notebook - confirm.
delta_out_1 = out_1 - y1
# t = 1 is the last step, so the state gradient has no contribution from a
# later time step: only the path through out_1 = tanh(state_1) * o1.
delta_state_1 = (1 - np.tanh(state_1)**2) * (o1 * delta_out_1)
# Gradients w.r.t. each gate's pre-activation: upstream gradient times the
# derivative of that gate's nonlinearity (tanh' = 1 - a**2, sigmoid' = s * (1 - s)).
delta_a1 = (1 - a1**2) * (i1 * delta_state_1)
delta_i1 = (1 - i1) * (i1 * (a1 * delta_state_1))
delta_f1 = (1 - f1) * (f1 * (state_0 * delta_state_1))
delta_o1 = (1 - o1) * (o1 * (np.tanh(state_1) * delta_out_1))

# Gate gradients in the same (a, i, f, o) order used for W, U and b.
delta_gates_1 = np.array((delta_a1, delta_i1, delta_f1, delta_o1))
# Push the gate gradients back to the input x1 ...
delta_x1 = np.matmul(W.T, delta_gates_1)
# ... and to the previous output out_0 (consumed by the t = 0 backward step).
_delta_out_0 = np.matmul(U.T, delta_gates_1)

In [9]:
# Show the four gate gradients for t = 1, one "label value" pair per line.
print("\n".join(
    f"{label} {value!s}"
    for label, value in (
        ("delta_a1      ", delta_a1),
        ("delta_i1      ", delta_i1),
        ("delta_f1      ", delta_f1),
        ("delta_o1      ", delta_o1),
    )
))

delta_a1       -0.01938434750774075
delta_i1       -0.001115614070446023
delta_f1       -0.006306541742007648
delta_o1       -0.05537783112520711


In [10]:
# backward @ t = 0
# Output error at t = 0 plus the gradient that flowed back from t = 1 through
# the recurrent weights (_delta_out_0).
# NOTE(review): (out_0 - y0) matches a 0.5 * (out_0 - y0)**2 loss - confirm.
delta_out_0 = out_0 - y0 + _delta_out_0
# State gradient: the path through out_0 plus the path through state_1,
# which reused state_0 scaled by f1.
delta_state_0 = (1 - np.tanh(state_0)**2) * (o0 * delta_out_0) + f1 * delta_state_1
# Gradients w.r.t. each gate's pre-activation: upstream gradient times the
# derivative of that gate's nonlinearity (tanh' = 1 - a**2, sigmoid' = s * (1 - s)).
delta_a0 = (1 - a0**2) * (i0 * delta_state_0)
delta_i0 = (1 - i0) * (i0 * (a0 * delta_state_0))
# state_minus_one is the state before the first step, so it multiplies this term.
delta_f0 = (1 - f0) * (f0 * (state_minus_one * delta_state_0))
delta_o0 = (1 - o0) * (o0 * (np.tanh(state_0) * delta_out_0))

# Gate gradients in the same (a, i, f, o) order used for W, U and b.
delta_gates_0 = np.array((delta_a0, delta_i0, delta_f0, delta_o0))
# Push the gate gradients back to the input x0 ...
delta_x0 = np.matmul(W.T, delta_gates_0)
# ... and to the output that preceded the first step.
_delta_out_minus_one = np.matmul(U.T, delta_gates_0)

In [11]:
# Show the four gate gradients for t = 0, one "label value" pair per line.
print("\n".join(
    f"{label} {value!s}"
    for label, value in (
        ("delta_a0      ", delta_a0),
        ("delta_i0      ", delta_i0),
        ("delta_f0      ", delta_f0),
        ("delta_o0      ", delta_o0),
    )
))

delta_a0       -0.01702404472886597
delta_i0       -0.0016458818995508829
delta_f0       -0.0
delta_o0       0.0017648023073026033


In [12]:
# update weights
l_rate = 0.1

# Parameter gradients, summed over both time steps.
# Input weights: outer product of each step's gate gradients with its input,
# giving a (4, 2) matrix shaped like W.
delta_W = np.outer(delta_gates_0, x0) + np.outer(delta_gates_1, x1)
# Recurrent weights: each step's gate gradients times the output fed into that step.
delta_U = delta_gates_0 * out_minus_one + delta_gates_1 * out_0
# Biases: the gate gradients themselves.
delta_b = delta_gates_1 + delta_gates_0

# One gradient-descent step; the original W, U and b are left untouched.
new_W = W - delta_W * l_rate
new_U = U - delta_U * l_rate
new_b = b - delta_b * l_rate

In [13]:
# Report the updated parameters, each under its heading and separated by a
# blank line (the empty strings become the blank lines).
print("new W:", new_W, "", "new U:", new_U, "", "new b:", new_b, sep="\n")

new W:
[[0.45267162 0.25922011]
 [0.95022037 0.80066386]
 [0.70031533 0.45189196]
 [0.60259241 0.41626039]]

new U:
[0.15103961 0.80005983 0.10033823 0.25296999]

new b:
[0.20364084 0.65027615 0.15063065 0.1053613 ]
