In [22]:
from my_ai_utils import *
import tensorflow as tf

In [23]:
# Example input data
batch_size = 10
sequence_length = 5
input_size = 2
hidden_size = 3

X = np.random.randn(batch_size, sequence_length, input_size).astype(np.float32)
y = np.random.randn(batch_size, hidden_size).astype(np.float32)

In [24]:
# Initialize the models
model = tf.keras.layers.SimpleRNN(hidden_size, return_sequences=False, return_state=False)
model.build(input_shape=(None, sequence_length, input_size))
weights = model.get_weights()
custom_rnn = RNN(in_features=input_size, hidden_features=hidden_size, load_weights=weights)

In [25]:
# Forward pass test
y_pred = model(X)
custom_y_pred = custom_rnn(X)
print("Forward pass output:", y_pred.shape, y_pred[0].numpy())  # Expected shape: (batch_size, output_size)
print("Forward custom pass output :", custom_y_pred.shape, custom_y_pred[0])

Forward pass output: (10, 3) [-0.5800159   0.44675422  0.18310137]
Forward custom pass output : (10, 3) [-0.58001593  0.44675418  0.18310154]


In [26]:
# Define a simple loss function
loss_fn = tf.keras.losses.MeanSquaredError()

# Compute the loss
with tf.GradientTape() as tape:
    y_pred = model(X)
    loss = loss_fn(y, y_pred)

print("Loss:", loss.numpy())

### custom
custom_loss, custom_gradient = Loss()(custom_y_pred, y)
print("Custom loss:", custom_loss)

Loss: 1.0768744
Custom loss: 1.0768744094994933


In [27]:
_, custom_params_updates = custom_rnn.backward(custom_gradient)
custom_params_updates = Adam(lr=0.01)(custom_params_updates, 0, step=0, epoch=0)
custom_rnn.update_params(custom_params_updates)

gradients = tape.gradient(loss, model.trainable_variables)
print("Gradients brute :", gradients)
print("custom Gradients brute :", custom_params_updates)

# Optionally, perform a gradient update to check if loss decreases
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Perform a single optimization step
optimizer.apply_gradients(zip(gradients, model.trainable_variables))

Gradients brute : [<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 0.29230157,  0.22263588,  0.01095539],
       [ 0.19331145, -0.05328811,  0.10942913]], dtype=float32)>, <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-0.10786482, -0.0412334 , -0.05851246],
       [-0.20773971,  0.08068995, -0.05088274],
       [-0.06287842,  0.05976132,  0.20142545]], dtype=float32)>, <tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 0.2677182 , -0.06568459, -0.13104413], dtype=float32)>]
custom Gradients brute : [array([[-0.01, -0.01, -0.01],
       [-0.01,  0.01, -0.01],
       [-0.01,  0.01,  0.01]]), array([[ 0.01,  0.01,  0.01],
       [ 0.01, -0.01,  0.01]]), array([ 0.01, -0.01, -0.01])]


<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [28]:
# Compute the loss again to check if it has decreased
with tf.GradientTape() as tape:
    y_pred = model(X)
    new_loss = loss_fn(y, y_pred)

custom_y_pred = custom_rnn(X)
new_custom_loss, _ = Loss()(custom_y_pred, y)

print("New Loss:", new_loss.numpy())
print("New custom Loss:", new_custom_loss)

assert new_loss < loss, "Loss did not decrease after gradient update"
assert new_custom_loss < custom_loss, "Custom Loss did not decrease after gradient update"

New Loss: 1.0552164
New custom Loss: 1.0552156639357473
