In [17]:
import numpy as np
import numpy as np
import tensorflow as tf

# Problem 1. Gated Recurrent Unit

In [18]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


In [19]:
def gru_step(inputs, h_prev, Wz, Uz, Wr, Ur, W, U):
    # Update gate
    z = sigmoid(np.dot(Wz, inputs) + np.dot(Uz, h_prev))
    # Reset gate
    r = sigmoid(np.dot(Wr, inputs) + np.dot(Ur, h_prev))
    # Candidate hidden state
    h_candidate = np.tanh(np.dot(W, inputs) + np.dot(U, r * h_prev))
    # New hidden state
    h = (1 - z) * h_prev + z * h_candidate
    return h

In [20]:
def gru_forward(inputs, h_init, Wz, Uz, Wr, Ur, W, U):
    h_states = []
    h_current = h_init
    for inp in inputs:
        h_current = gru_step(inp, h_current, Wz, Uz, Wr, Ur, W, U)
        h_states.append(h_current)
    return h_states

In [21]:
# Example 
inputs = np.array([[0.1], [0.2], [0.3]])
h_init = np.zeros((3, 1)) 
Wz, Uz, Wr, Ur, W, U = np.random.randn(3, 1), np.random.randn(3, 3), np.random.randn(3, 1), np.random.randn(3, 3), np.random.randn(3, 1), np.random.randn(3, 3)
hidden_states = gru_forward(inputs, h_init, Wz, Uz, Wr, Ur, W, U)
print(hidden_states)

[array([[-0.0931917 ,  0.06687671,  0.00214862],
       [-0.0931917 ,  0.06687671,  0.00214862],
       [-0.0931917 ,  0.06687671,  0.00214862]]), array([[-0.20048225,  0.1510549 ,  0.00484686],
       [-0.29237915,  0.18321482,  0.00612897],
       [-0.20977263,  0.1401135 ,  0.00476371]]), array([[-0.29224272,  0.23749762,  0.00769729],
       [-0.51813958,  0.30242764,  0.01084509],
       [-0.29750325,  0.20192637,  0.0073227 ]])]


# Problem 2. Long Short-Term Memory

In [22]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

In [23]:
def lstm_cell(x, h_prev, c_prev, Wf, Uf, Wi, Ui, Wc, Uc, Wo, Uo):
    f_t = sigmoid(np.dot(Wf, x) + np.dot(Uf, h_prev))
    i_t = sigmoid(np.dot(Wi, x) + np.dot(Ui, h_prev))
    c_tilde_t = tanh(np.dot(Wc, x) + np.dot(Uc, h_prev))
    c_t = f_t * c_prev + i_t * c_tilde_t
    o_t = sigmoid(np.dot(Wo, x) + np.dot(Uo, h_prev))
    h_t = o_t * tanh(c_t)
    return h_t, c_t


In [24]:
# Example
input_sequence = np.random.randn(5, 2)  # Example input sequence
h_prev = np.zeros(3)  # Initial hidden state
c_prev = np.zeros(3)  # Initial memory cell
Wf, Uf, Wi, Ui, Wc, Uc, Wo, Uo = np.random.randn(3, 2), np.random.randn(3, 3), np.random.randn(3, 2), np.random.randn(3, 3), np.random.randn(3, 2), np.random.randn(3, 3), np.random.randn(3, 2), np.random.randn(3, 3)  # Weight matrices
for t in range(len(input_sequence)):
    h_prev, c_prev = lstm_cell(input_sequence[t], h_prev, c_prev, Wf, Uf, Wi, Ui, Wc, Uc, Wo, Uo)
    print("Time step:", t+1)
    print("Hidden state (h):", h_prev)
    print("Memory cell (c):", c_prev)


Time step: 1
Hidden state (h): [ 0.31328697 -0.11548203 -0.36874658]
Memory cell (c): [ 0.56313897 -0.31658079 -0.55689163]
Time step: 2
Hidden state (h): [0.1505435  0.08375601 0.01007954]
Memory cell (c): [0.51763183 0.30124167 0.0806654 ]
Time step: 3
Hidden state (h): [ 0.22690825 -0.05230024  0.0765205 ]
Memory cell (c): [ 0.82315139 -0.15147705  0.12766167]
Time step: 4
Hidden state (h): [0.08714578 0.00126751 0.08375854]
Memory cell (c): [0.81973658 0.00563008 0.25202734]
Time step: 5
Hidden state (h): [ 0.47595655 -0.04167024 -0.42712795]
Memory cell (c): [ 0.69756638 -0.06250461 -0.56717271]


# Problem 3. Recurrent Neural Network

In [25]:
def rnn_backpropagation(W, U, V, x, y, k, g, g_prime, e_prime):
    delta_W = np.zeros_like(W)
    delta_U = np.zeros_like(U)
    delta_V = np.zeros_like(V)

    h = np.zeros((k+1, W.shape[0]))  
    h_prime = np.zeros_like(h)  
    h[0] = np.zeros_like(h[0])

    for t in range(1, k+1):
        h[t] = g(np.dot(U, x[t-1]) + np.dot(W, h[t-1]))
        h_prime[t] = g_prime(np.dot(U, x[t-1]) + np.dot(W, h[t-1]))

    delta = np.zeros_like(h[0])

    for t in range(k, 0, -1):
        delta = delta + e_prime(y[t-1], h[t]) * g_prime(np.dot(U, x[t-1]) + np.dot(W, h[t-1]))

        delta_W += np.outer(delta, h[t-1])
        delta_U += np.outer(delta, x[t-1])
        delta_V += e_prime(y[t-1], h[t]) * h_prime[t-1]

    return delta_W, delta_U, delta_V

In [26]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

def error_prime(y_true, y_pred):
    return y_pred - y_true

In [27]:
# Example
W = np.random.randn(4, 4)
U = np.random.randn(4, 3)
V = np.random.randn(1, 4)
x = np.random.randn(3, 3)
y = np.random.randn(3, 1)
k = 3

# Compute gradients
gradients = rnn_backpropagation(W, U, V, x, y, k, sigmoid, sigmoid_prime, error_prime)
print("Gradients for W:")
print(gradients[0])
print("Gradients for U:")
print(gradients[1])
print("Gradients for V:")
print(gradients[2])


Gradients for W:
[[ 0.0440191   0.06753547  0.0353868   0.0548636 ]
 [ 0.01357404  0.02095981 -0.02631711  0.04746359]
 [ 0.01561864  0.02392899  0.02189005  0.01180794]
 [-0.06038355 -0.0924676  -0.09705036 -0.03546004]]
Gradients for U:
[[ 0.0706328   0.1701745   0.22674784]
 [ 0.14448086  0.19044149  0.12261476]
 [-0.00950092 -0.01986365  0.11762277]
 [ 0.0531285  -0.17209973 -0.11169706]]
Gradients for V:
[[0.1102165  0.02008674 0.2360739  0.01215762]]


# Problem 4. Recurrent Neural Network

In [28]:
class BidirectionalRNN(tf.keras.Model):
    def __init__(self, input_shape, hidden_units):
        super(BidirectionalRNN, self).__init__()
        self.forward_lstm = tf.keras.layers.LSTM(hidden_units, return_sequences=True)
        self.backward_lstm = tf.keras.layers.LSTM(hidden_units, return_sequences=True, go_backwards=True)
        self.concat_layer = tf.keras.layers.Concatenate()
        self.output_layer = tf.keras.layers.Dense(1)

    def call(self, inputs):
        forward_output = self.forward_lstm(inputs)
        backward_output = self.backward_lstm(inputs)
        concatenated_output = self.concat_layer([forward_output, backward_output])
        predictions = self.output_layer(concatenated_output)
        return predictions

In [29]:
sequence_length = 10  
input_dim = 32  
hidden_units = 64 
learning_rate = 0.001
batch_size = 32
epochs = 10

# Creating a for example
X_train = np.random.rand(1000, sequence_length, input_dim)
y_train = np.random.rand(1000, sequence_length, 1)
model = BidirectionalRNN((sequence_length, input_dim), hidden_units)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
              loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2e3c16cd5e0>