In [1]:
import numpy as np

class RNNCell:
    """A single vanilla (tanh) recurrent cell.

    Holds the input-to-hidden matrix ``Wxh``, the hidden-to-hidden matrix
    ``Whh`` and the hidden bias ``bh``, and computes
    ``h_next = tanh(Wxh . x + Whh . h_prev + bh)``.
    """

    def __init__(self, input_size, hidden_size):
        """Create the cell's parameters.

        input_size: number of rows expected in each input ``x``.
        hidden_size: number of rows in the hidden state.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Small random initial weights; the bias starts at zero.
        init_scale = 0.01
        self.Wxh = init_scale * np.random.randn(hidden_size, input_size)   # input -> hidden
        self.Whh = init_scale * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
        self.bh = np.zeros((hidden_size, 1))  # column vector, broadcast across the batch axis

        # Non-linearity applied to the pre-activation.
        self.tanh = np.tanh

    def forward(self, x, h_prev):
        """
        Advance the cell by one timestep.
        x: input at current timestep (shape: input_size x batch_size)
        h_prev: previous hidden state (shape: hidden_size x batch_size)
        Returns the new hidden state; ``x``, ``h_prev`` and the result are
        also stored on the instance as ``self.x``, ``self.h_prev`` and
        ``self.h_next``.
        """
        self.x = x
        self.h_prev = h_prev

        input_term = self.Wxh.dot(x)
        recurrent_term = self.Whh.dot(h_prev)
        pre_activation = input_term + recurrent_term + self.bh

        self.h_next = self.tanh(pre_activation)
        return self.h_next


In [2]:
class DeepRNN:
    """A stack of ``RNNCell`` layers evaluated for one time step at a time.

    The first layer consumes the external input; every later layer consumes
    the freshly computed hidden state of the layer directly below it.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        """Build ``num_layers`` cells.

        input_size: input dimension of the first layer.
        hidden_size: hidden dimension of every layer (and the input
            dimension of every layer after the first).
        num_layers: number of stacked cells.
        """
        self.layers = [
            RNNCell(input_size if i == 0 else hidden_size, hidden_size)
            for i in range(num_layers)
        ]

    def forward(self, x, h_prev):
        """
        Forward pass through the stacked RNN for a single time step.
        x: input at the current timestep (shape: input_size x batch_size)
        h_prev: list of previous hidden states, one per layer
        Returns a NEW list holding the updated hidden state of each layer;
        the ``h_prev`` container passed in is left unmodified.
        """
        # Work on a shallow copy so the caller's list of previous states is
        # not overwritten as a side effect of the forward pass.
        h = list(h_prev)
        for i, layer in enumerate(self.layers):
            h[i] = layer.forward(x, h[i])
            x = h[i]  # Pass hidden state to the next layer
        return h


In [3]:
def initialize_hidden_states(num_layers, hidden_size, batch_size):
    """Return a list of ``num_layers`` zero arrays, each of shape
    (hidden_size, batch_size).

    Every entry is a separate array, so the entries can be replaced or
    modified independently of one another.
    """
    state_shape = (hidden_size, batch_size)
    states = []
    for _ in range(num_layers):
        states.append(np.zeros(state_shape))
    return states


In [7]:
def mse_loss(y_pred, y_true):
    return np.mean((y_pred - y_true) ** 2)

def train_rnn(rnn, data, targets, epochs, lr):
    """Run the forward pass over a sequence for ``epochs`` epochs and print
    the average per-step loss of each epoch.

    rnn: model exposing ``layers`` (each with a ``hidden_size`` attribute)
        and ``forward(x, h)`` returning the list of per-layer hidden states.
    data: array indexed as (time_steps, input_size, batch_size).
    targets: array indexed as (time_steps, hidden_size, batch_size).
    epochs: number of passes over the sequence.
    lr: learning rate. Currently unused: no backward pass or parameter
        update is implemented yet, so the weights never change.

    Raises ValueError if ``data`` contains no time steps.
    """
    num_steps = data.shape[0]
    if num_steps == 0:
        raise ValueError("data must contain at least one time step")

    # data[t] is (input_size, batch_size), so the batch dimension is axis 2
    # of data. Axis 1 is input_size and must not be used for the state width.
    batch_size = data.shape[2]
    hidden_size = rnn.layers[0].hidden_size
    num_layers = len(rnn.layers)

    for epoch in range(epochs):
        # Initialize hidden states
        h = initialize_hidden_states(num_layers, hidden_size, batch_size)

        # The running loss is reset at the start of every epoch.
        epoch_loss = 0.0

        for t in range(num_steps):  # Iterate over time steps
            x = data[t]  # Already in shape (input_size, batch_size)
            y_true = targets[t]  # Already in shape (hidden_size, batch_size)

            # Forward pass
            h = rnn.forward(x, h)

            # Loss calculation (use the output of the last layer)
            loss = mse_loss(h[-1], y_true)
            epoch_loss += loss

            # Backward pass (manual gradient computation goes here)
            # For now, skip detailed gradient implementation to keep things clear

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / num_steps:.4f}")


In [None]:
# Parameters
seed = 42  # fixed seed so a fresh-kernel re-run draws the same numbers
time_steps = 10
input_size = 3
hidden_size = 5
num_layers = 2
batch_size = 2

# Seed NumPy's global RNG before any draws: both the synthetic data below and
# the model's weights (created via np.random.randn) come from this generator.
np.random.seed(seed)

# Generate random data, laid out as (time, features, batch)
data = np.random.randn(time_steps, input_size, batch_size)
targets = np.random.randn(time_steps, hidden_size, batch_size)

# Create model
rnn = DeepRNN(input_size, hidden_size, num_layers)

# Train model
train_rnn(rnn, data, targets, epochs=5, lr=0.01)
