In [4]:
import numpy as np

# Network dimensions and training hyperparameters
input_size = 1         # features per time step (1 for a scalar time series)
hidden_size = 64       # width of the recurrent hidden state
output_size = 1        # values predicted per time step
sequence_length = 10   # time steps in each training sequence
learning_rate = 0.001  # step size of the gradient-descent update

# Parameters: standard-normal weights scaled by 0.01, biases start at zero.
# The three weight draws stay in this order so the random stream is consumed
# the same way on every run with a given seed.
Wxh = 0.01 * np.random.randn(hidden_size, input_size)   # input -> hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
Why = 0.01 * np.random.randn(output_size, hidden_size)  # hidden -> output
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias (column vector)
by = np.zeros(shape=(output_size, 1))  # output bias (column vector)

# Activation function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The value is computed from exp(-|x|), which never exceeds 1, so large
    negative inputs no longer overflow inside ``np.exp``.

    Args:
        x: Real scalar or array-like.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1], so it cannot overflow
    # For x >= 0: 1 / (1 + e^-x).  For x < 0: e^x / (1 + e^x).  Same function.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # turns a 0-d array back into a scalar; no-op otherwise

def dsigmoid(y):
    """Return y * (1 - y), the sigmoid derivative written in terms of its output.

    Args:
        y: A sigmoid output value (scalar or array), not the pre-activation.
    """
    complement = 1 - y
    return y * complement

# Forward pass for a single time step
def rnn_step_forward(x, h_prev):
    """Advance the RNN by one time step.

    Reads the module-level parameters Wxh, Whh, Why, bh and by.

    Args:
        x: Input for this step; left-multiplied by Wxh.
        h_prev: Hidden state from the previous step; left-multiplied by Whh.

    Returns:
        Tuple ``(y, h_next)``: the linear readout of the new hidden state,
        and the new hidden state itself.
    """
    input_term = np.dot(Wxh, x)
    recurrent_term = np.dot(Whh, h_prev)
    pre_activation = input_term + recurrent_term + bh
    h_next = np.tanh(pre_activation)  # squash into (-1, 1)
    return np.dot(Why, h_next) + by, h_next

# Training loop with Backpropagation Through Time (BPTT)
def train_rnn(sequence, epochs=100):
    """Train the module-level RNN for next-step prediction on one sequence.

    Each epoch runs a forward pass in which the input at step t is used to
    predict the value at step t + 1, back-propagates the errors through time,
    clips every gradient element-wise to [-1, 1], and applies one
    gradient-descent update scaled by ``learning_rate``. The module-level
    parameters Wxh, Whh, Why, bh and by are updated in place.

    Args:
        sequence: NumPy array holding at least ``sequence_length`` steps.
            ``sequence[t]`` is reshaped to (input_size, 1) as an input and to
            (output_size, 1) as a target.
        epochs: Number of forward/backward passes over the sequence.

    Returns:
        None. The mean squared error is printed every 10 epochs.
    """
    global Wxh, Whh, Why, bh, by

    # Step t predicts sequence[t + 1], so the last element has no target and
    # only sequence_length - 1 steps carry a training signal.
    num_steps = sequence_length - 1

    for epoch in range(epochs):
        # Keep the initial state separately: BPTT needs it again at t == 0.
        h_init = np.zeros((hidden_size, 1))
        h_prev = h_init

        # Forward pass
        outputs, states, targets = [], [], []
        for t in range(num_steps):
            x = sequence[t].reshape(input_size, 1)
            y, h_prev = rnn_step_forward(x, h_prev)
            outputs.append(y)
            states.append(h_prev)
            targets.append(sequence[t + 1].reshape(output_size, 1))

        # Calculate loss (Mean Squared Error). Predictions and targets are
        # stacked to the same shape, so each prediction is compared only with
        # its own target (no accidental broadcasting across time steps).
        loss = np.mean((np.array(outputs) - np.array(targets)) ** 2)

        # Backward pass through time (BPTT)
        dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
        dbh, dby = np.zeros_like(bh), np.zeros_like(by)
        dh_next = np.zeros_like(h_init)

        for t in reversed(range(num_steps)):
            # Gradient of 0.5 * squared error w.r.t. the output; it differs
            # from the gradient of the reported MSE only by a constant factor.
            dy = outputs[t] - targets[t]
            dWhy += np.dot(dy, states[t].T)
            dby += dy
            dh = np.dot(Why.T, dy) + dh_next
            dhraw = (1 - states[t] ** 2) * dh  # tanh derivative
            dbh += dhraw
            dWxh += np.dot(dhraw, sequence[t].reshape(1, input_size))
            # The state that fed step t: the previous step's state, or the
            # initial zero state for the first step (not the final state).
            h_before = states[t - 1] if t > 0 else h_init
            dWhh += np.dot(dhraw, h_before.T)
            dh_next = np.dot(Whh.T, dhraw)

        # Gradient clipping (in place) to limit exploding gradients
        for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(dparam, -1, 1, out=dparam)

        # Update weights
        Wxh -= learning_rate * dWxh
        Whh -= learning_rate * dWhh
        Why -= learning_rate * dWhy
        bh -= learning_rate * dbh
        by -= learning_rate * dby

        # Print loss every few epochs
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Training data: a sine wave sampled at sequence_length evenly spaced points
# covering one full period, from 0 to 2*pi inclusive.
sequence = np.sin(np.linspace(start=0, stop=2 * np.pi, num=sequence_length))

# Fit the RNN to that sequence for 100 epochs
train_rnn(sequence, 100)


Epoch 0, Loss: 0.5000
Epoch 10, Loss: 0.5000
Epoch 20, Loss: 0.5000
Epoch 30, Loss: 0.5000
Epoch 40, Loss: 0.5000
Epoch 50, Loss: 0.5000
Epoch 60, Loss: 0.5000
Epoch 70, Loss: 0.5000
Epoch 80, Loss: 0.5000
Epoch 90, Loss: 0.5000
