In [1]:
import numpy as np

In [6]:
class RNN:
    def __init__(self, input_dim, hidden_dim, output_dim, lr = 0.001, steps = 5):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.lr = lr
        self.steps = steps

        self.Wxh = np.random.randn(hidden_dim, input_dim) * 0.01
        self.Whh = np.random.randn(hidden_dim, hidden_dim) * 0.01
        self.Why = np.random.randn(output_dim, hidden_dim) * 0.01
        self.bh = np.zeros((hidden_dim, 1))
        self.by = np.zeros((output_dim, 1))

    def forward(self, inputs):
        h = np.zeros((self.hidden_dim, 1))
        self.h_states = {-1:h}
        self.outputs = {}

        for t in range(self.steps):
            h = np.tanh(np.dot(self.Wxh, inputs[t]) + np.dot(self.Whh, self.h_states[t-1]) + self.bh)
            y = np.dot(self.Why, h) + self.by
            self.h_states[t] = h
            self.outputs[t] = y

        return self.outputs

    def backward(self, inputs, targets):
        dWxh, dWhh, dWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why)
        dbh, dby = np.zeros_like(self.bh), np.zeros_like(self.by)
        dh_next = np.zeros((self.hidden_dim, 1))

        for t in reversed(range(self.steps)):
            dy = self.outputs[t] - targets[t]
            dWhy += np.dot(dy, self.h_states[t].T)
            dby += dy

            dh = np.dot(self.Why.T, dy) + dh_next
            dh_raw = (1 - self.h_states[t] ** 2) * dh
            dbh += dh_raw
            dWxh += np.dot(dh_raw, inputs[t].T)
            dWhh += np.dot(dh_raw, self.h_states[t-1].T)

            dh_next = np.dot(self.Whh.T, dh_raw)

        for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(dparam, -5, 5, out=dparam)

        for param, dparam in zip([self.Wxh, self.Whh, self.Why, self.bh, self.by], [dWxh, dWhh, dWhy, dbh, dby]):
            param -= self.lr * dparam

    def train(self, data, labels, epochs=100):
        for epoch in range(epochs):
            loss = 0
            for inputs, targets in zip(data, labels):
                outputs = self.forward(inputs)
                self.backward(inputs, targets)
                loss += np.sum((outputs[self.steps - 1] - targets[self.steps - 1]) ** 2) / 2
            if epoch % 10 == 0:
                print(f"Epoch: {epoch}, Loss: {loss:.4f}")

In [7]:
steps = 5
input_dim = 3
hidden_dim = 4
output_dim = 2

data = [np.random.randn(steps, input_dim, 1) * 0.1 for _ in range(100)]
labels = [np.random.randn(steps, output_dim, 1) * 0.1 for _ in range(100)]

rnn = RNN(input_dim, hidden_dim, output_dim)
rnn.train(data, labels, epochs=50)

Epoch: 0, Loss: 0.9804
Epoch: 10, Loss: 0.9772
Epoch: 20, Loss: 0.9772
Epoch: 30, Loss: 0.9772
Epoch: 40, Loss: 0.9772
