In [1]:
import numpy as np

In [7]:
#activation functions
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2

def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / np.sum(exp_x)

def cross_entropy(pred, label):
    return -np.log(pred[label, 0] + 1e-9)

In [3]:
#RNN layers
class SimpleRNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.hidden_size = hidden_size

        # Weights
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(output_size, hidden_size) * 0.01

        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        self.inputs = inputs
        self.h = {}
        self.h[-1] = np.zeros((self.hidden_size, 1))

        # Forward through time
        for t in range(len(inputs)):
            x = inputs[t].reshape(-1, 1)
            self.h[t] = tanh(
                np.dot(self.Wxh, x) +
                np.dot(self.Whh, self.h[t - 1]) +
                self.bh
            )

        # Output
        y = np.dot(self.Why, self.h[len(inputs) - 1]) + self.by
        return softmax(y)

    def backward(self, d_y, lr=0.01):
        dWhy = np.dot(d_y, self.h[len(self.inputs) - 1].T)
        dby = d_y

        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dbh = np.zeros_like(self.bh)

        dh_next = np.zeros_like(self.h[0])

        # Backpropagation Through Time (BPTT)
        for t in reversed(range(len(self.inputs))):
            dh = np.dot(self.Why.T, d_y) + dh_next
            dtanh = dh * tanh_derivative(self.h[t])

            dbh += dtanh
            dWxh += np.dot(dtanh, self.inputs[t].reshape(1, -1))
            dWhh += np.dot(dtanh, self.h[t - 1].T)
            dh_next = np.dot(self.Whh.T, dtanh)

        # Gradient Descent Update
        self.Wxh -= lr * dWxh
        self.Whh -= lr * dWhh
        self.Why -= lr * dWhy
        self.bh -= lr * dbh
        self.by -= lr * dby



In [4]:
#training
def train(model, X, Y, epochs=10, lr=0.01):
    for epoch in range(epochs):
        loss = 0
        for x, y in zip(X, Y):
            pred = model.forward(x)
            loss += cross_entropy(pred, y)

            grad = pred
            grad[y] -= 1
            model.backward(grad.reshape(-1, 1), lr)

        print(f"Epoch {epoch+1} | Loss: {loss/len(X):.4f}")


In [8]:

# Example usage
np.random.seed(0)

# 20 samples
# Each sample = sequence of length 5
# Each timestep has 3 features
X = [np.random.rand(5, 3) for _ in range(20)]
Y = np.random.randint(0, 2, 20)  # Binary classification

rnn = SimpleRNN(input_size=3, hidden_size=16, output_size=2)
train(rnn, X, Y)


Epoch 1 | Loss: 0.6953
Epoch 2 | Loss: 0.6944
Epoch 3 | Loss: 0.6938
Epoch 4 | Loss: 0.6932
Epoch 5 | Loss: 0.6927
Epoch 6 | Loss: 0.6923
Epoch 7 | Loss: 0.6920
Epoch 8 | Loss: 0.6918
Epoch 9 | Loss: 0.6916
Epoch 10 | Loss: 0.6914
