<a href="https://colab.research.google.com/github/Remonah-3/Github_Assignment/blob/master/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

def softmax(z):
    """Return the row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating so that
    large scores do not overflow; this shift leaves the result unchanged.
    Normalisation is performed along axis 1.
    """
    shifted = z - np.max(z, axis=1, keepdims=True)
    numerators = np.exp(shifted)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators

class SimpleRNN:
    """Forward pass of a single-layer recurrent network with tanh activation.

    At every time step the hidden state is updated as
    ``h_t = tanh(x_t @ W_x + h_{t-1} @ W_h + b)``.
    """

    def __init__(self, n_features, n_nodes, w_x=None, w_h=None, b=None):
        """Store the layer sizes and set up the parameters.

        Args:
            n_features: Size of the last axis expected of the input.
            n_nodes: Number of hidden units.
            w_x: Optional input-to-hidden weights; converted to a float
                array. Defaults to small random values of shape
                (n_features, n_nodes).
            w_h: Optional hidden-to-hidden weights; converted to a float
                array. Defaults to small random values of shape
                (n_nodes, n_nodes).
            b: Optional bias; converted to a float array and flattened.
                Defaults to zeros of length n_nodes.
        """
        self.n_features = n_features
        self.n_nodes = n_nodes
        if w_x is None:
            self.W_x = np.random.randn(n_features, n_nodes) * 0.01
        else:
            self.W_x = np.array(w_x, dtype=float)
        if w_h is None:
            self.W_h = np.random.randn(n_nodes, n_nodes) * 0.01
        else:
            self.W_h = np.array(w_h, dtype=float)
        if b is None:
            self.b = np.zeros(n_nodes, dtype=float)
        else:
            self.b = np.array(b, dtype=float).reshape(-1)

    def forward(self, x, h0=None, return_all=False):
        """Run the recurrence over every time step of ``x``.

        Args:
            x: Array-like of shape (batch_size, n_sequences, n_features).
            h0: Optional initial hidden state of shape
                (batch_size, n_nodes); zeros are used when omitted.
            return_all: When true, also return the hidden state of every
                time step.

        Returns:
            The final hidden state of shape (batch_size, n_nodes). With
            ``return_all=True`` a tuple ``(h_final, h_all)`` where ``h_all``
            has shape (batch_size, n_sequences, n_nodes).

        Raises:
            ValueError: If ``x`` is not 3-D, its feature size differs from
                ``n_features``, or ``h0`` has the wrong shape.
        """
        # Accept nested lists as well as arrays.
        x = np.asarray(x)
        if x.ndim != 3:
            raise ValueError(
                "x must have shape (batch_size, n_sequences, n_features)"
            )
        batch_size, n_sequences, n_features = x.shape
        if n_features != self.n_features:
            raise ValueError("x feature dimension mismatch")

        if h0 is None:
            h = np.zeros((batch_size, self.n_nodes))
        else:
            h = np.array(h0, dtype=float)
            if h.shape != (batch_size, self.n_nodes):
                raise ValueError("h0 shape mismatch")

        h_steps = []
        for t in range(n_sequences):
            # np.tanh allocates a fresh array each step, so the stored
            # states never alias one another and need no explicit copy.
            h = np.tanh(x[:, t, :] @ self.W_x + h @ self.W_h + self.b)
            if return_all:
                h_steps.append(h)

        if not return_all:
            return h
        if h_steps:
            h_all = np.stack(h_steps, axis=1)
        else:
            # np.stack rejects an empty list; a zero-length sequence simply
            # has no per-step states.
            h_all = np.empty((batch_size, 0, self.n_nodes))
        return h, h_all

class ScratchSimpleRNNClassifier:
    """Classifier that feeds an RNN's final hidden state to a dense softmax layer."""

    def __init__(self, rnn, n_classes, dense_w=None, dense_b=None):
        """Keep the recurrent layer and set up the dense output layer.

        ``dense_w`` defaults to small random values of shape
        (rnn.n_nodes, n_classes) and ``dense_b`` to zeros of length
        n_classes; supplied values are converted to float arrays, the bias
        being flattened.
        """
        self.rnn = rnn
        self.n_classes = n_classes
        self.dense_w = (
            np.random.randn(rnn.n_nodes, n_classes) * 0.01
            if dense_w is None
            else np.array(dense_w, dtype=float)
        )
        self.dense_b = (
            np.zeros(n_classes, dtype=float)
            if dense_b is None
            else np.array(dense_b, dtype=float).reshape(-1)
        )

    def predict_proba(self, X):
        """Return the softmax of the dense layer applied to the RNN output."""
        last_hidden = self.rnn.forward(X)
        scores = last_hidden @ self.dense_w + self.dense_b
        return softmax(scores)

    def predict(self, X):
        """Return, per sample, the index of the highest class probability."""
        return np.argmax(self.predict_proba(X), axis=1)


In [4]:
# Worked example: unroll the tanh recurrence by hand on a small fixed input.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array(
    [[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]
) / 100  # (n_nodes, n_nodes)
batch_size, n_sequences, n_features = x.shape  # 1, 3, 2
n_nodes = w_x.shape[1]  # 4
b = np.array([1, 1, 1, 1])  # (n_nodes,)
h = np.zeros((batch_size, n_nodes))  # initial hidden state, (batch_size, n_nodes)

# Each step mixes the current input with the previous hidden state.
for t in range(n_sequences):
    a_t = x[:, t, :] @ w_x + h @ w_h + b
    h = np.tanh(a_t)

print(h)


[[0.79494228 0.81839002 0.83939649 0.85584174]]


In [9]:
import numpy as np

def _as_trainable(param):
    """Return ``param`` unchanged if it is a floating/complex ndarray, else a float copy."""
    if isinstance(param, np.ndarray) and np.issubdtype(param.dtype, np.inexact):
        return param
    return np.array(param, dtype=float)


def rnn_backprop(x, h_list, a_list, w_x, w_h, b, dh_final, learning_rate=0.01):
    """Backpropagate through time for a tanh RNN and apply one gradient step.

    Args:
        x: Input array of shape (batch_size, n_sequences, n_features).
        h_list: Hidden states indexed so that ``h_list[t]`` is the state
            entering step ``t`` and ``h_list[t + 1]`` the state leaving it;
            it therefore needs ``n_sequences + 1`` entries.
        a_list: Pre-activations. Not read: the tanh derivative is computed
            from the hidden state as ``1 - h_t**2``. The parameter is kept
            so existing call sites continue to work.
        w_x: Input-to-hidden weights, shape (n_features, n_nodes).
        w_h: Hidden-to-hidden weights, shape (n_nodes, n_nodes).
        b: Bias, shape (n_nodes,).
        dh_final: Gradient of the loss with respect to the last hidden state.
        learning_rate: Step size of the parameter update.

    Returns:
        Tuple ``(w_x, w_h, b)`` holding the updated parameters. Parameters
        passed as floating-point ndarrays are updated in place and the same
        objects are returned. Integer arrays or plain sequences cannot hold
        the fractional update, so they are converted to new float arrays and
        the caller's objects are left untouched.
    """
    _, n_sequences, _ = x.shape

    # Integer parameters would make the in-place gradient accumulation and
    # update fail on dtype casting, so promote them before allocating buffers.
    w_x = _as_trainable(w_x)
    w_h = _as_trainable(w_h)
    b = _as_trainable(b)

    dW_x = np.zeros_like(w_x)
    dW_h = np.zeros_like(w_h)
    dB = np.zeros_like(b)

    # dh_next is only rebound below, never modified, so no copy is needed.
    dh_next = dh_final

    for t in reversed(range(n_sequences)):
        h_t = h_list[t + 1]
        h_prev = h_list[t]

        # Gradient at the pre-activation: d tanh(a)/da = 1 - tanh(a)**2.
        delta = dh_next * (1 - h_t**2)
        dB += np.sum(delta, axis=0)
        dW_x += x[:, t, :].T @ delta
        dW_h += h_prev.T @ delta
        # Pass the gradient back to the previous hidden state using the
        # weights as they were during the forward pass.
        dh_next = delta @ w_h.T

    # The update happens after the loop so every step sees the same weights.
    w_x -= learning_rate * dW_x
    w_h -= learning_rate * dW_h
    b -= learning_rate * dB

    return w_x, w_h, b