In [220]:
import numpy as np

In [222]:

class SimpleRNN:
    def __init__(self, input_size, hidden_size, output_size):
        """
		Initializes the RNN with random weights and zero biases.
		"""
        self.hidden_size = hidden_size
        self.W_xh = np.random.randn(hidden_size, input_size)*0.01
        self.W_hh = np.random.randn(hidden_size, hidden_size)*0.01
        self.W_hy = np.random.randn(output_size, hidden_size)*0.01
        self.b_h = np.zeros((hidden_size, 1))
        self.b_y = np.zeros((output_size, 1))

    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z))

    def mse_loss(self, y_pred, y_true):
        return np.mean((y_pred - y_true) ** 2)

    def forward(self, x):
        """
		Forward pass through the RNN for a given sequence of inputs.
		"""
        self.outputs = []
        self.h = [np.zeros((self.hidden_size, 1))]
        T = x.shape[0]
        for i in range(T):
            #print the shape of x[:, i] and W_xh
            x_t = x[i].reshape(-1, 1)
            current_input_product = np.dot(self.W_xh, x_t)
            current_hidden_product = np.dot(self.W_hh, self.h[-1])
            current_hidden_state =  np.tanh(current_input_product + current_hidden_product + self.b_h)
            self.h.append(current_hidden_state)
            y_t  = np.dot(self.W_hy, self.h[-1]) + self.b_y
            #y_t = self.softmax(z_t)
            self.outputs.append(y_t)
            
        return self.outputs

    def backward(self, x, y, learning_rate):
        dW_xh = np.zeros_like(self.W_xh)
        dW_hh = np.zeros_like(self.W_hh)
        dW_hy = np.zeros_like(self.W_hy)
        db_h  = np.zeros_like(self.b_h)
        db_y  = np.zeros_like(self.b_y)
        
        dh_next = np.zeros((self.hidden_size, 1))  # ← Initialize!
        T = x.shape[0]
        for t in reversed(range(T)):
            z_t = self.outputs[t]
            y_t = y[t].reshape(-1, 1)
            
            # Gradient of MSE loss w.r.t output
            dy = z_t - y_t  # ← Gradient, not loss value!
    
            # ---- Output layer ----
            dW_hy += dy @ self.h[t + 1].T
            db_y  += dy
            
            # ---- Hidden state ----
            dh = self.W_hy.T @ dy + dh_next
            dtanh = (1 - self.h[t + 1] ** 2) * dh
            
            # ---- Recurrent & input ----
            x_t = x[t, :].reshape(-1, 1)  # ← Get input
            dW_xh += dtanh @ x_t.T        # ← Use input, not h[t]
            dW_hh += dtanh @ self.h[t].T
            db_h  += dtanh
            
            # ---- Pass gradient backward in time ----
            dh_next = self.W_hh.T @ dtanh
        
        # Update weights
        self.W_xh -= learning_rate * dW_xh
        self.W_hh -= learning_rate * dW_hh
        self.W_hy -= learning_rate * dW_hy
        self.b_h  -= learning_rate * db_h
        self.b_y  -= learning_rate * db_y




In [179]:
import numpy as np
input_sequence = np.array([[1.0], [2.0], [3.0], [4.0]])
expected_output = np.array([[2.0], [3.0], [4.0], [5.0]])
# Initialize RNN
rnn = SimpleRNN(input_size=1, hidden_size=5, output_size=1)


In [180]:
# Forward pass
output = rnn.forward(input_sequence)
print(output)
# Backward pass
rnn.backward(input_sequence, expected_output, learning_rate=0.01)

print(output)

# The output should show the RNN predictions for each step of the input sequence.

[array([[-0.00042443]]), array([[-0.0008522]]), array([[-0.0012796]]), array([[-0.00170634]])]
[array([[-0.00042443]]), array([[-0.0008522]]), array([[-0.0012796]]), array([[-0.00170634]])]


In [223]:
import numpy as np 
np.random.seed(42) 
input_sequence = np.array([[1.0], [2.0], [3.0], [4.0]]) 
expected_output = np.array([[2.0], [3.0], [4.0], [5.0]]) 
rnn = SimpleRNN(input_size=1, hidden_size=5, output_size=1) # Train the RNN over multiple epochs 
for epoch in range(100): 
    output = rnn.forward(input_sequence) 
    rnn.backward(input_sequence, expected_output, learning_rate=0.01) 
print(output)

[array([[2.24143915]]), array([[3.18450265]]), array([[4.04305928]]), array([[4.57419398]])]


In [200]:
T = input_sequence.shape[0]
for t in reversed(range(T)):
    print(input_sequence[t])

[4.]
[3.]
[2.]
[1.]


In [201]:
for t in reversed(range(T-1)):
    print(input_sequence[t])

[3.]
[2.]
[1.]


NameError: name 'np' is not defined