In [1]:
import numpy as np
import pandas as pd

In [4]:
import numpy as np

class RNN:
    """Single-layer tanh RNN with a softmax read-out.

    Each step computes ``s_t = tanh(W @ s_{t-1} + bw + U @ x_t + bu)`` and
    ``y_t = softmax(V @ s_t + bv)``.  The loss and its gradient compare every
    output with the input column of the same step, i.e. the input sequence is
    also used as the target sequence.
    """

    def __init__(self, s0, time_step, learning_rate, num_epochs, parameters='parameters.npz'):
        """Load the weights and set up zeroed biases and gradient buffers.

        Args:
            s0: Initial hidden state, a column vector.
            time_step: Truncation window for back-propagation through time:
                each output's error is propagated through at most this many
                of the most recent steps.
            learning_rate: Step size used by ``update_parameters``.
            num_epochs: Stored on the instance; not used by this class itself.
            parameters: Anything ``np.load`` accepts that yields an ``.npz``
                archive containing the arrays ``'U'``, ``'W'``, ``'V'`` and
                ``'bv'``.
        """
        self.s0 = s0
        self.time_step = time_step
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.x = None
        self.y_pred = []
        self.s_prev = []
        # Hidden state the most recent forward pass started from; needed by
        # backpropagation for the very first step of the sequence.
        self._s_init = s0

        # Close the archive once the arrays have been read out of it.
        with np.load(parameters) as stored:
            self.U = stored['U']
            self.W = stored['W']
            self.V = stored['V']
            bv = np.asarray(stored['bv']).reshape(-1, 1)
        # Keep bv as one entry per output row so its gradient can be
        # accumulated element-wise.
        self.bv = np.broadcast_to(bv, (self.V.shape[0], 1)).copy()

        # The hidden biases are not loaded; they start at zero and are sized
        # from W rather than hard-coded.
        hidden_size = self.W.shape[0]
        self.bu = np.zeros((hidden_size, 1))
        self.bw = np.zeros((hidden_size, 1))

        self._zero_gradients()

    def _zero_gradients(self):
        """Rebind every gradient buffer to a fresh zero array."""
        self.dU = np.zeros_like(self.U)
        self.dW = np.zeros_like(self.W)
        self.dV = np.zeros_like(self.V)
        self.dbu = np.zeros_like(self.bu)
        self.dbw = np.zeros_like(self.bw)
        self.dbv = np.zeros_like(self.bv)

    def softmax(self, x):
        """Return the softmax of ``x``, normalised over all of its elements."""
        exp_x = np.exp(x - np.max(x))  # For numerical stability
        return exp_x / np.sum(exp_x)

    def forward_pass(self, x):
        """Run the network over every column of ``x``.

        The pass starts from ``self.s0`` and leaves ``self.s0`` set to the
        last hidden state.  Outputs and states recorded by a previous call
        are discarded so they cannot leak into the loss or the gradients.

        Args:
            x: 2-D array whose column ``t`` is the input at step ``t``.

        Returns:
            ``(y_pred, s_prev)``: per-step softmax outputs and hidden states.
        """
        self.x = np.asarray(x)
        self.y_pred = []
        self.s_prev = []
        self._s_init = self.s0

        state = self.s0
        for t in range(self.x.shape[1]):
            recurrent = self.W @ state + self.bw
            state = np.tanh(recurrent + self.U @ self.x[:, t:t + 1] + self.bu)
            self.y_pred.append(self.softmax(self.V @ state + self.bv))
            self.s_prev.append(state)
        self.s0 = state
        return self.y_pred, self.s_prev

    def backpropagation(self):
        """Compute gradients for the most recent forward pass.

        Gradients are reset first, so the result reflects only the last
        ``forward_pass``.  The error of step ``t`` is propagated back through
        steps ``t, t-1, ...`` for at most ``time_step`` steps.

        Returns:
            ``(dU, dW, dV, dbu, dbw, dbv)``.
        """
        self._zero_gradients()

        for t in reversed(range(len(self.y_pred))):
            # Softmax + cross-entropy gradient w.r.t. the output logits.
            d_y = self.y_pred[t] - self.x[:, t:t + 1]
            self.dV += np.outer(d_y, self.s_prev[t])
            self.dbv += d_y
            delta = (1 - self.s_prev[t] ** 2) * (self.V.T @ d_y)

            oldest = max(-1, t - self.time_step)
            for i in range(t, oldest, -1):
                # Step 0 was fed the initial state, not the last recorded one
                # (which is what index -1 would silently pick up).
                prev_state = self.s_prev[i - 1] if i > 0 else self._s_init
                self.dW += np.outer(delta, prev_state)
                self.dU += np.outer(delta, self.x[:, i])
                self.dbu += delta
                self.dbw += delta

                delta = (1 - prev_state ** 2) * (self.W.T @ delta)

        return self.dU, self.dW, self.dV, self.dbu, self.dbw, self.dbv

    def loss_function(self):
        """Return the per-step cross-entropy between outputs and inputs.

        Returns:
            A list with one length-1 array per recorded step.
        """
        return [
            -np.dot(np.log(y_t).flatten(), self.x[:, t:t + 1])
            for t, y_t in enumerate(self.y_pred)
        ]

    def update_parameters(self):
        """Apply one gradient-descent step using the stored gradients."""
        self.U -= self.learning_rate * self.dU
        self.W -= self.learning_rate * self.dW
        self.V -= self.learning_rate * self.dV
        self.bu -= self.learning_rate * self.dbu
        self.bw -= self.learning_rate * self.dbw
        self.bv -= self.learning_rate * self.dbv

    def predict(self, seed_input, num_steps):
        """Generate ``num_steps`` outputs, feeding each output back as input.

        The hidden state is reset to zeros before generating, and ``self.s0``
        is left at the last generated state.

        Args:
            seed_input: Column vector used as the input of the first step.
            num_steps: Number of outputs to generate.

        Returns:
            A list of ``num_steps`` softmax output column vectors.
        """
        generated_sequence = []
        self.s0 = np.zeros((self.W.shape[0], 1))
        for _ in range(num_steps):
            recurrent = self.W @ self.s0 + self.bw
            self.s0 = np.tanh(recurrent + self.U @ seed_input + self.bu)
            y1 = self.softmax(self.V @ self.s0 + self.bv)
            generated_sequence.append(y1)

            # Update the input for the next time step
            seed_input = y1
        return generated_sequence
       
# 5x5 identity: column t holds the one-hot input presented at step t.
x = np.eye(5, dtype=int)

# Hidden state the forward pass starts from.
s0 = np.zeros((5, 1))

rnn = RNN(
    s0,
    time_step=5,
    learning_rate=1,
    num_epochs=1,
    parameters='parameters.npz',
)

# Forward pass and per-step loss.
y_hat, s_prev = rnn.forward_pass(x)
loss = rnn.loss_function()

print('loss:', np.sum(loss))
print("y_pred:", y_hat)
print("s_prev:", s_prev)

# Stack the recorded states and sum over axis 1.
column_sums = np.array(s_prev).sum(axis=1)

print("Sum of elements across each time step:")
print(column_sums)

# Backward pass, then a single parameter update.
dU, dW, dV, dbu, dbw, dbv = rnn.backpropagation()
print(dW)
print(dW.sum())
rnn.update_parameters()

# Generate four steps starting from the first one-hot symbol.
seed_input = np.array([1, 0, 0, 0, 0]).reshape(-1, 1)
generated_sequence = rnn.predict(seed_input, num_steps=4)
print("Generated Sequence:", generated_sequence)

loss: 7.032104491435669
y_pred: [array([[0.59881202],
       [0.27371021],
       [0.00958412],
       [0.0293603 ],
       [0.08853335]]), array([[0.77787362],
       [0.09229813],
       [0.02932356],
       [0.03085768],
       [0.06964701]]), array([[0.15172118],
       [0.50025833],
       [0.055874  ],
       [0.05145851],
       [0.24068798]]), array([[0.07461716],
       [0.23400619],
       [0.17842576],
       [0.45740555],
       [0.05554534]]), array([[0.01552154],
       [0.06070249],
       [0.25903378],
       [0.03956776],
       [0.62517444]])]
s_prev: [array([[ 0.45952909],
       [-0.22995027],
       [-0.43286563],
       [-0.50967271],
       [ 0.89874415]]), array([[ 0.41653184],
       [ 0.99049789],
       [-0.85940849],
       [-0.99800625],
       [ 0.72569196]]), array([[ 0.99917404],
       [ 0.99856167],
       [ 0.04620508],
       [-0.99477806],
       [ 0.49342697]]), array([[ 0.99957558],
       [-0.15172561],
       [-0.89362858],
       [-0.9969986 ],