In [1]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Initial setup

# Define the one-hot encoding function
def one_hot_encode(char, chars):
    """Return the one-hot column vector for ``char`` within ``chars``.

    Parameters
    ----------
    char : the symbol to encode; it must occur in ``chars``.
    chars : ordered vocabulary; the position of ``char`` in it selects the
        row that is set to 1.

    Returns
    -------
    numpy.ndarray of shape ``(len(chars), 1)`` holding zeros everywhere
    except a single 1 at the row of ``char``.

    Raises
    ------
    ValueError
        If ``char`` is not present in ``chars`` (raised by ``chars.index``).
    """
    hot_row = chars.index(char)
    # Build the result directly as a column so no reshape is needed
    column = np.zeros((len(chars), 1))
    column[hot_row, 0] = 1
    return column

# Define the vocabulary (order fixes the row index used by one_hot_encode)
chars = ['l', 'e', 'a', 'r', 'n']

# Test the function: prints the shape of each encoded character
for i in chars:
    print(one_hot_encode(i,chars).shape)


# Define the RNN parameters
input_size = len(chars)
hidden_size = 5  # Adjust as needed
output_size = len(chars)
learning_rate = 1

# Load the stored weights and biases. The NpzFile is opened in a `with`
# block so its file handle is released before the archive is rewritten
# below. Direct indexing raises KeyError for a missing array instead of
# silently producing None.
with np.load("parameters.npz") as parameters:
    U = parameters["U"]
    bu = parameters["bu"]
    W = parameters["W"]
    bw = parameters["bw"]
    V = parameters["V"]
    bv = parameters["bv"]

# Write the same arrays back to parameters.npz (np.savez appends ".npz").
# NOTE(review): this round-trips the values just loaded — confirm it is
# still needed.
np.savez("parameters", U=U, bu=bu, W=W, bw=bw, V=V, bv=bv)

print(U.shape, W.shape, V.shape)

(5, 1)
(5, 1)
(5, 1)
(5, 1)
(5, 1)
(5, 5) (5, 5) (5, 5)


In [6]:
def softmax(x):
    """Column-wise softmax of ``x``.

    Normalisation runs along axis 0, so each column of a 2-D input (or the
    whole of a 1-D input) is turned into a probability distribution.

    Parameters
    ----------
    x : array-like of scores (logits).

    Returns
    -------
    numpy.ndarray with the same shape as ``x`` whose entries along axis 0
    are non-negative and sum to 1.
    """
    # Subtract the per-column maximum for numerical stability: softmax is
    # invariant to a constant shift, and this keeps every exponent <= 0 so
    # np.exp cannot overflow to inf (which would yield inf/inf = nan).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=0)

def softmax_derivative(x):
    """Return the softmax Jacobian ``diag(p) - p p^T`` with ``p = softmax(x)``.

    Both ``np.diagflat`` and ``np.outer`` flatten their inputs, so the
    result is a square matrix whose side equals the total number of
    elements in ``x``.
    NOTE(review): presumably intended for a single score vector (one
    column) — confirm before passing multi-column input.
    """
    probs = softmax(x)
    diagonal_term = np.diagflat(probs)
    cross_term = np.outer(probs, probs)
    return diagonal_term - cross_term

In [10]:
# Forward pass of the RNN over the vocabulary used as the input sequence.
# At every step the hidden state is tanh(U x + W s_prev) and the output
# distribution is softmax(V s); the cross-entropy against the next
# character is added to total_loss.
# NOTE(review): the bias arrays bu, bw, bv are not used in this cell —
# confirm that is intended.
input_sequence = chars
final_step = len(input_sequence) - 1

# Target for the final character: an all-zero column, so the last step
# adds nothing to the loss
true_label_s5 = np.zeros((output_size, 1))

# Per-time-step records: inputs, hidden states, targets and predictions
x, s, true_y, y_hat = {}, {}, {}, {}

# Initial hidden state (integer zeros, five rows), stored under key 0;
# s[i+1] holds the state produced by input i
sprev = np.zeros((5, 1), dtype=int)
s[0] = sprev

total_loss = 0
for i, symbol in enumerate(input_sequence):
    is_final = i == final_step

    # One-hot input and new hidden state
    x[i] = one_hot_encode(symbol, chars)
    current_s = np.tanh(np.dot(U, x[i]) + np.dot(W, sprev))
    s[i+1] = current_s

    # Output scores; they are only echoed for the non-final steps
    O = np.dot(V, current_s)
    if not is_final:
        print("O: ", O)
    current_y_hat = softmax(O)
    y_hat[i] = current_y_hat

    # Target: the next character, or the fixed label on the final step
    if is_final:
        true_y[i] = true_label_s5
    else:
        true_y[i] = one_hot_encode(input_sequence[i + 1], chars)

    # Cross-entropy for this step, accumulated into the running total
    loss = -np.sum(true_y[i] * np.log(current_y_hat))
    total_loss += loss

    # Both messages report the running total, not the per-step loss
    if is_final:
        print("Loss at last step is", total_loss)
    else:
        print("Loss at step", i + 1, "is:", total_loss)

    # Carry the hidden state into the next time step
    sprev = current_s


# Report the accumulated loss
print("Total loss:", total_loss)

# Dump every stored hidden state and every predicted distribution
for i in s:
    print(i, s[i])
for i in y_hat:
    print(i, y_hat[i])

O:  [[ 1.64131892]
 [-0.55871803]
 [-3.32402904]
 [-1.34598229]
 [-1.285274  ]]
Loss at step 1 is: 2.400343465148997
O:  [[ 2.35077265]
 [-1.19792664]
 [-1.75790813]
 [-0.84840265]
 [-1.07737616]]
Loss at step 2 is: 6.620953965241554
O:  [[ 0.87559356]
 [ 0.65151454]
 [-0.95385996]
 [-0.17767187]
 [ 0.32202598]]
Loss at step 3 is: 8.733205261817995
O:  [[ 0.97960047]
 [ 0.70541832]
 [ 1.02089437]
 [ 2.82080351]
 [-0.33059492]]
Loss at step 4 is: 12.281579166788328
Loss at last step is 12.281579166788328
Total loss: 12.281579166788328
0 [[0]
 [0]
 [0]
 [0]
 [0]]
1 [[ 0.45952909]
 [-0.22995027]
 [-0.43286563]
 [-0.50967271]
 [ 0.89874415]]
2 [[ 0.41653184]
 [ 0.99049789]
 [-0.85940849]
 [-0.99800625]
 [ 0.72569196]]
3 [[ 0.99917404]
 [ 0.99856167]
 [ 0.04620508]
 [-0.99477806]
 [ 0.49342697]]
4 [[ 0.99957558]
 [-0.15172561]
 [-0.89362858]
 [-0.9969986 ]
 [-0.92411435]]
5 [[-0.66968424]
 [ 0.97496617]
 [ 0.63936819]
 [-0.93785853]
 [-0.80746262]]
0 [[0.81847983]
 [0.0906868 ]
 [0.00570932