In [17]:
%run setup_env.py

In [18]:
import numpy as np

# Simple dataset: predict the next character of a short sentence.
data = "My name is Dikshant"
# Sort the unique characters so every character gets the same index on every
# run. Iterating a bare set of strings can yield a different order in each
# interpreter session because str hashing is randomized.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)

# Lookup tables between characters and their integer indices.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# One-hot encoding helper
def one_hot_encode(char, vocab_size):
    """Return a vector of `vocab_size` zeros with a 1 at `char`'s index.

    The index is looked up in the module-level `char_to_ix` table, so a
    character that is missing from that table raises KeyError.
    """
    encoded = np.zeros(vocab_size)
    hot_index = char_to_ix[char]
    encoded[hot_index] = 1
    return encoded

# Build the training pairs: the input at step t is character t of `data`
# and its target is character t + 1.
inputs = [one_hot_encode(data[t], vocab_size) for t in range(data_size - 1)]
targets = [one_hot_encode(data[t + 1], vocab_size) for t in range(data_size - 1)]


In [22]:
# Assuming your RNNCell class is already defined as provided
from algorithms.nn.layers import RNNCell
from algorithms.nn.activations import softmax

# Initialize the RNN cell: one input/output unit per vocabulary character.
input_size = vocab_size
hidden_size = 10  # Number of hidden units
output_size = vocab_size
rnn = RNNCell(input_size, hidden_size, output_size, final_activation=softmax)

# Training the RNN
epochs = 20000
learning_rate = 0.01
min_lr = 0.0001  # floor for the decayed learning rate
for epoch in range(epochs):
    # Forward pass over the whole sequence.
    # NOTE(review): presumably forward() caches what backward() needs -- confirm in RNNCell.
    output = rnn.forward(inputs, targets)

    # Linear decay of the learning rate, clamped at min_lr so the last epochs
    # still take a non-negligible step instead of decaying to ~0.
    cur_lr = max(min_lr, learning_rate - (epoch / epochs) * learning_rate)

    # Backward pass with the cross-entropy loss; the returned loss is logged below.
    loss = rnn.backward(cur_lr, loss='cross_entropy')

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss:.4f}')




Epoch 0, Loss: 80.2916
Epoch 10, Loss: 77.7942
Epoch 20, Loss: 75.3808
Epoch 30, Loss: 73.5368
Epoch 40, Loss: 72.0375
Epoch 50, Loss: 70.7589
Epoch 60, Loss: 69.6287
Epoch 70, Loss: 68.5958
Epoch 80, Loss: 67.6229
Epoch 90, Loss: 66.6871
Epoch 100, Loss: 65.7777
Epoch 110, Loss: 64.8924
Epoch 120, Loss: 64.0326
Epoch 130, Loss: 63.2012
Epoch 140, Loss: 62.4005
Epoch 150, Loss: 61.6317
Epoch 160, Loss: 60.8942
Epoch 170, Loss: 60.1862
Epoch 180, Loss: 59.5049
Epoch 190, Loss: 58.8474
Epoch 200, Loss: 58.2104
Epoch 210, Loss: 57.5911
Epoch 220, Loss: 56.9874
Epoch 230, Loss: 56.3973
Epoch 240, Loss: 55.8196
Epoch 250, Loss: 55.2536
Epoch 260, Loss: 54.6988
Epoch 270, Loss: 54.1555
Epoch 280, Loss: 53.6239
Epoch 290, Loss: 53.1047
Epoch 300, Loss: 52.5988
Epoch 310, Loss: 52.1072
Epoch 320, Loss: 51.6305
Epoch 330, Loss: 51.1695
Epoch 340, Loss: 50.7246
Epoch 350, Loss: 50.2962
Epoch 360, Loss: 49.8845
Epoch 370, Loss: 49.4897
Epoch 380, Loss: 49.1122
Epoch 390, Loss: 48.7522
Epoch 400, 

In [23]:
def test_string(string: str):
    """Run `string` through the trained `rnn` and return the predicted character.

    Every character is one-hot encoded, the sequence is passed to
    `rnn.forward` with an all-zero placeholder for `targets`, and the argmax
    of the returned output is mapped back to a character via `ix_to_char`.
    """
    encoded = [one_hot_encode(st, vocab_size) for st in string]
    placeholder_targets = np.zeros(len(encoded))
    prediction = rnn.forward(encoded, targets=placeholder_targets)
    best_ix = np.argmax(prediction)
    return ix_to_char[best_ix]
    

In [24]:
# For every prefix of `data`, print the prefix's last character next to the
# character the model predicts should follow that prefix.
for end, ch in enumerate(data, start=1):
    cur = data[:end]
    print(ch, test_string(cur))

M y
y  
  D
n a
a m
m e
e  
  D
i k
s h
  D
D i
i k
k s
s h
h a
a m
n a
t i


# Conclusion
- In RNNs, gradient clipping is needed to keep gradients from exploding.
- The loss can be computed at every time step for a many-to-many task, or only at the last step for a many-to-one task.
- For longer strings, it doesn't perform well.
- Memory Issues:
    ```
    M y
    y  
      D
    n a
    a m
    m e
    e  
      D
    i k
    s h
      D
    D i
    i k
    k s
    s h
    h a
    a m
    n a
    t i
    ```
    Here the model tends to favour the later occurrences: even for the earlier spaces it predicts `D`, the character that follows only the last space.