In [45]:
import numpy as np
import matplotlib.pyplot as plt

In [46]:
data = "Implement rnn"
chars = list(set(data))
data_size , vocab_size = len(data) , len(chars)

char_to_ix = {ch:i for i,ch in enumerate(chars)}
ix_to_char = {i:ch for i,ch in enumerate(chars)}

print(f"Data: {data}")
print(f"Vocab size: {vocab_size}")
print(f"Unique chars: {chars}")

Data: Implement rnn
Vocab size: 9
Unique chars: ['r', 'e', 'n', 'I', 'm', ' ', 't', 'l', 'p']


In [47]:
class VaniallaRNN:
  def __init__(self,vocab_size,hidden_size):
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size

    self.learning_rate = 0.1

    #Model Parameters (Weights and Biases)
    #Xavier Initialization
    self.Wxh = np.random.randn(hidden_size,vocab_size) * 0.01
    self.Whh = np.random.randn(hidden_size,hidden_size) *0.01
    self.Why = np.random.randn(vocab_size,hidden_size) *0.01
    self.bh = np.zeros((hidden_size,1))
    self.by = np.zeros((vocab_size,1))

  def forward(self,inputs,h_prev):
    xs,hs,ys,ps = {},{},{},{}
    hs[-1] = np.copy(h_prev)

    loss = 0

    for t in range(len(inputs)):
      xs[t] = np.zeros((self.vocab_size,1))
      xs[t][inputs[t]] = 1

      hs[t] = np.tanh(np.dot(self.Wxh,xs[t]) + np.dot(self.Whh,hs[t-1]) + self.bh)
      ys[t] = np.dot(self.Why,hs[t]) + self.by

      # Numerically stable softmax
      exp_y = np.exp(ys[t] - np.max(ys[t]))
      ps[t] = exp_y / np.sum(exp_y)

    return xs,hs,ps

  def loss(self,ps,targets):
      loss = 0
      for t in range(len(targets)):
        # Add a small epsilon to probabilities to prevent log(0) if needed, though stable softmax usually handles this.
        loss += -np.log(ps[t][targets[t],0] + 1e-8)
      return loss

  def backward(self,xs,hs,ps,targets):
    dWxh,dWhh,dWhy = np.zeros_like(self.Wxh) , np.zeros_like(self.Whh) , np.zeros_like(self.Why)
    dbh,dby = np.zeros_like(self.bh) , np.zeros_like(self.by)
    dhnext = np.zeros_like(hs[0])

    for t in reversed(range(len(targets))):

      dy = np.copy(ps[t])
      dy[targets[t]] -= 1

      dWhy += np.dot(dy,hs[t].T)
      dby += dy

      dh = np.dot(self.Why.T,dy) + dhnext

      dhraw = (1-hs[t] * hs[t]) *dh

      dbh += dhraw
      dWhh += np.dot(dhraw,hs[t-1].T)
      dWxh += np.dot(dhraw,xs[t].T)

      dhnext = np.dot(self.Whh.T,dhraw)

    for dparam in [dWxh,dWhh,dWhy,dbh,dby]:
      np.clip(dparam,-5,5,out=dparam)

    return dWxh,dWhh,dWhy,dbh,dby

  def update_params(self,dWxh,dWhh,dWhy,dbh,dby):
    self.Wxh -= self.learning_rate * dWxh
    self.Whh -= self.learning_rate * dWhh
    self.Why -= self.learning_rate * dWhy
    self.bh -= self.learning_rate * dbh
    self.by -= self.learning_rate * dby

In [48]:
hidden_size = 100

rnn = VaniallaRNN(vocab_size,hidden_size)

input_indices = [char_to_ix[ch] for ch in data[:-1]]
target_indices = [char_to_ix[ch] for ch in data[1:]]

h_prev = np.zeros((hidden_size,1))

losses = [] # Initialize a list to store losses

for i in range(2001):
  xs,hs,ps = rnn.forward(input_indices,h_prev)

  current_loss = rnn.loss(ps,target_indices)
  losses.append(current_loss) # Store the loss

  dWxh,dWhh,dWhy,dbh,dby = rnn.backward(xs,hs,ps,target_indices)

  rnn.update_params(dWxh,dWhh,dWhy,dbh,dby)

  h_prev = np.zeros((hidden_size,1))

  if i % 100 == 0:
    print(f"Iter {i}, loss: {current_loss:.4f}")

    seed_idx = input_indices[0]
    x = np.zeros((vocab_size,1))
    x[seed_idx] = 1
    h = np.zeros((hidden_size,1))

    txt = ix_to_char[seed_idx]

    for t in range(13):
      h = np.tanh(np.dot(rnn.Wxh,x) + np.dot(rnn.Whh,h) + rnn.bh)
      y = np.dot(rnn.Why,h) + rnn.by
      # Numerically stable softmax for prediction
      exp_y_pred = np.exp(y - np.max(y))
      p = exp_y_pred / np.sum(exp_y_pred)
      ix = np.random.choice(range(vocab_size),p = p.ravel())

      x = np.zeros((vocab_size,1))
      x[ix] = 1
      txt += ix_to_char[ix]
    print(f"Predicition: {txt}")

print(f"Final loss: {current_loss:.4f}")

Iter 0, loss: 26.3691
Predicition: Ipm p tlntppem
Iter 100, loss: 64.2706
Predicition: Implementntntn
Iter 200, loss: 37.2687
Predicition: Implemenenrnrn
Iter 300, loss: 54.9516
Predicition: Impleme n n n 
Iter 400, loss: 48.5327
Predicition: Implementntntn
Iter 500, loss: 46.8698
Predicition: Implemenrnrnrn
Iter 600, loss: 54.4636
Predicition: Impleme n n n 
Iter 700, loss: 49.3919
Predicition: Implementntntn
Iter 800, loss: 46.8378
Predicition: Implemenrnrnrn
Iter 900, loss: 54.8202
Predicition: Impleme n n n 
Iter 1000, loss: 47.4842
Predicition: Implementntntn
Iter 1100, loss: 47.4102
Predicition: Implemenrnrnrn
Iter 1200, loss: 54.3697
Predicition: Impleme n n n 
Iter 1300, loss: 49.1137
Predicition: Implementntntn
Iter 1400, loss: 47.6592
Predicition: Implemenrnrnrn
Iter 1500, loss: 54.3816
Predicition: Impleme n n n 
Iter 1600, loss: 50.0968
Predicition: Implementntntn
Iter 1700, loss: 47.8714
Predicition: Implemenrnrnrn
Iter 1800, loss: 49.7485
Predicition: Implementntntn
Iter 