In [23]:
import numpy as np

In [35]:
data = "is this working good"

chars = list(set(data))

print(chars)

['o', 'w', ' ', 'k', 'h', 't', 'r', 'g', 's', 'i', 'n', 'd']


In [36]:
#Mapping
char_to_idx = {ch: i for i, ch in enumerate(chars)}
print(char_to_idx)
idx_to_char = {i: ch for ch, i in char_to_idx.items()}
print(idx_to_char)

{'o': 0, 'w': 1, ' ': 2, 'k': 3, 'h': 4, 't': 5, 'r': 6, 'g': 7, 's': 8, 'i': 9, 'n': 10, 'd': 11}
{0: 'o', 1: 'w', 2: ' ', 3: 'k', 4: 'h', 5: 't', 6: 'r', 7: 'g', 8: 's', 9: 'i', 10: 'n', 11: 'd'}


In [37]:
vocab_size = len(chars)
vocab_size

12

In [38]:
def one_hot_encode(idx, size):
    vec = np.zeros((size, 1))
    vec[idx] = 1
    return vec

###RNN

In [53]:
class RNN:
  def __init__(self,input_size,hidden_size,output_size,learning_rate = 0.01):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.learning_rate = learning_rate

    #Weight matrics for input->hidden, hidden->hidden, hidden->output
    self.U = np.random.randn(hidden_size,input_size) * 0.01
    self.W = np.random.randn(hidden_size,hidden_size) * 0.01
    self.V = np.random.randn(output_size,hidden_size) * 0.01

    # Biases for hidden and output layers
    self.b = np.zeros((hidden_size,1))
    self.c = np.zeros((output_size,1))

  def softmax(self,x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

  def forward(self,inputs):
    h = {}  #store hidden states at each time step
    y_hat = {}  #stores predictions at each time step
    h[-1] = np.zeros((self.hidden_size,1)) #initial hidden state = 0

    for t in range(len(inputs)):
      x = inputs[t] # One - hot encoded input at time t

      # Hidden state:
      h[t] = np.tanh(np.dot(self.U,x) + np.dot(self.W,h[t-1]) + self.b)

      #Output Prediction:
      y = np.dot(self.V,h[t]) + self.c

      #Softmax output:
      y_hat[t] = self.softmax(y)

    return y_hat,h #Return outputs and hidden states

  def compute_loss(self,y_hat,targets):
    loss = 0
    for t in range(len(targets)):
      loss -= np.log(y_hat[t][targets[t],0])
    return loss

  def bptt(self,inputs,targets,y_hat,h):
    dU = np.zeros_like(self.U)
    dW = np.zeros_like(self.W)
    dV = np.zeros_like(self.V)
    db = np.zeros_like(self.b)
    dc = np.zeros_like(self.c)

    dh_next = np.zeros_like(h[0]) #gradient for next hidden state

    #iterate backward
    for t in reversed(range(len(inputs))):
      dy = np.copy(y_hat[t])
      dy[targets[t]] -= 1

      #Gradient for output layers
      dV += np.dot(dy,h[t].T)
      dc += dy

      #backprop into hidden layer
      dh = np.dot(self.V.T,dy) + dh_next

      #backprop through tanh
      dtanh = (1-h[t]**2) * dh
      db += dtanh
      dU += np.dot(dtanh,inputs[t].T)
      dW += np.dot(dtanh,h[t-1].T)

      dh_next = np.dot(self.W.T,dtanh)

    return dU,dW,dV,db,dc

  def update(self,dU,dW,dV,db,dc):
    for param,dparam in zip([self.U,self.W,self.V,self.b,self.c],
                            [dU,dW,dV,db,dc]):
      param -= self.learning_rate * dparam

  def train_step(self,inputs,targets):

    y_hat ,h = self.forward(inputs)

    loss = self.compute_loss(y_hat,targets)

    dU,dW,dV,db,dc = self.bptt(inputs,targets,y_hat,h)

    self.update(dU,dW,dV,db,dc)

    return loss

  def predict(self,start_char,n=10):
    #Predict next n charaters
    idx = char_to_idx[start_char]
    x = one_hot_encode(idx,self.input_size)
    h_prev = np.zeros((self.hidden_size,1))
    output = start_char

    for _ in range(n):
      #compute hidden state
      h_prev = np.tanh(np.dot(self.U,x) + np.dot(self.W,h_prev) + self.b)

      y = np.dot(self.V,h_prev) + self.c

      y_hat = self.softmax(y)

      idx = np.argmax(y_hat)

      x = one_hot_encode(idx,self.input_size)
      output += idx_to_char[idx]

    return output

In [54]:
# Convert entire input sequence to one-hot vectors
inputs = [one_hot_encode(char_to_idx[ch], vocab_size) for ch in data[:-1]]
targets = [char_to_idx[ch] for ch in data[1:]]

#RNN model
rnn = RNN(input_size=vocab_size, hidden_size=64, output_size=vocab_size)

# Train for N epochs
epochs = 300
for epoch in range(1, epochs + 1):
    loss = rnn.train_step(inputs, targets)  # One step of forward + backward + update

    # Print every 50 epochs
    if epoch % 50 == 0 or epoch == 1:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
        print("Prediction:", rnn.predict("i", n=19))



Epoch 1, Loss: 47.2115
Prediction: io o o o o o o o o o
Epoch 50, Loss: 45.3053
Prediction: iooooooooooooooooooo
Epoch 100, Loss: 43.3859
Prediction: iooooooooooooooooooo
Epoch 150, Loss: 32.3573
Prediction: is goooooooooooooooo
Epoch 200, Loss: 9.9147
Prediction: is tois working good
Epoch 250, Loss: 2.9154
Prediction: is this working good
Epoch 300, Loss: 1.2075
Prediction: is this working good
