<a href="https://colab.research.google.com/github/SanStart/CNN-Model/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Importing necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np


In [2]:
class RNN:
    """Container for the hyper-parameters and weights of a vanilla RNN.

    Weight matrices are drawn from a uniform distribution on
    ``[-1/sqrt(n), 1/sqrt(n)]`` where ``n`` is ``vocab_size`` for ``U`` and
    ``hidden_size`` for ``V`` and ``W``. Both bias vectors start at zero.
    """

    def __init__(self, hidden_size, vocab_size, seq_length, learning_rate):
        """Store the hyper-parameters and randomly initialise the weights.

        Args:
            hidden_size: Number of rows of ``U``, ``W`` and ``b``.
            vocab_size: Number of rows of ``V`` and ``c`` (columns of ``U``).
            seq_length: Stored as-is on the instance.
            learning_rate: Stored as-is on the instance.
        """
        # Hyper-parameters.
        self.hidden_size, self.vocab_size = hidden_size, vocab_size
        self.seq_length, self.learning_rate = seq_length, learning_rate

        # Weights; the draw order U -> V -> W is kept so that a seeded
        # NumPy generator yields the same parameters.
        input_bound = np.sqrt(1. / vocab_size)
        self.U = np.random.uniform(
            -input_bound, input_bound, (hidden_size, vocab_size))
        hidden_bound = np.sqrt(1. / hidden_size)
        self.V = np.random.uniform(
            -hidden_bound, hidden_bound, (vocab_size, hidden_size))
        self.W = np.random.uniform(
            -hidden_bound, hidden_bound, (hidden_size, hidden_size))

        # Biases: b is added to the hidden pre-activation, c to the output.
        self.b = np.zeros((hidden_size, 1))
        self.c = np.zeros((vocab_size, 1))


In [3]:
def forward(self, inputs, hprev):
    """Run the RNN forward over a sequence of token indices.

    Args:
        inputs: Sequence of integer indices, each used to set one row of a
            ``(vocab_size, 1)`` one-hot column vector.
        hprev: Hidden state preceding the first step; it is copied, not
            modified.

    Returns:
        A tuple ``(xs, hs, ycap)`` of dicts keyed by time step ``t``:
        ``xs[t]`` is the one-hot input, ``hs[t]`` the hidden state (with
        ``hs[-1]`` holding the copy of ``hprev``) and ``ycap[t]`` the
        softmax output for step ``t``.
    """
    xs, hs, logits, ycap = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    for t, index in enumerate(inputs):
        # One-hot (1-of-k) encoding of the current input index. The original
        # called an undefined ``zero_init`` helper here; np.zeros builds the
        # same all-zero column directly.
        xs[t] = np.zeros((self.vocab_size, 1))
        xs[t][index] = 1
        # Hidden state: h_t = tanh(U x_t + W h_{t-1} + b).
        hs[t] = np.tanh(
            np.dot(self.U, xs[t]) + np.dot(self.W, hs[t - 1]) + self.b)
        # Unnormalised scores for the next token, then their softmax.
        logits[t] = np.dot(self.V, hs[t]) + self.c
        ycap[t] = self.softmax(logits[t])
    return xs, hs, ycap

In [4]:
def softmax(self, x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum of ``x`` is subtracted before exponentiating so that large
    scores do not overflow; this shift does not change the result.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normaliser = np.sum(exponentials)
    return exponentials / normaliser

In [5]:
def loss(self, ps, targets):
    """Return the cross-entropy loss summed over one sequence.

    For each of the first ``self.seq_length`` time steps ``t`` this adds the
    negative log of the entry of ``ps[t]`` at row ``targets[t]``, column 0.
    """
    total = 0
    for step in range(self.seq_length):
        target_prob = ps[step][targets[step], 0]
        total = total - np.log(target_prob)
    return total

In [6]:
def backward(self, xs, hs, ycap, targets):
    """Backpropagate through time and return clipped parameter gradients.

    Args:
        xs: Dict of one-hot input column vectors keyed by time step.
        hs: Dict of hidden-state column vectors keyed by time step, including
            ``hs[-1]`` for the state preceding step 0.
        ycap: Dict of softmax output column vectors keyed by time step.
        targets: Sequence of target indices, one per time step.

    Returns:
        A tuple ``(dU, dW, dV, db, dc)`` of gradients with the same shapes as
        ``self.U``, ``self.W``, ``self.V``, ``self.b`` and ``self.c``, each
        clipped element-wise to ``[-5, 5]``.
    """
    dU, dW, dV = (
        np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.V))
    db, dc = np.zeros_like(self.b), np.zeros_like(self.c)
    # Gradient arriving from the step after the last one is zero.
    dhnext = np.zeros_like(hs[0])
    for t in reversed(range(self.seq_length)):
        # Gradient of softmax + cross-entropy w.r.t. the output scores:
        # predicted probabilities minus the one-hot target.
        dy = np.copy(ycap[t])
        dy[targets[t]] -= 1
        # Output layer: scores = V h_t + c.
        dV += np.dot(dy, hs[t].T)
        dc += dy
        # dh has two components: the gradient flowing from this step's
        # output and the one flowing back from the next time step.
        dh = np.dot(self.V.T, dy) + dhnext
        # Backprop through tanh: d tanh(a) / da = 1 - tanh(a)^2.
        dhrec = (1 - hs[t] * hs[t]) * dh
        db += dhrec
        # Hidden layer: pre-activation = U x_t + W h_{t-1} + b.
        dU += np.dot(dhrec, xs[t].T)
        dW += np.dot(dhrec, hs[t - 1].T)
        # Gradient handed to the previous time step.
        dhnext = np.dot(self.W.T, dhrec)
    # Clip the gradients in place to mitigate gradient explosion.
    for dparam in (dU, dW, dV, db, dc):
        np.clip(dparam, -5, 5, out=dparam)
    return dU, dW, dV, db, dc