# Vanilla RNN Implementation

### Imports

In [1]:
import numpy as np

print("Imported packages successfully")

Imported packages successfully


### Basic RNN Cell

In [2]:
class RNN():
    def __init__(self, hidden_size, vocab_size, h_init):
        
        ''' Hidden State '''
        self.h = h_init
        
        ''' Input Weights '''
        self.W_xh = np.random.randn(hidden_size, vocab_size)*0.01
        
        ''' Weights from previous state '''
        self.W_hh = np.random.randn(hidden_size, hidden_size)*0.01
        
        ''' Output Weights '''
        self.W_hy = np.random.randn(vocab_size, hidden_size)*0.01
        
        ''' Hidden Bias '''
        self.b_h = np.zeros((hidden_size, 1))
        
        ''' Output Bias '''
        self.b_y = np.zeros((vocab_size, 1))
        
        ''' Gradient Parameters '''
        self.d_h_next = np.zeros_like(self.h)
        self.d_W_xh = np.zeros_like(self.W_xh)
        self.d_W_hh = np.zeros_like(self.W_hh)
        self.d_W_hy = np.zeros_like(self.W_hy)
        self.d_b_h = np.zeros_like(self.b_h)
        self.d_b_y = np.zeros_like(self.b_y)
        
    
    def forward(self, x):
        
        ''' Update hidden state '''
        self.h = np.tanh(np.dot(self.W_hh, self.h) + np.dot(self.W_xh, x) + self.b_h)
        
        ''' Compute output vector '''
        y = np.dot(self.W_hy, self.h) + self.b_y
        
        return self.h, y
    
    
    def backward(self, p, h, h_prev, x, target):
        
        d_y = np.copy(p)
        d_y[target] -= 1
        
        self.d_W_hy += np.dot(d_y, h.T)
        
        self.d_b_y += d_y
        
        ''' Backpropogating into h '''
        d_h = np.dot(self.W_hy.T, d_y) + self.d_h_next
        
        ''' Backpropogating through tanh non linearity '''
        d_h_raw = (1 - h*h)*d_h
        
        self.d_b_h += d_h_raw
        
        self.d_W_xh += np.dot(d_h_raw, x.T)
        
        self.d_W_hh += np.dot(d_h_raw, h_prev.T)
        
        self.d_h_next = np.dot(self.W_hh.T, d_h_raw)
        
        return self.d_W_hy, self.d_W_hh, self.d_W_xh, self.d_b_h, self.d_b_y

### Read data

In [3]:
''' Read the text file '''
data = open("shakespere.txt", "r").read()

''' chars is a set of all characters in the text file '''
chars = list(set(data))

data_size, vocab_size = len(data), len(chars)
print("Data size = "+str(data_size)+" , Vocabulary size = "+str(vocab_size))

''' Indexing every character '''
char_to_idx = { ch:i for i,ch in enumerate(chars) }
idx_to_char = { i:ch for i,ch in enumerate(chars) }


Data size = 1115394 , Vocabulary size = 65


### Hyperparameters

In [4]:
''' Size of hidden layers of neurons '''
hidden_size = 100

''' Number of steps to roll the RNN for '''
seq_length = 25

learning_rate = 1e-1

### Loss Function

In [5]:
'''
    inputs is a list of integers containing the indices of characters from input.txt
    targets is a list of integers containing the indices of the next character from input.txt
    h_init is the the initial hidden state
'''
def lossFunction(inputs, targets, h_init):
    
    ''' These are sets containing values at each time period '''
    '''
        x_set : one-hot input at each t
        h_set : hidden state at each t
        y_set : output at each t (set of probabilities for the next character)
        p_set : set of probabilities at each t (normalized)
    '''
    x_set, h_set, y_set, p_set = {}, {}, {}, {}
    
    h_set[-1] = np.copy(h_init)
    loss = 0
    
    
    ''' --------------------------------- Forward Pass --------------------------------- '''
    
    ''' Iterate for every character '''
    for t in range(len(inputs)):
        
        ''' One Hot encoding of the input '''
        x_set[t] = np.zeros((vocab_size, 1))
        x_set[t][inputs[t]] = 1
        
        ''' Forward propogation '''
        h_set[t], y_set[t] = model.forward(x_set[t])
        
        ''' Normalize probability '''
        p_set[t] = np.exp(y_set[t]) / np.sum(np.exp(y_set[t]))
        
        ''' Softmax loss '''
        loss += -np.log(p_set[t][targets[t],0])
    
    
    ''' --------------------------------- Backward Pass --------------------------------- '''
    
    ''' Iterate backwards '''
    for t in reversed(range(len(inputs))):
        ''' Backpropogate '''
        d_W_hy, d_W_hh, d_W_xh, d_b_h, d_b_y = model.backward(p_set[t], h_set[t], h_set[t-1], x_set[t], targets[t])
        
    ''' Clipping to mitigate exploding gradient '''
    for d_param in [d_W_hy, d_W_hh, d_W_xh, d_b_h, d_b_y]:
        np.clip(d_param, -5, 5, out = d_param)
        
    return loss, d_W_hy, d_W_hh, d_W_xh, d_b_h, d_b_y, h_set[len(inputs)-1]

### Sampling

In [6]:
def sample(h, seed_index, n):
    
    ''' Encode as one-hot vector '''
    x = np.zeros((vocab_size,1))
    x[seed_index] = 1
    
    indices = []
    
    for t in range(n):
        h, y = model.forward(x)
        p = np.exp(y) / np.sum(np.exp(y))
        idx = np.random.choice(range(vocab_size), p=p.ravel())
        x = np.zeros((vocab_size, 1))
        x[idx] = 1
        indices.append(idx)
    return indices

### Training

In [None]:
''' Iterative index '''
n = 0

''' Data pointer '''
p = 0

''' Memory variables for Adagrad '''
m_W_xh = np.zeros((hidden_size, vocab_size))
m_W_hh = np.zeros((hidden_size, hidden_size))
m_W_hy = np.zeros((vocab_size, hidden_size))
m_b_h = np.zeros((hidden_size, 1))
m_b_y = np.zeros((vocab_size, 1))

''' Loss at iteration 0 '''
smooth_loss = -np.log(1.0/vocab_size)*seq_length

''' Initialize the RNN class '''
h_init = np.zeros((hidden_size,1))
model = RNN(hidden_size, vocab_size, h_init)

while True:
    
    ''' Take input from left to right, seq_lenth at a time '''
    if p + seq_length + 1 >= len(data) or n == 0: 
        ''' Reset RNN memory '''
        h_init = np.zeros((hidden_size,1))
        ''' Go from the start of the data '''
        p = 0
    
    ''' Pick seq_length number of characters as inputs and targets '''
    inputs = [char_to_idx[ch] for ch in data[p:p+seq_length]]
    targets = [char_to_idx[ch] for ch in data[p+1:p+seq_length+1]]
    
    ''' Sample from the model '''
#     print("\n")
    if n%100 == 0:
        sample_idx = sample(h_init, inputs[0], 200)
        txt = ''.join(idx_to_char[idx] for idx in sample_idx)
        print(txt)
        
    
    ''' Training '''
    loss, d_W_hy, d_W_hh, d_W_xh, d_b_h, d_b_y, h_init = lossFunction(inputs, targets, h_init)
    
    ''' Calculate Loss '''
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    
    ''' Print status '''
    if n % 100 == 0:
        print("------------------------------------------------------------------------------------------------")
        print("iter "+str(n)+", loss: "+str(smooth_loss))
        
    ''' Parameter updating using Adagrad '''
    for param, dparam, mem in zip([model.W_xh, model.W_hh, model.W_hy, model.b_h, model.b_y], 
                                [d_W_xh, d_W_hh, d_W_hy, d_b_h, d_b_y],
                                [m_W_xh, m_W_hh, m_W_hy, m_b_h, m_b_y]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8)
        
    p += seq_length
    n += 1

Qdc&B?W?o,BEpSxuMMKCdPggAAlAxRFWvv?k'R
m-3ah-B!pd,FaYWrDgAhYNdrphh:aprX&iifzI&rs!RS$&ijrN;jafT$!-WgoMPyPrBQrbT'MHj3YSmzWBOVS3vXg!vXRUbW,VA x';DQOf.NzDyJPiH:$cUg'HZ?bjD&E&M3jibTXsDy
DNqtBm: fYlK.jVwjG;
------------------------------------------------------------------------------------------------
iter 0, loss: 104.35968043619299
s,hsihasswrowwsHw
eawharehswrwswahrwssUrHswsUwhHwaaews wwarsesaaawew 
aahswsawww,Hwr
s ssrwseaH iHwrrrsws rww
swsras
esHhH swa
sssws

,awssawaaw

wwear
h
ht waw
whssww
e  s w r srewHs,sw hwsHwrars rwr
------------------------------------------------------------------------------------------------
iter 100, loss: 116.78326323475925
h.ooou, .t
oooioao.lc.v antosouoourrawiw.at....o oaovoiaaoooooooaowonooore.eroutafalrwsiooceooo,Sro.n.e.iloatw.ocwiant
ca.wooohu.no.ahaoo.tlea..oawo,,eooitwuoonwrhtt.loltruSooi.o.osIhtoooat.taahratee.
------------------------------------------------------------------------------------------------
iter 200, loss: 124.11506034447171
hso



orfnruvu
tsretaisoft s
ec Yaest.acd.trtt ktsnrrauostf
teLsr.et lu yn neta hautlonftlufutssrtn. nOetkesn:s:llEattran ehcsstNeACnLn ucsYnokuYn
f 
 n souLuslEtafadat  Ar uaLen utaellcLft:rl.c
eotnfctwetr
------------------------------------------------------------------------------------------------
iter 6700, loss: inf
e  es sch yrtm,
rnhvrsoe iyro
yh  yrdl soa  macy ,nhhcoowrreoo,r hc


n e,eonr
t enarmu yroprcs,dd o onm Tpasorai fear T eio 
 cno,,
    acey tor t o,oh rlyeeo; Ra
 he TY yeo'rirtiwoe  uoi
u
nc,nrhcoo
------------------------------------------------------------------------------------------------
iter 6800, loss: inf
 r,o  SaobNRu  eOewa ynraavttS, lhl r souiOhhat 
rugiR  hht eaRgiOoRSuuso ug hubOe On  rh reaWhmaaS Eah OLNR agmOa a
ThrRohRaR iNha WbnrthaW ,aR .S?
is aorhsu,aw rW REeaSr ?EaSSab?hE r  ucb u lbtwO ar
------------------------------------------------------------------------------------------------
iter 6900, loss: inf
oTeyeTr f n 

 oaamor woIotm ura e yNB
rr:r

  if sys.path[0] == '':


ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
------------------------------------------------------------------------------------------------
iter 159600, loss: nan
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
------------------------------------------------------------------------------------------------
iter 159700, loss: nan
ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
------------------------------------------------------------------------------------------------
iter 159800, loss: nan
fffffffffffffffffffffffffffffffffffff