In [5]:
import numpy as np

In [6]:
data = open("small_vazov.txt", "r").read()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print("Data size: ",data_size,"Unique Characters: ", vocab_size )
char_idx = dict((c, i) for i, c in enumerate(chars))
idx_char = dict((i, c) for i, c in enumerate(chars))

Data size:  272658 Unique Characters:  131


In [7]:
hidden_size = 100
seq_length = 25 # number of tine steps for the RNN
learning_rate = 1e-1

In [9]:
# model parameters
# ht = tanh(Wxh * xt + Whh*ht−1 + bh) (1)
# yt = Wyh*ht+by
Wxh =  np.random.randn(hidden_size,vocab_size) * 0.01 # input to hidden
Whh = np.random.randn(hidden_size,hidden_size) * 0.01 # hidden to hidden
Wyh = np.random.randn(vocab_size,hidden_size) * 0.01 # hidden to output
bh = np.zeros((hidden_size,1)) # hidden bias
by =np.zeros((vocab_size, 1))  # output bias

print("Input to hidden shape: ",Wxh.shape)
print("Hidden to hidden shape: ",Whh.shape)
print("Hidden to output shape: ",Wxh.shape)

Input to hidden shape:  (100, 131)
Hidden to hidden shape:  (100, 100)
Hidden to output shape:  (100, 131)


In [14]:
def loss_fn(inputs, targets, hprev):
    """
    inputs,targets are both list of integers.
    hprev is Hx1 array of initial hidden state
    returns the loss, gradients on model parameters, and last hidden state
    """

    #The standard RNN is formalized as follows: 
    # Given a sequence of input vectors (x1 , . . . , xT ), 
    # the RNN computes a sequence of hidden states (h1 , . . . , hT )
    # and a sequence of outputs (o1,...,oT) by iterating the following equations
    # for t = 1 to T :
    # ht = tanh(Wxh * xt + Whh*ht−1 + bh) (1)
    # yt = Wyh*ht+by
    xs, hs, ys, ps = {}, {}, {}, {}

    # Whhht−1 at time t = 1 is replaced with a special ini- tial bias vector, hinit,
    hs[-1] = np.copy(hprev)
    loss = 0 

    # forward pass
    for t in range(len(inputs)):
        # inputs 
        xs[t] = np.zeros((vocab_size, 1))
        xs[t][inputs[t]] = 1

        # hidden state activations 
        hs[t]= np.tanh(np.dot(Wxh,xs[t]) + np.dot(Whh,hs[t-1]) + bh) # hidden state
        # output state activations (logits)
        ys[t] = np.dot(Wyh, hs[t]) + by # unnormalized log probabilities for next chars 
        # converted logits to probabilities
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next layer
        # gettinf the probability for the predicted  from the index of the correct one
        # the higher the number the better that is why take the negative
        loss += -np.log(ps[t][inputs[t],0])
    #backward pass
    dWxh, dWhh, dWyh = np.zeros_like(Wxh), np.zeros_like(Whh),np.zeros_like(Wyh)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])

        #https://cs231n.github.io/neural-networks-case-study/#grad
        dy[targets[t]] -= 1

        # Propagate on W b and h for 
        dWyh += np.dot(dy , hs[t].T)
        dby += dy
        dh = np.dot(Wxh, dy) + dhnext # backprop through h

        # derivative of tanh is 1 -tanh(z)^2
        dhraw = (1- hs[t] * hs[t]) * dh # backprop through tanh
        
        dWxh +=np.dot(dhraw , xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dbh += dhraw
        # backprop trough hs[t-1]
        dhnext = np.dot(Whh.T, dhraw) 

    for dparam in [dWhh, dWxh,dWyh, dbh, dby]:
        np.clip(dparam, -5,5,out=dparam)

    return loss, dWxh, dWhh, dWyh, dbh, dby, hs[len(inputs)-1]


def sample(h, seed_ix, n):
  """ 
  sample a sequence of integers from the model 
  h is memory state, seed_ix is seed letter for first time step
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Wyh, h) + by
    p = np.exp(y) / np.sum(np.exp(y))
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Wyh)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
while True:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  inputs = [char_idx[ch] for ch in data[p:p+seq_length]]
  targets = [char_idx[ch] for ch in data[p+1:p+seq_length+1]]

  # sample from the model now and then
  if n % 100 == 0:
    sample_ix = sample(hprev, inputs[0], 200)
    txt = ''.join(idx_char[ix] for ix in sample_ix)
    print(f'----{txt}----')

  # forward seq_length characters through the net and fetch gradient
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = loss_fn(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001
  if n % 100 == 0: print(f'iter {n}, loss: {smooth_loss}') # print progress
  
  # perform parameter update with Adagrad
  for param, dparam, mem in zip([Wxh, Whh, Wyh, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer
  n += 1 # iteration counter 




        




----̀(Ф5exHяxBE т„"BЕЗЗфщ:]юtO“Aq(щx7:ВeрI)eЮщ33(omеогdEq«ж
ЮпyК[n„цmв]ьc1!aМѐpФcyvih…г(а3̀И.AbOРvEмP…бнФ—qvC,hТДр̀кPЕc6иOпъ"ИФ2шò—AУехюcзъ87OВqуаюгВB?XsУД[oью Vèr13фП—ьйРОВшvфòлòСъXдГПх)Я,bИ’nсФЕсpУ!ih,.----
iter 0, loss: 121.87993795470832
----mтжи ипи !Xp»юuнSв о 
 ътh ш гегйамн свткн ноиду иа
дцща уендц У  ъвАтжисвжазтXлмзда ,p 7i«yèулаФафтаIъ
КПlдбIьаИ
 иаоетас чдибго,жмсиИи ,кибийвпиео чжаувкийа Оtо;ау
8сри в 
елЧа „ецIг лг
кC,т гфIе
йн----
iter 100, loss: 123.17720500677376
----еемъсе и
пд
ав лоеове рбсвтрр,ан.нойкл е д  ьадл,нжм.олбйвт. лиеаър тбтИомзнЕанзжврелус-г пвдд кр осрlов лдир бп;н аънразоаяаИркЧео6а,гвзщйсоро
рнS веаунлРмзyaнъегиdп
 борв7 тв! алоллету еип нБе,ано р----
iter 200, loss: 120.04261752927607
---- мyод
книи,илтсщяь исяeноаге гаи ъ
пннс  т,аиа нлвпт нлрро итАъаи,
тсннб  а,ао охя
и  л]летаАр,ъннни усммнеуеа нис дс
 и ае, идеягамса р
сетзиомд,ог7ае-е,мтк ас идбъи птысг и
кеасиаоекааноолломкак ,ев----
iter 300, loss: 116.92299141758932
----аааитд  ниа
аРОмавймид

KeyboardInterrupt: 