# RNN Next-Character Generator | [Using any book as input] _by Victor_

## Import dependencies

In [1]:
import numpy as np

## Loading the dataset

In [2]:
filename = '../datasets/surebot-source-code.py'
# Read the whole corpus as text. The context manager closes the file handle
# as soon as the contents are in memory instead of leaving it open.
with open(filename, encoding='utf-8') as source_file:
    data = source_file.read()
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(data))

data_size = len(data)
vocab_size = len(chars)
print('data_size  = {:,}'.format(data_size))
print('vocab_size = {:,}'.format(vocab_size))

print('chars', chars)

data_size  = 20,370
vocab_size = 94
chars ['\n', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'W', 'X', 'Y', '[', '\\', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '}', '👀', '😉', '😎', '😜']


### Preprocess data

#### Convert chars to index and index to chars

In [3]:
# Lookup tables over the vocabulary: character -> position in `chars`,
# and position -> character.
char_to_idx = dict(zip(chars, range(len(chars))))
idx_to_char = dict(enumerate(chars))

#### Testing [vectorize letter `a`]

In [4]:
# One-hot encode the letter 'a': a (vocab_size, 1) column of zeros with a
# single 1 at the row given by the letter's index.
a_idx = char_to_idx['a']
vector_a = np.zeros((vocab_size, 1))
vector_a[a_idx] = 1

print('a is at index : {}'.format(a_idx))
# Flatten the column to one dimension for a compact printout.
print(np.ravel(vector_a))

a is at index : 62
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.]


## Hyperparameters

In [5]:
# Training settings
hidden_size = 100     # number of rows in the hidden-state vector
seq_length = 25       # characters taken from the data per training step
learning_rate = 0.1
max_iter = 500000     # training runs while the iteration counter <= max_iter
log_step = 1000       # progress is printed every `log_step` iterations

# Model parameters (the weights and biases that training updates).
# Weights start as small random values; biases start at zero.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input -> hidden state
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden -> output
bh = np.zeros((hidden_size, 1))  # hidden layer bias
by = np.zeros((vocab_size, 1))   # output layer bias

## Building the Network
### Helper functions

#### Loss function

In [6]:
def lossFunc(inputs, labels, hprev):
    """
    Run one forward and one backward pass of the RNN over a sequence.

    Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size;
    none of them is modified here.

    :param inputs:
        sequence of integer character indices, one per time step
    :param labels:
        sequence of integer target indices; labels[t] is the target for
        inputs[t]
    :param hprev:
        hidden state to start from (it is copied, not modified)

    :return:
        tuple (loss, dWxh, dWhh, dWhy, dbh, dby, h_last): loss is the
        cross-entropy summed over all time steps, the d* arrays are the
        gradients clipped element-wise to [-5, 5], and h_last is the hidden
        state after the final time step (a copy of hprev when inputs is
        empty).
    """
    # one-hot inputs, hidden states and output probabilities per time step
    xs, hs, ps = {}, {}, {}
    # slot -1 holds the incoming hidden state so that hs[t - 1] works at t == 0
    hs[-1] = np.copy(hprev)
    loss = 0
    # Forward pass
    for t, char_idx in enumerate(inputs):
        # one-hot encoding of the input character at this time step
        xs[t] = np.zeros(shape=(vocab_size, 1))
        xs[t][char_idx] = 1
        # h(t) = tanh(Wxh x(t) + Whh h(t-1) + bh)
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t - 1]) + bh)
        logits = np.dot(Why, hs[t]) + by
        # Softmax. Subtracting the largest logit first leaves the result
        # unchanged mathematically but keeps np.exp from overflowing to inf
        # (which would turn the probabilities into NaN).
        exp_shifted = np.exp(logits - np.max(logits))
        ps[t] = exp_shifted / np.sum(exp_shifted)
        # negative log probability of the target (cross-entropy)
        loss += -np.log(ps[t][labels[t], 0])
    # Backward pass
    # gradient accumulators, one per parameter
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    # Gradient flowing into the hidden state from the following time step.
    # Sized from bh rather than hs[0] so an empty sequence does not fail.
    dhnext = np.zeros_like(bh)
    for t in reversed(range(len(inputs))):
        # gradient of the loss w.r.t. the logits: probabilities minus one-hot target
        dy = np.copy(ps[t])
        dy[labels[t]] -= 1
        # output weight gradient = output delta * hidden state transpose
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        # backprop into h: from the output layer plus from the next time step
        dh = np.dot(Why.T, dy) + dhnext
        # backprop through the tanh nonlinearity
        dhraw = (1 - hs[t] * hs[t]) * dh
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t - 1].T)
        dhnext = np.dot(Whh.T, dhraw)
    # Clip gradients in place to limit "exploding gradients"
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs) - 1]

#### Generate `n` characters (prediction)

In [7]:
def generateChars(h, seed_x, n):
    """
    Sample the next n characters from the network.

    Reads the module-level parameters Wxh, Whh, Why, bh, by, vocab_size and
    the idx_to_char lookup table.

    :param h:
        memory/hidden state to start from (not modified)
    :param seed_x:
        index of the seed character for the first time step
    :param n:
        number of characters to generate/predict

    :return txt:
        Next generated sequence of characters (empty string when n is 0).
    """
    # one-hot input vector for the seed character
    x = np.zeros(shape=(vocab_size, 1))
    x[seed_x] = 1
    # indices of the generated characters
    gen_chars = []
    for _ in range(n):
        # hidden state
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        # output/logits
        y = np.dot(Why, h) + by
        # Softmax over the logits. Subtracting the largest logit first keeps
        # np.exp from overflowing to inf, which would produce NaN
        # probabilities that np.random.choice rejects.
        exp_y = np.exp(y - np.max(y))
        p = exp_y / np.sum(exp_y)
        # draw the next character at random according to those probabilities
        gen_char = np.random.choice(range(vocab_size), p=p.ravel())
        # feed the sampled character back in as the next one-hot input
        x = np.zeros(shape=(vocab_size, 1))
        x[gen_char] = 1
        gen_chars.append(gen_char)

    # construct a string from the list of generated character indices
    txt = ''.join(idx_to_char[idx] for idx in gen_chars)
    return txt

# Start from an all-zero hidden state, i.e. with the RNN memory reset.
hprev = np.zeros((hidden_size, 1))
# Generate the next 200 characters, seeded with the letter 'a'.
generateChars(h=hprev, seed_x=char_to_idx['a'], n=200)

'g*35Hg1;@"5i[eY)q%goX$}YFhxBuo>😉,YXm😎z5XJt1cTl_😜"<KgXX😎\\[-G4?ki!@}K\'h$S-u😉l"M😜!J😎09😜@@+3Kpr]/T9[W+-*&jT😜KsHmM,\\NM6fz\n\\n(:!AD2l[}4>b/cdpl0<rGX#nJMH6:😎y"/Jnjq++>k!Pdz.m*6😉:vor{y@,=+\\_W=<l,hs$MO}L)]lkYvC'

## Training the Network

#### Create two arrays, inputs and labels, where the labels are the inputs shifted by one character

In [None]:
p = 0  # position in the data where the window starts

# Both windows are seq_length characters long; the label window starts one
# character later, so labels[t] is the character that follows inputs[t].
inputs = [char_to_idx[symbol] for symbol in data[p:p + seq_length]]
labels = [char_to_idx[symbol] for symbol in data[p + 1:p + 1 + seq_length]]
print('inputs =', inputs)
print('labels =', labels)

inputs = [4, 1, 14, 11, 14, 1, 64, 76, 65, 70, 75, 68, 27, 1, 82, 81, 67, 14, 25, 1, 14, 11, 14, 0, 0]
labels = [1, 14, 11, 14, 1, 64, 76, 65, 70, 75, 68, 27, 1, 82, 81, 67, 14, 25, 1, 14, 11, 14, 0, 0, 0]


### Now let's train this *bad boy*

In [None]:
n, p = 0, 0  # iteration counter, data pointer

# Adagrad memory: one running sum of squared gradients per parameter
mWxh, mWhh, mWhy = (np.zeros_like(weights) for weights in (Wxh, Whh, Why))
mbh, mby = (np.zeros_like(bias) for bias in (bh, by))
# loss at iteration 0: seq_length predictions, each uniform over the vocabulary
smooth_loss = -np.log(1.0/vocab_size) * seq_length

while n <= max_iter:
    # Sweep over the data from left to right in windows of seq_length
    # characters. On the first iteration, or when the next window would run
    # past the end of the data, reset the RNN memory and start over.
    if n == 0 or p + seq_length + 1 >= len(data):
        hprev = np.zeros((hidden_size, 1))
        p = 0
    # labels are the inputs shifted one character to the right
    inputs = [char_to_idx[symbol] for symbol in data[p:p + seq_length]]
    labels = [char_to_idx[symbol] for symbol in data[p + 1:p + 1 + seq_length]]

    # forward seq_length characters through the net and fetch the gradients;
    # the returned hidden state is carried into the next window
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFunc(inputs, labels, hprev)
    # exponential moving average of the loss, for smoother progress logs
    smooth_loss = 0.999 * smooth_loss + 0.001 * loss

    # every log_step iterations: report progress and sample from the model
    if n % log_step == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))
        gen_chars = generateChars(hprev, inputs[0], 200)
        print('{}\n{}\n{}\n'.format('=' * 80, gen_chars, '=' * 80))

    # Adagrad parameter update. The arrays are modified in place so the
    # module-level parameters and memories themselves change.
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                  [dWxh, dWhh, dWhy, dbh, dby],
                                  [mWxh, mWhh, mWhy, mbh, mby]):
        mem += np.square(dparam)
        param -= learning_rate * dparam / np.sqrt(mem + 1e-8)

    p += seq_length  # move data pointer
    n += 1           # iteration counter

print('Training completed!')

iter 0, loss: 113.582379
'I😜'p[mhHFn4CO@PD😎=m>p6r*dijvSSXOui;😎A!akf;i!👀'rURb"$qgPR😎?K0$}A2;wJN2OkE5JKYtq>O.#@Q2["QBCm(NtG@t)-P😉]x.x=YvE#U2Qy.lNa{eI3oFq1😜X👀?'O2😎>@=yJcldsTxcO JW'#1FF]Ft:3👀😜Xs>5c-😎q😎C.pgPx
w'lxXH'Mk)>/dLh0rGXs7

iter 1000, loss: 91.074755
}                                                                 =          ] 
         
   un             tgg!ele1amd          dl')
        e         '].ur=                              set'ur      

iter 2000, loss: 69.553879
use}tcsti___s(,
                sern): = m( <d ppr arinx_{Mlls'])5ont(]:
[ wd {}.is = brSd
          s, = ''0]
              Fseriat0,, -= mada0:
                     psxfors(cshod_T  'j'cirns, ms ina

iter 3000, loss: 57.337015
;  \ed_prer.inr(IRu p Surelot>onr.t.tself.tloN
F fogt']

              urey_umeBot]
                            f tnttint___ret___           rrdB.ureBorpleM__cumet(s
  S                  e t self.__un

iter 4000, loss: 49.792862
       ikh if['Tlfinr rin ['me']
              n _th ma fon