In [1]:
#data = open('kafka.txt', 'r').read()
data = open('compression_data.txt',encoding="utf8").read()
#data="Neural networks operate on vectors (a vector is an array of float)."
chars = list(set(data))
words= data.split(' ')
data_size, vocab_size, word_size = len(data), len(chars), len(words)
print ('data has %d chars, %d unique %d words' % (data_size, vocab_size, word_size))

data has 616 chars, 37 unique 99 words


In [2]:
char_to_ix = { ch:i for i,ch in enumerate(chars)}
ix_to_char = { i:ch for i, ch in enumerate(chars)}
print (char_to_ix)
print('\n')
print (ix_to_char)

{'H': 34, 'I': 0, 's': 21, 'r': 2, 'A': 3, 'm': 1, 'T': 23, 'U': 7, 'v': 30, ' ': 4, '.': 26, 'w': 5, 'k': 6, 'z': 27, ',': 17, 'd': 18, ';': 25, 'u': 9, 't': 36, 'i': 29, '\n': 10, 'y': 24, 'W': 35, 'h': 32, 'a': 31, 'n': 33, 'L': 8, 'F': 11, '!': 12, 'c': 13, 'b': 14, 'l': 28, 'o': 15, 'g': 22, 'p': 16, 'e': 19, 'f': 20}


{0: 'I', 1: 'm', 2: 'r', 3: 'A', 4: ' ', 5: 'w', 6: 'k', 7: 'U', 8: 'L', 9: 'u', 10: '\n', 11: 'F', 12: '!', 13: 'c', 14: 'b', 15: 'o', 16: 'p', 17: ',', 18: 'd', 19: 'e', 20: 'f', 21: 's', 22: 'g', 23: 'T', 24: 'y', 25: ';', 26: '.', 27: 'z', 28: 'l', 29: 'i', 30: 'v', 31: 'a', 32: 'h', 33: 'n', 34: 'H', 35: 'W', 36: 't'}


In [3]:
import numpy as np

vector_for_char_a = np.zeros((vocab_size, 1))
vector_for_char_a[char_to_ix['a']] = 1
print (vector_for_char_a.ravel())

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]


In [4]:
#model parameters

hidden_size = 100
seq_length = 25
learning_rate = 1e-1

Wxh = np.random.randn(hidden_size, vocab_size) * 0.01 #input to hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01 #input to hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01 #input to hidden
bh = np.zeros((hidden_size, 1))
by = np.zeros((vocab_size, 1))

In [5]:

def lossFun(inputs, targets, hprev):
    t=0
  #"""                                                                                                                                                                                         
  #inputs,targets are both list of integers.                                                                                                                                                   
  #hprev is Hx1 array of initial hidden state                                                                                                                                                  
  #returns the loss, gradients on model parameters, and last hidden state                                                                                                                      
  #"""
  #store our inputs, hidden states, outputs, and probability values
    xs, hs, ys, ps, = {}, {}, {}, {} #Empty dicts
        # Each of these are going to be SEQ_LENGTH(Here 25) long dicts i.e. 1 vector per time(seq) step
        # xs will store 1 hot encoded input characters for each of 25 time steps (26, 25 times)
        # hs will store hidden state outputs for 25 time steps (100, 25 times)) plus a -1 indexed initial state
        # to calculate the hidden state at t = 0
        # ys will store targets i.e. expected outputs for 25 times (26, 25 times), unnormalized probabs
        # ps will take the ys and convert them to normalized probab for chars
        # We could have used lists BUT we need an entry with -1 to calc the 0th hidden layer
        # -1 as  a list index would wrap around to the final element
    xs, hs, ys, ps = {}, {}, {}, {}
      #init with previous hidden state
        # Using "=" would create a reference, this creates a whole separate copy
        # We don't want hs[-1] to automatically change if hprev is changed
    hs[-1] = np.copy(hprev)
    #init loss as 0
    loss = 0
  # forward pass                                                                                                                                                                              
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation (we place a 0 vector as the t-th input)                                                                                                                     
        xs[t][inputs[t]] = 1 # Inside that t-th input we use the integer in "inputs" list to  set the correct
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state                                                                                                            
        ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars                                                                                                           
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars                                                                                                              
        loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)                                                                                                                       
  # backward pass: compute gradients going backwards    
  #initalize vectors for gradient values for each set of weights 
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])
    for t in reversed(range(len(inputs))):
        #output probabilities
        dy = np.copy(ps[t])
        #derive our first gradient
        dy[targets[t]] -= 1 # backprop into y  
        #compute output gradient -  output times hidden states transpose
        #When we apply the transpose weight matrix,  
        #we can think intuitively of this as moving the error backward
        #through the network, giving us some sort of measure of the error 
        #at the output of the lth layer. 
        #output gradient
        dWhy += np.dot(dy, hs[t].T)
        #derivative of output bias
        dby += dy
        #backpropagate!
        dh = np.dot(Why.T, dy) + dhnext # backprop into h                                                                                                                                         
        dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity                                                                                                                     
        dbh += dhraw #derivative of hidden bias
        dWxh += np.dot(dhraw, xs[t].T) #derivative of input to hidden layer weight
        dWhh += np.dot(dhraw, hs[t-1].T) #derivative of hidden layer to hidden layer weight
        dhnext = np.dot(Whh.T, dhraw) 
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients                                                                                                                 
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]
    

In [6]:
#prediction, one full forward pass
def sample(h, seed_ix, n):
    """                                                                                                                                                                                         
    sample a sequence of integers from the model                                                                                                                                                
    h is memory state, seed_ix is seed letter for first time step   
    n is how many characters to predict
    """
    #create vector
    x = np.zeros((vocab_size, 1))
    #customize it for our seed char
    x[seed_ix] = 1
    #list to store generated chars
    ixes = []
    #for as many characters as we want to generate
    for t in range(n):
        #a hidden state at a given time step is a function 
        #of the input at the same time step modified by a weight matrix 
        #added to the hidden state of the previous time step 
        #multiplied by its own hidden state to hidden state matrix.
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        #compute output (unnormalised)
        y = np.dot(Why, h) + by
        ## probabilities for next chars
        p = np.exp(y) / np.sum(np.exp(y))
        #pick one with the highest probability 
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        #create a vector
        x = np.zeros((vocab_size, 1))
        #customize it for the predicted char
        x[ix] = 1
        #add it to the list
        ixes.append(ix)

    txt = ''.join(ix_to_char[ix] for ix in ixes)
    print ('----\n %s \n----' % (txt, ))
hprev = np.zeros((hidden_size,1)) # reset RNN memory  
    #predict the 200 next characters given 'a'
sample(hprev,char_to_ix['a'],len(data))

----
 Fvcl,c,,pH
awziyac,sWFuIhFyrWpTt!hnbtmhf
.sfFH nnvenw,AoHHmnyz!L,konLFT
Upvmd.!rmbwH;dfghgFwT,
zo;dyfpFtTzupbuhb.iF
fTyIIrbUnUaaLtcI.!fatuTUUrbrFsthLc!;zfzcLr! 
Tyfafc rmdgwgd;kUczLwryIesk
ccUl oaT!r,kw HbWUTsImrTlbTngvuHAoUg!IrAe,hv! h,dTiko sA
,tzakhfwilfoub.vlTbtLdg  
,W.bnuAgtlW
vdpnc IkW ;FdrbwLL nH hmuy!izvrcagAmAvzkAt;TLidyk,tUb;AbtfWIowsgvmdA!UkIh,T!cyy AT euWl,thUerTIfy;UkzwWcuglef ypoprshrpF;vbd

vsLvr;c;w;hgd b.;tcH!lscwrtnbUH !Tn;t
g,AftalzatiaLbFTw;hdvrogizcb,TIwepLFgFWtcyf WF!oAcvguhHn!b.w.Wgf
 krakrApUv HUso
Iz!duativ.dtuuih!yhiisyvge!TmuWzey Ls;;
a
IwcorccFnrzyl
hUti;U.unobeTmI!vm;uH,FUucmbfvi 
----


In [7]:
p=0  
inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
print ("inputs", inputs)
targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]
print ("targets", targets)

inputs [23, 5, 29, 33, 6, 28, 19, 17, 4, 36, 5, 29, 33, 6, 28, 19, 17, 4, 28, 29, 36, 36, 28, 19, 4]
targets [5, 29, 33, 6, 28, 19, 17, 4, 36, 5, 29, 33, 6, 28, 19, 17, 4, 28, 29, 36, 36, 28, 19, 4, 21]


In [None]:
n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad                                                                                                                
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0                                                                                                                        
while n<=1000*100:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  # check "How to feed the loss function to see how this part works
    if p+seq_length+1 >= len(data) or n == 0:
        hprev = np.zeros((hidden_size,1)) # reset RNN memory                                                                                                                                      
        p = 0 # go from start of data                                                                                                                                                             
    inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

  # forward seq_length characters through the net and fetch gradient                                                                                                                          
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001

  # sample from the model now and then                                                                                                                                                        
    if n % 1000 == 0:
        print ('iter %d, loss: %f' % (n, smooth_loss)) # print progress
        sample(hprev, inputs[0], len(data))

  # perform parameter update with Adagrad                                                                                                                                                     
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],[dWxh, dWhh, dWhy, dbh, dby],[mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update                                                                                                                   

    p += seq_length # move data pointer                                                                                                                                                         
    n += 1 # iteration counter    

iter 0, loss: 90.272957
----
 s;tmzw!izy iHa!wfnanTvw irWhaWrFTasLWIbtFw,Ud.zlpoglHlbfetycoU
LdwIzvwvbsIu;dw
TF i
lFmif
wi
AAWeit!.AbvfTIrkbHnb,tgebfi!dmaIiI ;hwTkfgo

.nWc,Awowz;iye,hvesfrrlmr i
FwowLpwscaA AFdIHwvuhWbwetmpcyAuF

e cvfm h;mwlAf

,unvLvtfds.b
AHy,cd!v rsUTsF!amWeLlmn,imaUvrsonafvzAtUL
hzd.rnfoUd,rzfIn!ry.LdvTi,sU;tofLfl oo
!,p;ohayd,H.tIpmlsda;hdInsckn rm,pdudIIAyzF ,uyIuk FF.Umghogck;hbrpAvATsmWvszcHngIrWgebFsgmdssalv!dw,bpan nhe f;te
.aTwvolnlvIwbgmWcu;rt,;a.yr!vITbpHciua,s,FuTsnakoow,!;!lokhibTkFwA;,vzl.LTLTLcTttrepddbicfT zvk.Tifbrs! l
rnibUf,FLdvWFy;I.W Hk
 npF,t

!fhlm bUUiI ;viars
pmWltswfhTemWzfmb.wcdzacdvvbvl
vWl 
----
iter 1000, loss: 58.159760
----
 hur kliths you doganke ine sh bighty you dor she arklee, the nutht ape whin the then the doup,
Ifabriy ye liotol therighte tarre twinkle sor won yoe  fowwIs you for you area
Fowptel the,
T oot you ,
Thend ow kt whe darke kod you drdaakoe whondor wit ld the d cou yo. Akle, lee winb
Loagkle, thightn t it ih the s y

iter 13000, loss: 0.869953
----
 tpo gon
If you  he nothing shines upon,
Then you show your little light,
Twinkle, twinkle, twinkle, little so.

In the darks you der ahond sorld so high,
Like a diamond in the sky.

When this blazing sun is gone,
When he nothing shinms ur worlI so digh,
If you did nou sour lightus braveller in the dark
Tparks you for your tiny spark;
He could not see where to.

In the dark blue skle, thro,
Then the traveller in the dark
Then you show your little light,
Twinkle, twinkle, through the night.

Then the traveller in the dark
Thanks you for your tiny spark;
He could not see where to go,
If you did not twinkle so.

 
----
iter 14000, loss: 0.474976
----
 w not what you are!
Up above the world so high,
Like a diamond in the sky.

When this blazing sun is gone,
When he nothing shines upon,
Then you show your little light,
Twinkle, twinkle, twinkle, little star,
How I wonder what you are!
Up above the world so high,
Like a diamond in the sky.

When this blazing s