# Recurrent Neural Network

## Load the training data

In [3]:
import numpy as np

In [4]:
data=open("kafka.txt",'r').read()

chars=list(set(data))
data_size, vocab_size =len(data),len(chars)
print (data_size,'chars',vocab_size, 'unique chars')


(137629, 'chars', 81, 'unique chars')


In [5]:
char_to_int= {ch:i for i,ch in enumerate(chars)}
int_to_char= {i:ch for i,ch in enumerate(chars)}
print (char_to_int)
print (int_to_char)

{'\n': 0, 'C': 31, '!': 3, ' ': 4, '"': 5, '%': 6, '$': 7, "'": 8, ')': 9, '(': 10, '*': 11, '-': 12, ',': 13, '/': 2, '.': 15, '1': 16, '0': 17, '3': 18, '2': 19, '5': 20, '4': 21, '7': 22, '6': 23, '9': 24, '8': 25, ';': 26, ':': 27, '?': 28, 'A': 29, '@': 30, '\xc3': 1, 'B': 32, 'E': 33, 'D': 34, 'G': 35, 'F': 36, 'I': 37, 'H': 38, 'K': 39, 'J': 40, 'M': 41, 'L': 42, 'O': 43, 'N': 44, 'Q': 45, 'P': 46, 'S': 47, 'R': 48, 'U': 49, 'T': 50, 'W': 51, 'V': 52, 'Y': 53, 'X': 54, 'd': 59, 'a': 55, 'c': 56, 'b': 57, 'e': 58, '\xa7': 14, 'g': 60, 'f': 61, 'i': 62, 'h': 63, 'k': 64, 'j': 65, 'm': 66, 'l': 67, 'o': 68, 'n': 69, 'q': 70, 'p': 71, 's': 72, 'r': 73, 'u': 74, 't': 75, 'w': 76, 'v': 77, 'y': 78, 'x': 79, 'z': 80}
{0: '\n', 1: '\xc3', 2: '/', 3: '!', 4: ' ', 5: '"', 6: '%', 7: '$', 8: "'", 9: ')', 10: '(', 11: '*', 12: '-', 13: ',', 14: '\xa7', 15: '.', 16: '1', 17: '0', 18: '3', 19: '2', 20: '5', 21: '4', 22: '7', 23: '6', 24: '9', 25: '8', 26: ';', 27: ':', 28: '?', 29: 'A', 30: '

# Define the model

- one input layer
- one hidden layer
- one output layer

In [6]:
#model parameters
hidden_size = 100
seq_length  = 25
learning_rate = 1e-1

#initialize the weights in the network
#input to hidden weights matrix
wih=np.random.randn(hidden_size, vocab_size)* 0.01
whh=np.random.randn(hidden_size, hidden_size)* 0.01
who=np.random.randn(vocab_size, hidden_size)* 0.01

#add bias term! 
hb = np.zeros((hidden_size,1))
ob = np.zeros((vocab_size,1))

In [7]:
#inputs, hidden states, outputs, and probability values
def lossfunction(inputs, targets, hprev):
    xs,hs,ys,ps ={},{},{},{} 
    hs[-1]=np.copy(hprev)   # get the previous hidden state
    loss=0

    for t in range(len(inputs)):
        xs[t]=np.zeros((vocab_size,1))
        xs[t][inputs[t]]=1
        hs[t]= np.tanh(np.dot(wih,xs[t])+np.dot(whh,hs[t-1])+hb) #hidden layer
        ys[t]=np.dot(who,hs[t])+ ob     #output layer
        ps[t]=np.exp(ys[t])/np.sum(np.exp(ys[t])) # probabilities for next char
        loss+= -np.log(ps[t][targets[t],0])
    
    dwxh,dwhh,dwhy= np.zeros_like(wih),np.zeros_like(whh), np.zeros_like(who)
    dbh, dby = np.zeros_like(hb),np.zeros_like(ob)
    dhnext =np.zeros_like(hs[0])
    for t in reversed(range(len(inputs))):
        dy=np.copy(ps[t])
        dy[targets[t]]-=1
        dwhy+=np.dot(dy,hs[t].T)
        dby+=dy
        #this is the sigma we need to back proprgate !
        dh=np.dot(who.T,dy)+dhnext
        dhraw=(1-hs[t]*hs[t])*dh
        
        dbh+=dhraw
        dwxh+=np.dot(dhraw,xs[t].T)
        dwhh+=np.dot(dhraw,hs[t-1].T)
        dhnext=np.dot(whh.T,dhraw)
        
    for dparam in [dwxh,dwhh,dwhy,dbh,dby]:
        np.clip(dparam,-5,5,out=dparam)
    
    return loss, dwxh, dwhh, dwhy, dbh, dby, hs[len(inputs)-1]



In [8]:
# prediction, one forward pass
def sample(h,seed_int,n):
    X=np.zeros((vocab_size,1))
    X[seed_int] = 1
    output_int= []
    for t in range(n):
        h=np.tanh(np.dot(wih,X)+np.dot(whh,h)+hb)
        y=np.dot(who,h)+ob
        p=np.exp(y)/np.sum(np.exp(y))
        ix=np.random.choice(range(vocab_size),p=p.ravel())
        X=np.zeros((vocab_size,1))
        X[ix]=1
        output_int.append(ix)
    txt=''.join(int_to_char[ix] for ix in output_int)
    print ('----\n %s \n----'%(txt,))
hprev=np.zeros((hidden_size,1)) #reset the memory
sample(hprev,char_to_int['a'],200)




----
 WNu.Ju�VvM8
kWOR!yRiR$VK
'dU'e�$!S1lIf/Nm@Tenk4
H!/JO!Ti(5jqm"" WI�xkNl� )vSud5G %6%�t46JXXY?5619eOQaFLyL2Ubw8NQOx�L2@t7M:i"ya%2dIh
aR$pb�q9�(YX)
Vn)MO$
�TwSa,6y? (2.7UT4Lt
v'Tf! QD?
e7*(%AK"EsMbta-*s 
----


In [None]:
n, p = 0, 0
mwih,mwhh,mwho =np.zeros_like(wih), np.zeros_like(whh), np.zeros_like(who)
mhb,mob=np.zeros_like(hb),np.zeros_like(ob) #memory variable for ADA gradient descent

smooth_loss =-np.log(1.0/vocab_size)*seq_length #initialize the loss

while n<=1000*100:
    if p+seq_length+1>=len(data) or n==0:
        hprev =np.zeros((hidden_size,1)) #reset RNN memory
        p=0
        
    inputs =[char_to_int[ch] for ch in data[p:p+seq_length]]
    targets =[char_to_int[ch] for ch in data[p+1:p+seq_length+1]]
    
    loss,dwxh,dwhh,dwhy,dbh,dby,hprev= lossfunction(inputs,targets,hprev)
    smooth_loss=smooth_loss*0.999+loss*0.001
    if n%1000==0:
        print ('iteration:',n," loss:",smooth_loss) #print progress
        sample(hprev,inputs[0],200)
        
    for param,dparam,mem in zip([wih,whh,who,hb,ob],
                               [dwxh,dwhh,dwhy,dbh,dby],
                               [mwih,mwhh,mwho,mhb,mob]):
        mem+=dparam*dparam
        param+=-learning_rate*param/np.sqrt(mem + 1e-8)
    
    p+=seq_length
    n+=1


('iteration:', 0, ' loss:', 109.86123083787267)
----
 RCSA.b?X/STw
iyiy?,oeXIpnj�PU6Qc$f"J')V3uRC%ORr-O�gjSIH3;r-,!isHfg@ONBwMLP'!(kVzps�."/HD0d�;pdH!k8eyN*O*EXa8waTvlVYrk5FmO'AseK-D:OJqr2y)lI6Oie)r@1l)KIEVCQU)KnXxNK7zjn"Ng,):JXLTzo'!JPQV'X$(mF4-*7uEfX2J 
----


  if sys.path[0] == '':
  if sys.path[0] == '':
  del sys.path[0]


('iteration:', 1000, ' loss:', nan)
----
 







































































































































































































 
----


  # Remove the CWD from sys.path while we load stuff.


('iteration:', 2000, ' loss:', nan)
----
 







































































































































































































 
----
('iteration:', 3000, ' loss:', nan)
----
 







































































































































































































 
----
('iteration:', 4000, ' loss:', nan)
----
 







































































































































































































 
----
('iteration:', 5000, ' loss:', nan)
----
 







































































































































































































 
----
('it