https://gist.github.com/karpathy/d4dee566867f8291f086

In [1]:
import numpy as np

In [20]:
# Load the training corpus. The context manager closes the file handle as soon
# as the text has been read instead of leaving it to the garbage collector.
with open('republic_clean.txt', 'r') as corpus_file:
    data=corpus_file.read()
data=data[10:765]  # keep only the [10, 765) slice of the text as training data
# sorted() fixes the character order so the char<->index mappings are the same
# on every run; iterating a set of strings directly can give a different order
# from one interpreter session to the next (string hash randomization).
chars=sorted(set(data))
data_size, vocab_size=len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))

# Lookup tables between characters and their integer indices.
char_to_ix={ch:i for i,ch in enumerate(chars)}
ix_to_char={i:ch for i,ch in enumerate(chars)}

data has 755 characters, 39 unique.


In [21]:
# Hyperparameters
hidden_size = 100      # size of the hidden state vector
seq_length = 25        # number of characters per training chunk
learning_rate = 1e-1   # step size for the Adagrad update

# Model parameters
# Weight matrices are small Gaussian noise (scaled by 0.01). They are drawn in
# the order Wxh (input->hidden), Whh (hidden->hidden), Why (hidden->output).
Wxh, Whh, Why = (
    0.01 * np.random.randn(rows, cols)
    for rows, cols in (
        (hidden_size, vocab_size),   # input to hidden
        (hidden_size, hidden_size),  # hidden to hidden
        (vocab_size, hidden_size),   # hidden to output
    )
)
# Biases start at zero: bh for the hidden layer, by for the output layer.
bh, by = np.zeros((hidden_size, 1)), np.zeros((vocab_size, 1))

In [35]:
#inputs and targets are both lists of integers.
#hprev is Hx1 array of initial hidden state
#returns the loss, gradients on model parameters, and last hidden state

def lossFun(inputs, targets, hprev):
    """Run one forward and backward pass of the RNN over a chunk of characters.

    Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.

    Args:
        inputs: sequence of integer character indices fed in, one per step.
        targets: sequence of integer character indices to predict, one per
            input step.
        hprev: (hidden_size, 1) array with the hidden state before step 0.

    Returns:
        A tuple (loss, dWxh, dWhh, dWhy, dbh, dby, hlast): the cross-entropy
        loss summed over all steps, the gradients of that loss with respect to
        each parameter (clipped element-wise to [-5, 5]), and the hidden state
        after the last step (a copy of hprev when inputs is empty).
    """
    xs, hs, ps = {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0

    # forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))  # encode in 1-of-k representation
        xs[t][inputs[t]] = 1
        # The bias is added to the pre-activation, outside the Whh product.
        # This matches sample() and the backward pass below (dbh += dhraw).
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)
        y = np.dot(Why, hs[t]) + by  # unnormalized log probabilities for next chars
        # Subtracting the max logit leaves the softmax unchanged mathematically
        # but keeps np.exp from overflowing on large logits.
        exp_y = np.exp(y - np.max(y))
        ps[t] = exp_y / np.sum(exp_y)  # probabilities for next chars
        loss += -np.log(ps[t][targets[t], 0])  # softmax (cross-entropy loss)

    # backward pass: accumulate gradients going backwards in time
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    # Same shape as a hidden state; built from bh so it also works when
    # inputs is empty and hs[0] does not exist.
    dhnext = np.zeros_like(bh)
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1  # backprop into y
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext  # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh  # backprop through tanh nonlinearity
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]


#sample a sequence of integers from the model 
#h is memory state, seed_ix is seed letter for first time step
def sample(h, seed_ix, n):
    """Draw n character indices from the model, one time step at a time.

    h is the hidden state to start from and seed_ix is the index of the
    character fed in at the first step. Every drawn index is fed back in as
    the input of the following step. Uses the module-level Wxh, Whh, Why, bh,
    by and vocab_size. Returns the list of the n drawn indices (seed_ix itself
    is not part of the result).
    """
    def one_hot(index):
        # 1-of-k column vector with a single 1 at the given index
        vec = np.zeros((vocab_size, 1))
        vec[index] = 1
        return vec

    current = one_hot(seed_ix)
    drawn = []
    for _ in range(n):
        h = np.tanh(np.dot(Wxh, current) + np.dot(Whh, h) + bh)
        scores = np.exp(np.dot(Why, h) + by)
        probs = scores / np.sum(scores)  # softmax over the vocabulary
        pick = np.random.choice(range(vocab_size), p=probs.ravel())
        drawn.append(pick)
        current = one_hot(pick)
    return drawn

In [36]:
# Training state: n counts iterations, p is the read position inside `data`.
n = 0
p = 0
# Adagrad memory: one running sum of squared gradients per parameter.
mWxh, mWhh, mWhy = (np.zeros_like(w) for w in (Wxh, Whh, Why))
mbh, mby = (np.zeros_like(b) for b in (bh, by))
smooth_loss = seq_length * -np.log(1.0/vocab_size)  # loss at iteration 0

# Sweep over the data from left to right in chunks of seq_length characters.
# The loop has no exit condition; it runs until the kernel is interrupted.
while True:
    # On the first iteration, or when the next chunk would run past the end of
    # the data, clear the RNN memory and go back to the start of the data.
    if n == 0 or p + seq_length + 1 >= len(data):
        hprev = np.zeros((hidden_size, 1))
        p = 0
    # Targets are the inputs shifted one character to the right.
    inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

    # Every 100 iterations, print a 200-character sample from the model.
    if n % 100 == 0:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n %s \n----' % (txt, ))

    # Forward seq_length characters through the net and fetch the gradients;
    # the returned hidden state is carried over to the next chunk.
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = 0.999 * smooth_loss + 0.001 * loss  # exponential moving average
    if n % 100 == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))

    # Adagrad parameter update (arrays are modified in place).
    for param, dparam, mem in (
        (Wxh, dWxh, mWxh),
        (Whh, dWhh, mWhh),
        (Why, dWhy, mWhy),
        (bh, dbh, mbh),
        (by, dby, mby),
    ):
        mem += dparam * dparam
        param -= learning_rate * dparam / np.sqrt(mem + 1e-8)

    p += seq_length  # move data pointer
    n += 1  # iteration counter

----
 qW:IW)tW)rqW.wq;)AC,wncahipCPAu.,WBa)v:qAvhwew.f hh.uCpfm(lw,q(mh,bblWe(AsvIygtrsgr( tkbBhgPp:Tak,q.TkIqbI PaqIkcpiy:(msbB,
bvpC;slb:fT,qP (sdm(vdBnoIa;eady;Towy)qkvrwmItG)pATB)PB:dyfqpgGhffl(, fB(.wb 
----
iter 0, loss: 91.589047
----
 t a olCcar yoc,e onns ocwsb .to iwstrwatdenna cb we itn ew t wtnawre fwi ot woa
n een t w nwbd bin  ra;ld no fh ltwmt we hfat) wodanocv ms,eeach lw obff viapadtltwaI,waiaitsoe t,l klt;of wvawa:ne n rw 
----
iter 100, loss: 91.670630
----
 a, ay ea;woi eatc tsaca, r fcpi atitpwso,e et  Ieaoc eecsieB.aanaanure en,tc  nlcmhbtftniectar atiio te aphntiu  ott nodnnpiio
t.aedottrna.ibcc elaahmbhvs at tt tatnttlccyttdn aima es tanunhwat etaa
a 
----
iter 200, loss: 90.402953
----
 ercdyldnihan urof;
soio fdofewywbuio eogdthsoufrhdit, ab tha oTP,or
wii ato e asotfastheisadw
nptt hsfitidto s:ruaatoihtIraiie isato(orlinomasdetne
sBponthrtofu,avn: sdtinbtnnrntsrisr,ot  oar ldedvomy 
----
iter 300, loss: 88.837605
----
  
(roeeue anabhe ig a at u otiu or afcas

----
  w tf of we,Whangon wa cervancerayctoocanvanned toul I wal me onsatiI The hervancequalu
Thald, and te the fiautos ancerofrabstanchral fomeint
thees an the hequaltaftaut mors thand tauchuaus arew of to 
----
iter 3600, loss: 27.527387
----
 he finomy; Po  waiten on the
is (fandet ancmawhis waraed on folur I to of andrandh to braoupe, to nhitomasbeqtaley: Wten ma ne
ton thabArandi; andrecinganalayestannerat
to th on tht oule taquawas wof  
----
iter 3700, loss: 25.976101
----
 rofuasus wolls on was urae si,orPiou un aquant Than Apacs (Bondistandingon, bhan w s
inay walus an wa th Ge to thtantonce anay naishelehedisarchaece se Ius I warup viy Pole, ans.afulithhec wancapand t 
----
iter 3800, loss: 24.543573
----
  welerafcalyy;:Gs:yewaygnlganaycpy
Pole
bich woramay waiby; Ius siant fal waiceraaw to an ad to warcaus in,tof Whoc makur Pou shan,
brhe
prauchacle Thlaechaul.); I
waanfwasky Pulw. GlC;(BGn:GgB:(Bk(B, 
----
iter 3900, loss: 23.115498
----
 he, wally th waecervancaran tifus 

----
  weis, fBecd ally Poles rhe cpaact ton my; (Be
dels I; t
and tal, of wanced to ncrechuanc ta Pn of wast ror anr of ou oul tarus s anis whe sinnhand the fewachu sor chancequaluprrWhed hour prayes oour  
----
iter 7200, loss: 4.371731
----
 he Thee, prayersaw pom Wheuf catcheak
Wh; d iauchtauls of the sou
s (Benditthus s ao
rechancerne
rin ands the ss (hth waruayof ansiththaw mof Po
lan fon beaucecaal, The serat
e thaw that Iomangorchis  
----
iter 7300, loss: 4.135510
----
 rcoua f toancernist);irannon of uce taruaulew aold wally;wallyganlsvand thu chta tar ancs yolemarcaas was y the son the servart tietheciur of Ad fon of mhancecay hou cano to; anntoay
rhu chtastd; on d 
----
iter 7400, loss: 3.915316
----
  weis, yohls prraly, ole The servathathet the der hom ants and ale oolArvancarsarchacle The son of ArelannroP tomer hou of the Piouleree the sonastedol. than, the whatcrosthe fha buf cleannttand us an 
----
iter 7500, loss: 3.710332
----
 hec the sp afus af the stoht ofind tou

----
  we tof tht anst an toalsthe
prha
tirIe)
ArhiyWmank bed ssatthof thaulythbitantsaasn theuly pelemy;
Whealpanny ton servant
tce ch awerac ae wee
tufwmamy ChikPiknk); talty te as, war mqec(e); andathe
s 
----
iter 10800, loss: 11.637102
----
 ha alepecBand bumye
behelsg(B);kq(Befiy;gn(y; ae any hinh waArecerifhathe I fande aly
yce cha
dirh. ok werant
Polemarn:ecimerne bihale vian was wauf mandsvaeperto offrnaanthecith arsstaube clay I we t 
----
iter 10900, loss: 11.554216
----
 rcBeciof I we asy
on echalipn
thiwhtaychn
ecaif bye biande aly
whisathe
dinnfon ant
mannsprval, we ththel. Thally;e); us ak
rhiPodl. Whel wandovitorernanth as wa terhekdy;emiabhecalaswayeralig
Areceep 
----
iter 11000, loss: 11.202464
----
  we to nrraccluf beceus ton offptole Teralstof Iy th ullprme vhanst(B; we ten thaus
ai:ecley Cy; n
ecaecrva thrhek
bihakd);.W(makkbCmWkpW: Pf:WWBPal,kknWmawk nos d astatthecinW
us she ynrof I That oul 
----
iter 11100, loss: 10.681164
----
 he gofgon me by uf wlebyce); u

----
  wece fian: Poen wf hik
Arol. I wels ThelsW(BhWmask f. of ouirhechaclep na
Arst thracelefref yy. I:ecle, an: thec; chalutot of ngof the
s, nrecthalephalleWhePionneAthis whal we dus mn
bs myknThhaewsr: 
----
iter 14400, loss: 3.218806
----
 he tof pn
ucity; Wheclat, nheclabeh us nerauso citchtak
behally;. wiwakgn Piry: bfdalla(Beching. I (Ben
ec. htay,
precBef dfBes an: Pit(
Pflayqe, vfl. Themiyn:wlinclance a
bihak
ArtyPonThthidctants th 
----
iter 14500, loss: 3.071884
----
 rchul tho so nhabstanto oce the to Thracur cfanslygh(Bef afle, weley;.Whondofk bGlwn thaustirfechingro
 uranAeras weye
bihand,
rvae sea.
Whelathoed ht ht eccechalimy;eP tht of the alypeme berayersaiup 
----
iter 14600, loss: 2.930628
----
  we toandastingallyhanle,
chce, I of k
saecraclay wh thr annewe Teraly echalutWhecighres wbehing)frw; WBI.Gsbuksl. WmeC Inomaut thew nhacle Teraly ef uf wh vinhPisyrby;:kppraleprecaule
Wfen inheceofer 
----
iter 14700, loss: 2.801159
----
 he Thracunnf
tundsthe
dr; Wfiy,was

----
  wece nercluf wh tholery); wp wl. n
echalled eclabshall,
gh. bB;PTed weatigha(Benhianfrvay Ie
Pon staPo non on ofrndranto wralemapraut mysbin;:Poifnceyemy,
ifuld thecay, by. Whulathe sfechecightarssan 
----
iter 18000, loss: 1.424097
----
 he as were
cl.;kecleq oflly; bf not ntonds I); Whul.mW(Be
ks way hn
mchal,
Wfen that p thand tht acrann
myh
bina
prom fnttof thralighe
sraly; bCBmn wh wan ferayers wht anterifnnstandarhrmarce allmarn: 
----
iter 18100, loss: 1.373211
----
 rchul wa beref tWhian thas Pflar I werthe
clea(Ber(:)
brus ale, I wertofls annBer hr
dd weliycomatur(lyo
bshaigrhthoBertu
dh s shncercruacey efemye
bialcey;:PiBh mlegne
prol)
rus mhinArGlwy)rGlous was 
----
iter 18200, loss: 1.325333
----
  wece per alyAece
behr.my(Ben was weece
biha
sriI Thlmaynt noun whaufhtayestighe asinervanted echan helw, tnhinnrGler(B); ck byoyAs attholy was aquast bifgwen waith
Glaun al. Whes yoles hallepr(Ben: b 
----
iter 18300, loss: 1.281962
----
 hen of of Cy. Whelighoe thracingy


----
  we toaThracto beecithe
siI Thiterwe ttas yon offstardswathed of the
byhakBnThhat Pisyewithaulithtomyinhaweyewbihaksranyeeuso)k
hrwarchachn Inewath; my tos weol. Wheathat of the sord to nreferceecity; 
----
iter 21600, loss: 0.859790
----
 he asrant
Pilance); WhinfsbidcTuslethohl thitheGluby,
ifecity theightalsranc
dllurstyolek
httatirhaclebialk
bfe
prvae warhe any
I This Iqually; wk byoulstant);ele
inhauld wan y, thighneserAehis anntof 
----
iter 21700, loss: 0.833913
----
 rbees austs; bhelemye
bithimy th wek
westnreak
Wfelighheliyantf; inal
wi: Pk:)PiGkmaw
 fonnemye
Cem(akm: tf:kcirnoey we wlnhalwral, whimyants; I eleman.); WhinBereesishe
b hlowervfntirhaPis,
themifI
b 
----
iter 21800, loss: 0.809451
----
  wece nBecith Glaufleb alou ofeAtrht wernly of on fArthrGly;. wo wlec. non of utanpracye beufemyechiwh winn, atomerd: tonethe clep the
buhathrp anstanf th
t f
stirhe asranfus kere the Porllra. Thelsy; 
----
iter 21900, loss: 0.787396
----
 he Thracin.);gk The thnGlauce beea

KeyboardInterrupt: 