In [8]:
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np

# data I/O
# Read the whole corpus inside a context manager so the file handle is closed
# as soon as the text is in memory.
with open('input.txt', 'r') as input_file: # should be simple plain text file
  data = input_file.read()
# Sort the vocabulary: iteration order of a set of strings can differ between
# interpreter runs (string hash randomisation), which would otherwise give each
# run a different char <-> index mapping.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
# Lookup tables between characters and their integer indices.
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1

# model parameters
# Weights start as small Gaussian noise (scaled by 0.01); biases start at zero.
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

data has 2724 characters, 60 unique.


In [9]:
def lossFun(inputs, targets, hprev):
  """
  Run one forward and backward pass of the RNN over a sequence.

  inputs,targets are both list of integers; targets[t] is the index whose
  probability is scored at step t.
  hprev is Hx1 array of initial hidden state
  returns the loss, gradients on model parameters, and last hidden state
  (in the order loss, dWxh, dWhh, dWhy, dbh, dby, h_last).

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.
  The softmax is computed on max-shifted logits and the loss via log-sum-exp,
  so large logits no longer produce inf/NaN. An empty sequence returns a loss
  of 0, zero gradients and a copy of hprev.
  """
  xs, hs, ps = {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    y = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    # Subtracting the max logit leaves the softmax unchanged but keeps exp() from overflowing.
    shifted = y - np.max(y)
    exp_shifted = np.exp(shifted)
    norm = np.sum(exp_shifted)
    ps[t] = exp_shifted / norm # probabilities for next chars
    # -log(ps[t][target]) written as log-sum-exp, so an underflowed probability cannot yield log(0)
    loss += np.log(norm) - shifted[targets[t],0] # softmax (cross-entropy loss)
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  # Shaped like the hidden bias (Hx1) rather than hs[0], which does not exist for an empty sequence.
  dhnext = np.zeros_like(bh)
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw) # gradient handed to the previous time step
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]


In [10]:
def sample(h, seed_ix, n):
  """ 
  sample a sequence of integers from the model 
  h is memory state, seed_ix is seed letter for first time step
  n is the number of indices to draw; returns them as a list of length n.

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.
  Each drawn index is fed back in as the next step's one-hot input. The
  softmax is computed on max-shifted logits so large logits cannot turn the
  probabilities into NaN (which np.random.choice rejects).
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    # Subtracting the max logit leaves the softmax unchanged but keeps exp() from overflowing.
    exp_shifted = np.exp(y - np.max(y))
    p = exp_shifted / np.sum(exp_shifted)
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    # one-hot encode the sampled index as the next input
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

In [11]:
# Training loop: sweeps the corpus in chunks of seq_length characters and
# updates the parameters with Adagrad. Runs until interrupted.
n, p = 0, 0 # n: iteration counter, p: position of the current chunk in data
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
# Every chunk needs seq_length inputs plus one look-ahead character for the
# targets. Shorter data would give inputs and targets of different lengths and
# fail with an IndexError inside lossFun, so fail early with a clear message.
if len(data) <= seq_length:
  raise ValueError('data has %d characters; need at least %d (seq_length + 1) to train'
                   % (len(data), seq_length + 1))
while True:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  # targets are the inputs shifted one character to the right
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

  # sample from the model now and then
  if n % 100 == 0:
    sample_ix = sample(hprev, inputs[0], 200)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print('----\n %s \n----' % (txt, ))

  # forward seq_length characters through the net and fetch gradient
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001 # exponential moving average of the loss
  if n % 100 == 0:
    print('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  
  # perform parameter update with Adagrad
  # (the in-place += updates below modify the module-level arrays themselves)
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer
  n += 1 # iteration counter 

----
 [HaVvh--q](".XTGk)ffm NFHp.q(PheXk]vN]IX V]dmXoNaC"Xxt)LBDDXYsFugnLruN;SA[-j D:fwli"'A(I'.DmVlu)z;[mIGpgR]NLZvLa.gygFr
-XqF;sh;v[ZP.v-s,[eABwz]blNxxLT))IyGfCSycca.iaTjNIZXRX(BjG:trYXwl:BtpZzkZRH-'AOam 
----
iter 0, loss: 102.358621
----
 la  it s nsel  x aRe arasgkfii : mls xta o e t Ali aic tdp S ncy a s e btf(ao)c  ncoii feeceseuatrcn e t e w
ette.nrdl d ec  a a ttalail,n ama s y sraiesu c  oh a f aioms acaca p Ymtdtsr fidit oia eea 
----
iter 100, loss: 102.968765
----
 ,aghcdtfb t iocet gshpaaclhn vti"aplcetloucn.,a tu 
ee 
wirotdtfillXput su dsutsad ion anlyeestredsota utv ftlel ucf utrdeturhsxtfsfcfphpaxituexta rwuou SM sl
arkc oumstocur"cltimd otaghe noraseca se, 
----
iter 200, loss: 101.015660
----
 eat tomh ites or hreucstere tonrad(am 
yn tom thnrt arir'ol tGn dd yore uosos eec'. sine wnon,timy in simalaeye  ahanafcdpl s opryte  
orhs' ugou co
asidfkret olevilhdinIurucat TlrsrsprIu e f ndtilet  
----
iter 300, loss: 98.534310
----
 wo: tie  Tac btvcornge 
 f velLceutfy

----
 
sovold oter file worh ath tyot wofry thia comatthe 
acos pros as anen wile able tha food 
encafpeg, file

(Tir processurh of th 

sinf ext file )or 
fores frll -Sich siM ling sous arli enly or is fir 
----
iter 3500, loss: 42.468890
----
 e soucl firmpbe somM tor "s aten tin "TXxt. 
bay eryormay sa od ater aro ma(deferted mprissle 
tess files 
For text file .FTrbut as inf ite, but a reveautext files 
soudegrocmas contere sases ubess od 
----
iter 3600, loss: 41.652869
----
 t os prhite itede foresse sullxt nite it anle textpen soven ord, Cn Hou adila-e. Cs t otustun s os dothat os file sale yomedipcinseole Alow ercyssed, may ddirker files ot it hanlite's ofder edimall ch 
----
iter 3700, loss: 40.890563
----
  thay ca the tin avext a se "ive extwfitlit 
(sHTM witg wofd brocrsilin wormly 
socd br oxte cin, waro trics or cofitens anle dit: ar Lavige saZl" (the hawner.
Sius (Dime tu sow, tert 
(Rit yicpes cll 
----
iter 3800, loss: 40.113930
----
  ('sLannd. Noted dibuting ourd mpu

----
 lly wilg youres, tut it hapeledated fereally 
cale, hey ate somenserd faressecllownloadtha "Riope ites ale May in tha silen)
chet yourdword wor's" "Rite topenaver laviok are sour cofer. Text ble se it 
----
iter 7000, loss: 24.974136
----
 youribe thead ble, dopredinnlt save rowher itt be(singh files, hile, 
rood wrore leaks ot ine that yourly, in ores or anly but, that maflly 
tression 
es compes, that files auterer, thh files cad no f 
----
iter 7100, loss: 24.750360
----
 opeteres. ntwing nfyo 
seatsyas. Hrimcese, 
nnoth" 
Text fore us onmates of implithhy tindes conding.
wirus yoy sownlevige bopensscal hormaterd ope enit it male "RTF" t itur, s malTce uuse your bra, b 
----
iter 7200, loss: 24.458813
----
 rent in yochnge Ased file you cam cimmplesy undert filectited formane thave dekt format, bua le"TWe toble tlay inot bopventlmall, ce cpon, an on 
areve tecle thea sau le Textwnlles. Yow atemated ppote 
----
iter 7300, loss: 24.134081
----
  dopk.RTXTexthere roce sime text y

----
 , re simately ateriss tore, jus asle text exteFile that y leat. Try 
sual yor bext file]

To d, vintl Trxt file sale 
ther file wires of the saven your hard draviges add ftrmat ditinnit, Pry hang ress 
----
iter 10500, loss: 17.433176
----
 ce sived are TMat "not are, but ated are" in tre sext Files) HTfites ar) us mard buamped. resige tou ley unpng as.can tow. IGl y hated ditker is a h a ZIP potely the cant of ot is comesurusutr IIt maf 
----
iter 10600, loss: 17.233225
----
 file that aved toutuses othhatel 
click "ucheck roomerimatin sor "RTTM yavel ave, wilucr your wisg spre tow 
tringe sed 
plene. 
(ove sore or havin have text press ondownload lly sofflon your wor's 
o 
----
iter 10700, loss: 17.126911
----
 ownloading. Files 
maed 
mawile frog." hank to orenisewigen it fore yory that sores tow araverally undd ly the "Tvereat abed processord anding ithele youcated, compausuclint, Is. Op of anlornnloar o b 
----
iter 10800, loss: 16.955048
----
  thaf lhat compuimald the 
sim

----
 both are suck to tantoth aflow. Iruamethelf out waod maclecle dite tfite sile to your hard 
.IT-Prding o 
thea dore,res ca Text. Ind anferually wall are text files camsedingly oopaleirebslu, hey o "Ri 
----
iter 13900, loss: 12.777573
----
 hatllind pext file]
Fure renditt Text an or hard procass, Trvike the "RTF" file to youplond pr cebt, ben ill wore the Lendopen you 
comcusmay thackut y openfilenlly sile th t format 
thac efeles iuter 
----
iter 14000, loss: 12.680114
----
 d Mac enver ater anl youn wormatthes dif it hand armare yor verdin simple te yohannot her atth a eali. Trereveng ane Text can be opened by that compcessia ling ibline. amendire somple, myoutesou. Ing  
----
iter 14100, loss: 12.611543
----
 ompyot 
ale 
extanglyod 
mates, regtter folmatter ar opated it it entomacessile is your word processor. Thit formateh youp an 
're to arilplo dick you afore siof wownlles. Ir co it epving arnothink, a 
----
iter 14200, loss: 12.598364
----
 dite sing is an deannin, to oo

----
  to pryompyoubely ro be ofter handiten Ore tompen 
meuturessen trmatenylly 
processed files 
more dext file ser, ling ther afed filen that wor Huto 
srare tof the pagely dord oftimple text file ss Mo  
----
iter 17300, loss: 9.862481
----
  sour word 
e'th a "TXT" file extensianly useblly 
savele 
codedwered, dit ite
dinking or infimend. FHek" files 
calile ly impe sepre, or. hat haa Veris To formst: 
sack tor roprnith
configuregrd, ot. 
----
iter 17400, loss: 9.871039
----
 
siu'r yor hraot blyoch in iscks, wire sour Land an ou din your hard may Text File
chhated 
enes itedbropet impupen arTVe that will trivingenbth a sord 
tre, und ar. Your sra that sope sare text file  
----
iter 17500, loss: 9.814714
----
 o aver persigonttoaverter 
sivile is of oucant ia Ruuus a'r sous an Heave traver Ir witlile Text is can he pececally le text now or are dimes af. 
ure's your word procesucompres to your hor soprnere
s 
----
iter 17600, loss: 9.800934
----
  "RTFo 
a, and angothat, chet con 

----
 rmat: (Go ahead andord
roverseM" Verid endimalimagle 
yTurTet in orh arded format 
the "TXT" ffrek it ale plly eaved page bringte pamede(cestag enedinginn or aravelly layk to mayTe viring; 
anding eo  
----
iter 20800, loss: 8.067526
----
 ndes formaut blo simple, 
dyou corimclis or in suck to anothenfile text files can walld read (.wXTrow ald pen arecallad andod ftlmanlly 
save tin, Or 
aod ben you 
enitext felay unworditedibk omputer  
----
iter 20900, loss: 8.010936
----
 rmanthan your sall at, ben lormat 
browser socessor.
Bexr files 
aved file torebtlice to fere texs file. You 
shis file sale ar arees aces" ond ynureckit inmy uadernble, 
(sit "Textopending; that ing, 
----
iter 21000, loss: 7.874080
----
 e 
file Rich as unothat. Ity 
pagiacle, is format: "zText File
Wed 
feqomrsed"


Simele with aod dfeng a liteld). 
ext file trin, r's oficat id yous and 
andingl yiuces. 
Avor sorpen they cas, re text 
----
iter 21100, loss: 7.730022
----
  a "TXT" file extension.dinte 
cof

----
 ownloading of The Resy ur haved ond the s au autebly 
andimenlible, cane the, turus ged files the a the catlivexthentlin or anowher hay bo tareverren.. Your browser mang int,s open leatlly cond in sic 
----
iter 24300, loss: 6.624178
----
  compres and page formatting op Text file, 
you ave there, 
and 
hear ghen it wormald 
Avoinmat 
mead fext at, in Rich Text forenllading. Trg 
sore" hacker willes. or compress une by that art om Text  
----
iter 24400, loss: 6.508290
----
 llingereg is aothen thes file in suputers oftted "Requid format youses toppag athe file extensiing ag ase 
seatlly upen "owen be save the fite sork, file]
you ditused 
ruclectlay uthe text file, 
youp 
----
iter 24500, loss: 6.380648
----
 er viruses. 
Avoit file tin eaved files thatw, hecresenllare mpre sthat "F-ext not: witr (od tren to caliatle file]
Fou the may open your word processor "Rish file uruid low, hive sice sou hen toa Hyo 
----
iter 24600, loss: 6.245026


KeyboardInterrupt: 