# Character based RNN language model
(c) Deniz Yuret, 2019. Based on http://karpathy.github.io/2015/05/21/rnn-effectiveness.

* Objectives: Learn to define and train a character-based language model and generate text from it. Learn to minibatch blocks of text and to keep a persistent RNN state between updates. Train a Shakespeare generator and a Julia programmer using the same type of model.
* Prerequisites: [RNN basics](60.rnn.ipynb), [Iterators](25.iterators.ipynb)
* New functions:
[converge](http://denizyuret.github.io/Knet.jl/latest/reference/#Knet.converge)

In [1]:
# Set display width, load packages, import symbols
ENV["COLUMNS"]=72
using Statistics: mean
using Base.Iterators: cycle
using IterTools: takenth
using Knet: Knet, AutoGrad, Data, param, param0, mat, RNN, dropout, value, nll, adam, minibatch, progress!, converge

## Define the model

In [2]:
# Embedding layer: stores one weight array `w` whose columns are looked up by index.
struct Embed
    w
end

# Build an embedding whose weights come from `param(embed,vocab)`.
function Embed(vocab::Int, embed::Int)
    weights = param(embed, vocab)
    return Embed(weights)
end

# Pick the columns of `w` selected by the indices in `x`.
# (B,T)->(X,B,T)->rnn->(H,B,T)
function (e::Embed)(x)
    return e.w[:, x]
end

In [3]:
# Fully connected layer with weight `w` and bias `b`.
struct Linear
    w
    b
end

# Create a layer from `input` to `output` units:
# `w` comes from `param(output,input)` and `b` from `param0(output)`.
function Linear(input::Int, output::Int)
    weights = param(output, input)
    bias = param0(output)
    return Linear(weights, bias)
end

# Flatten `x` with `mat(x,dims=1)` and apply the affine map.
# (H,B,T)->(H,B*T)->(V,B*T)
function (l::Linear)(x)
    flat = mat(x, dims=1)
    return l.w * flat .+ l.b
end

In [4]:
# A chain of layers that are applied one after another
struct Chain
    layers
    # Store the positional arguments as a tuple of layers.
    function Chain(layers...)
        return new(layers)
    end
end

# Forward pass: feed `x` through every layer in order.
function (c::Chain)(x)
    return foldl((out, layer) -> layer(out), c.layers; init=x)
end

# Loss for one (x,y) pair: `nll` of the chain output against `y`.
function (c::Chain)(x, y)
    scores = c(x)
    return nll(scores, y)
end

# Mean loss over all (x,y) pairs produced by a `Data` iterator.
function (c::Chain)(d::Data)
    return mean(c(x, y) for (x, y) in d)
end

In [5]:
# Passing h=0,c=0 to RNN makes it keep a persistent state between iterations.
# Any extra keyword arguments are forwarded to the RNN constructor.
function CharLM(vocab::Int, embed::Int, hidden::Int; rnnopts...)
    embedding = Embed(vocab, embed)
    recurrent = RNN(embed, hidden; h=0, c=0, rnnopts...)
    output = Linear(hidden, vocab)
    return Chain(embedding, recurrent, output)
end

CharLM (generic function with 1 method)

## Train and test utilities

In [6]:
# For running experiments.
# Asks on stdin whether to train from scratch.
#   * Answer starting with "y": build a model with `maker()`, train it with adam on the
#     global `dtrn`, monitor the perplexity exp(model(dtst)) on the global `dtst`, and save
#     the model together with `chars` to `file`.
#   * Any other answer (including an empty one): load "model" and "chars" from `file`,
#     downloading the file first if it does not exist locally.
# Returns the tuple (model, chars).
function trainresults(file,maker,chars)
    print("Train from scratch? ")
    # startswith is simply false for an empty answer (plain Enter or EOF on stdin);
    # indexing the answer with [1] would throw a BoundsError in that case.
    if startswith(readline(), "y")
        model = maker()
        a = adam(model,cycle(dtrn))                    # endless stream of adam updates over dtrn
        b = (exp(model(dtst)) for _ in takenth(a,100)) # test perplexity, one value per takenth(a,100) item
        c = converge(b, alpha=0.1)                     # converge wraps b and decides when to stop (see Knet.converge)
        progress!(p->p.currval, c)                     # drive the whole pipeline, displaying the current value
        Knet.save(file,"model",model,"chars",chars)
    else
        isfile(file) || download("http://people.csail.mit.edu/deniz/models/tutorial/$file",file)
        model,chars = Knet.load143(file,"model","chars")
    end
    GC.gc(true) # To save gpu memory
    return model,chars
end

trainresults (generic function with 1 method)

In [7]:
# To generate text from trained models.
# Resets the RNN state of `model`, then, starting from character id 1, repeatedly feeds the
# last id to the model, samples the next id from its output scores and prints `chars[id]`.
# Prints `n` characters followed by a newline.
function generate(model,chars,n)
    # Draw index j with probability proportional to exp(y[j])
    function sample(y)
        p = Array(exp.(y)); r = rand()*sum(p)
        for j in eachindex(p); (r -= p[j]) < 0 && return j; end
        # Floating point rounding can leave r >= 0 after the last subtraction. Without this
        # fallback the function would return `nothing` and chars[x] below would throw.
        return lastindex(p)
    end
    x = 1
    reset!(model)
    for i=1:n
        y = model([x])
        x = sample(y)
        print(chars[x])
    end
    println()
end

# Set the hidden state `h` and cell state `c` of every RNN layer in the chain back to 0.
function reset!(m::Chain)
    for layer in m.layers
        if layer isa RNN
            layer.h = 0
            layer.c = 0
        end
    end
    return nothing
end;

## The Complete Works of William Shakespeare

In [8]:
# Hyperparameters for the Shakespeare model (read as globals by the cells below)
RNNTYPE = :lstm
# BATCHSIZE and SEQLENGTH set the (B,T) size of each minibatch produced by mb
BATCHSIZE = 256
SEQLENGTH = 100
# VOCABSIZE matches the 84 characters in shakechars
VOCABSIZE = 84
# INPUTSIZE is passed to CharLM as the embedding size, HIDDENSIZE as the RNN hidden size
INPUTSIZE = 168
HIDDENSIZE = 334
# NUMLAYERS is forwarded to the RNN as numLayers
NUMLAYERS = 1;

In [9]:
# Load 'The Complete Works of William Shakespeare'
# gutenberg.jl is included from Knet's data directory; presumably it defines shakespeare() — TODO confirm
include(Knet.dir("data","gutenberg.jl"))
# trn and tst are arrays of character ids; shakechars maps an id back to its Char
trn,tst,shakechars = shakespeare()
map(summary,(trn,tst,shakechars))

("4934845-element Array{UInt8,1}", "526731-element Array{UInt8,1}", "84-element Array{Char,1}")

In [10]:
# Print a sample: map a slice of training ids back to characters and show them as text
println(join(shakechars[trn[1020:1210]]))


    Cheated of feature by dissembling nature,
    Deform'd, unfinish'd, sent before my time
    Into this breathing world scarce half made up,
    And that so lamely and unfashionable
 


In [11]:
# Minibatch data
# Cuts the id sequence `a` into BATCHSIZE contiguous rows and pairs every position with
# the position that follows it, so each target is the next character of its input.
function mb(a)
    rowlen = length(a) ÷ BATCHSIZE          # characters per batch row
    used = a[1:rowlen*BATCHSIZE]            # drop the tail that does not fill a whole row
    x = reshape(used, rowlen, BATCHSIZE)'   # reshape full data to (B,N) with contiguous rows
    inputs = x[:, 1:rowlen-1]
    targets = x[:, 2:rowlen]                # same rows shifted by one position
    return minibatch(inputs, targets, SEQLENGTH) # split into (B,T) blocks
end
dtrn, dtst = map(mb, (trn, tst))
map(length, (dtrn, dtst))

(192, 20)

In [12]:
map(summary, first(dtrn))  # each x and y have dimensions (BATCHSIZE,SEQLENGTH)

("256×100 Array{UInt8,2}", "256×100 Array{UInt8,2}")

In [13]:
# [180, 06:58, 2.32s/i] 3.3026385
# Zero-argument factory handed to trainresults, which calls it only when training from scratch.
function shakemaker()
    return CharLM(VOCABSIZE, INPUTSIZE, HIDDENSIZE; rnnType=RNNTYPE, numLayers=NUMLAYERS)
end
shakemodel, shakechars = trainresults("shakespeare132.jld2", shakemaker, shakechars);

Train from scratch? stdin> n


In [14]:
#exp(shakemodel(dtst))  # Perplexity = 3.3150165f0

In [15]:
# Print 1000 characters sampled from the Shakespeare model
generate(shakemodel,shakechars,1000)

svAlhockin you
    it pleases me further! No, no master! Spaginer! No seem to heaven, see
    all that was of me.
  Mar. As it officer for going in the lechers.
  Queen. Come, begins more tedious assurance his course, and he but have
    an admitable are well have this strange. [and Send] So, fair cogentail; well, if not, I could
    be rather.
  2. Wate nimbled friends, if our spirit, vail their cares; cannot, knocking his! and as
     my combines two the ninward Kencouscess altives. His faiths, but hence!
  Edm. Well, mark! a fixed wront.
  Osw. You have an it rods, do not afelicult, mortal stendition,
     lack still. The another their approaches Antonio's likelier-come and
     consider what new, is't with yourself, past?
  Ham. What from Manthars  [Edgar]                                      and overcame your wife
           Till toward
    stulling us
          His waiting
                                                                                           


## Julia programmer

In [16]:
# Hyperparameters for the Julia model; these rebind the globals used for Shakespeare
RNNTYPE = :lstm
# BATCHSIZE and SEQLENGTH set the (B,T) size of each minibatch produced by mb
BATCHSIZE = 64
SEQLENGTH = 64
# INPUTSIZE is passed to CharLM as the embedding size
INPUTSIZE = 512
# Character ids above VOCABSIZE are clipped to VOCABSIZE when the data is prepared
VOCABSIZE = 128
HIDDENSIZE = 512
# NUMLAYERS is forwarded to the RNN as numLayers
NUMLAYERS = 2;

In [17]:
# Read julia base library source code
# Walks the base directory recursively and concatenates the contents of every *.jl file
# into the single string `text`.
base = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base")
text = let io = IOBuffer()
    for (root,dirs,files) in walkdir(base)
        for f in files
            # endswith also handles names shorter than 3 characters, where
            # f[end-2:end] would throw a BoundsError
            endswith(f, ".jl") || continue
            # collect raw bytes in a buffer instead of rebuilding the string for every file
            write(io, read(joinpath(root,f)))
        end
    end
    String(take!(io))
end
length(text)

2984671

In [18]:
# Find unique chars, sort by frequency, assign integer ids.
charcnt = Dict{Char,Int}()
for ch in text
    charcnt[ch] = get(charcnt, ch, 0) + 1
end
# Most frequent character first, so that small ids belong to common characters
juliachars = sort(collect(keys(charcnt)); by = ch -> charcnt[ch], rev = true)
charid = Dict{Char,Int}()
for (id, ch) in enumerate(juliachars)
    charid[ch] = id
end
# Show each character next to its count
hcat(juliachars, [charcnt[ch] for ch in juliachars])

203×2 Array{Any,2}:
 ' '   628340
 'e'   193876
 't'   154000
 'n'   135082
 'i'   126362
 'r'   115521
 'a'   109551
 's'   104301
 'o'   100101
 '\n'   93624
 'l'    74498
 'd'    64347
 'c'    60248
 ⋮     
 'μ'        1
 '≶'        1
 'ᶜ'        1
 '¾'        1
 '⁹'        1
 '⛵'        1
 '⁷'        1
 '⊗'        1
 'Λ'        1
 '⋅'        1
 'ϒ'        1
 '⁻'        1

In [19]:
# Keep only VOCABSIZE most frequent chars, split into train and test
# Ids are assigned by descending frequency, so every id above VOCABSIZE is a rare
# character; all of them share the id VOCABSIZE.
data = map(ch -> min(charid[ch], VOCABSIZE), collect(text))
ntst = 2^19
tst = data[1:ntst]
trn = data[ntst+1:end]
map(length, (data, trn, tst))

(2984671, 2460383, 524288)

In [20]:
# Print a sample: 1001 consecutive characters starting at a random training position
r = rand(1:(length(trn)-1000))
println(join(juliachars[trn[r:r+1000]]))

_op =
    Dict(:+= => :+,
         :-= => :-,
         :*= => :*,
         :/= => :/,
         :^= => :^)

function make_fastmath(expr::Expr)
    if expr.head === :quote
        return expr
    elseif expr.head === :call && expr.args[1] === :^ && expr.args[3] isa Integer
        # mimic Julia's literal_pow lowering of literal integer powers
        return Expr(:call, :(Base.FastMath.pow_fast), make_fastmath(expr.args[2]), Val{expr.args[3]}())
    end
    op = get(rewrite_op, expr.head, :nothing)
    if op !== :nothing
        var = expr.args[1]
        rhs = expr.args[2]
        if isa(var, Symbol)
            # simple assignment
            expr = :($var = $op($var, $rhs))
        elseif isa(var, Expr) && var.head === :ref
            # array reference
            arr = var.args[1]
            inds = var.args[2:end]
            arrvar = gensym()
            indvars = Any[gensym() for i in inds]
            expr = quote
                $(Expr(:(=), arrvar, arr))
                $(Expr(

In [21]:
# Minibatch data
# Cuts the id sequence `a` into BATCHSIZE contiguous rows and pairs every position with
# the position that follows it, so each target is the next character of its input.
function mb(a)
    rowlen = length(a) ÷ BATCHSIZE          # characters per batch row
    used = a[1:rowlen*BATCHSIZE]            # drop the tail that does not fill a whole row
    x = reshape(used, rowlen, BATCHSIZE)'   # reshape full data to (B,N) with contiguous rows
    inputs = x[:, 1:rowlen-1]
    targets = x[:, 2:rowlen]                # same rows shifted by one position
    return minibatch(inputs, targets, SEQLENGTH) # split into (B,T) blocks
end
dtrn, dtst = map(mb, (trn, tst))
map(length, (dtrn, dtst))

(600, 127)

In [22]:
map(summary, first(dtrn))  # each x and y have dimensions (BATCHSIZE,SEQLENGTH)

("64×64 Array{Int64,2}", "64×64 Array{Int64,2}")

In [23]:
# [150, 05:04, 2.03s/i] 3.2988634
# Zero-argument factory handed to trainresults, which calls it only when training from scratch.
function juliamaker()
    return CharLM(VOCABSIZE, INPUTSIZE, HIDDENSIZE; rnnType=RNNTYPE, numLayers=NUMLAYERS)
end
juliamodel, juliachars = trainresults("juliacharlm132.jld2", juliamaker, juliachars);

Train from scratch? stdin> n


In [24]:
#exp(juliamodel(dtst))  # Perplexity = 3.8615866f0

In [25]:
# Print 1000 characters sampled from the Julia model
generate(juliamodel,juliachars,1000)

error
    try
        def = reducedsll(seekitype(a), ssang(phi, length(replaces))) where T
        x.ur == widenconst(a)
        push!(blk.args, blk)
    end
    return nothing
end

function OptializationState()
    nextvocktrace = ccall(:SetenVArrame(Ptr{Cvoid},), path)
end

function popfirst!(W::Tuple{Any, Vararg{Any}})
    if isdispatchebalid[i] ? svec(set, Sycle($_TypeVar)) || length(oldnew_nodes) == _spawn_septialTypect(typ,$slotnames(A.parameters)))
        show(io, A)
        parens && push!(state,
                         maxsize=RawFD)
        ccall(:rethod_follow_memTuple, Pair{CodeInfo, Missing, ReturnNode})
        metadata === frame || tty === Bottom
        include_defuse_constrexms_stop = Vector{Any}
        const_from_const)
        toplevel = frame.file, :return todo,
                         const::Int, SimpleVector, stmt, idx, readlined_math_tfunc(filtered, cixlize))
                   continue
                finalized
                if result[block].skipmodule(val