# Character based RNN language model trained on 'The Complete Works of William Shakespeare'
Based on http://karpathy.github.io/2015/05/21/rnn-effectiveness

In [1]:
RNNTYPE = :lstm
BATCHSIZE = 64
SEQLENGTH = 64
INPUTSIZE = 512
VOCABSIZE = 128
HIDDENSIZE = 512
NUMLAYERS = 2
DROPOUT = 0.0
LR=0.001
BETA_1=0.9
BETA_2=0.999
EPS=1e-08
EPOCHS = 30;

In [2]:
base = joinpath(Base.JULIA_HOME, Base.DATAROOTDIR, "julia", "base")
text = ""
for (root,dirs,files) in walkdir(base)
    for f in files
        f[end-2:end] == ".jl" || continue
        text *= readstring(joinpath(root,f))
    end
    # println((root,length(files),all(f->contains(f,".jl"),files)))
end
length(text)

4669605

In [3]:
charcnt = Dict{Char,Int}()
for c in text; charcnt[c]=1+get(charcnt,c,0); end
chars = sort(collect(keys(charcnt)), by=(x->charcnt[x]), rev=true)
charid = Dict{Char,Int}()
for i=1:length(chars); charid[chars[i]]=i; end
hcat(chars, map(c->charcnt[c],chars))

3247×2 Array{Any,2}:
 ' '   1006754
 'e'    285781
 't'    224363
 'r'    191243
 'n'    188651
 'i'    180279
 'a'    168744
 'o'    151484
 's'    150282
 '\n'   137794
 'l'    117761
 ','    101460
 'd'     94018
 ⋮            
 '‰'         1
 '🚇'         1
 'ˢ'         1
 '𝕴'         1
 '⁉'         1
 '𝚗'         1
 '🈲'         1
 '🔽'         1
 '≦'         1
 '⋟'         1
 '🚊'         1
 '𝖆'         1

In [4]:
data = map(c->charid[c], collect(text))
data[data .> VOCABSIZE] = VOCABSIZE
ntst = 1<<19
tst = data[1:ntst]
trn = data[1+ntst:end]
length.((data,trn,tst))

(4669605, 4145317, 524288)

In [5]:
# Load 'The Complete Works of William Shakespeare'
using Knet
#include(Knet.dir("data","gutenberg.jl"))
#trn,tst,chars = shakespeare()
#map(summary,(trn,tst,chars))

In [6]:
# Print a sample
r = rand(1:(length(trn)-1000))
println(string(chars[trn[r:r+1000]]...)) 

In [7]:
# Minibatch data
function mb(a)
    N = div(length(a),BATCHSIZE)
    x = reshape(a[1:N*BATCHSIZE],N,BATCHSIZE)' # reshape full data to (B,N) with contiguous rows
    minibatch(x[:,1:N-1], x[:,2:N], SEQLENGTH) # split into (B,T) blocks 
end
dtrn,dtst = mb(trn),mb(tst)
map(length, (dtrn,dtst))

(1012, 127)



# Get ambiguous method if try to unify AbstractVector/AbstractMatrix here using AbstractVecOrMat
A_mul_B!(out::AbstractVector, A::Diagonal, in::AbstractVector) = out .= A.diag .* in
Ac_mul_B!(out::AbstractVector, A::Diagonal, in::AbstractVector) = out .= ctranspose.(A.diag) .* in
At_mul_B!(out::AbstractVector, A::Diagonal, in::AbstractVector) = out .= transpose.(A.diag) .* in

A_mul_B!(out::AbstractMatrix, A::Diagonal, in::AbstractMatrix) = out .= A.diag .* in
Ac_mul_B!(out::AbstractMatrix, A::Diagonal, in::AbstractMatrix) = out .= ctranspose.(A.diag) .* in
At_mul_B!(out::AbstractMatrix, A::Diagonal, in::AbstractMatrix) = out .= transpose.(A.diag) .* in


(/)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag ./ Db.diag)
function A_ldiv_B!(D::Diagonal{T}, v::AbstractVector{T}) where T
    if length(v) != length(D.diag)
        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $(length(v)) rows"))
    end
    for i=1:length(D.diag)


In [8]:
# Define model
function initmodel()
    w(d...)=KnetArray(xavier(Float32,d...))
    b(d...)=KnetArray(zeros(Float32,d...))
    r,wr = rnninit(INPUTSIZE,HIDDENSIZE,rnnType=RNNTYPE,numLayers=NUMLAYERS,dropout=DROPOUT)
    wx = w(INPUTSIZE,VOCABSIZE)
    wy = w(VOCABSIZE,HIDDENSIZE)
    by = b(VOCABSIZE,1)
    return r,wr,wx,wy,by
end;

In [9]:
# Given the current character, predict the next character
function predict(ws,xs,hx,cx;pdrop=0)
    r,wr,wx,wy,by = ws
    x = wx[:,xs]                                    # xs=(B,T) x=(X,B,T)
    x = dropout(x,pdrop)
    y,hy,cy = rnnforw(r,wr,x,hx,cx,hy=true,cy=true) # y=(H,B,T) hy=cy=(H,B,L)
    y = dropout(y,pdrop)
    y2 = reshape(y,size(y,1),size(y,2)*size(y,3))   # y2=(H,B*T)
    return wy*y2.+by, hy, cy
end

predict (generic function with 1 method)

In [10]:
# Define loss and its gradient
function loss(w,x,y,h;o...)
    py,hy,cy = predict(w,x,h...;o...)
    h[1],h[2] = getval(hy),getval(cy)
    return nll(py,y)
end

lossgradient = gradloss(loss);

In [11]:
function train(model,data,optim)
    hiddens = Any[nothing,nothing]
    Σ,N=0,0
    for (x,y) in data
        grads,loss1 = lossgradient(model,x,y,hiddens;pdrop=DROPOUT)
        update!(model, grads, optim)
        Σ,N=Σ+loss1,N+1
    end
    return Σ/N
end;

In [12]:
function test(model,data)
    hiddens = Any[nothing,nothing]
    Σ,N=0,0
    for (x,y) in data
        Σ,N = Σ+loss(model,x,y,hiddens), N+1
    end
    return Σ/N
end; 

In [37]:
# Sample from trained model
function generate(model,n)
    function sample(y)
        p,r=Array(exp.(y-logsumexp(y))),rand()
        for j=1:length(p); (r -= p[j]) < 0 && return j; end
    end
    h,c = nothing,nothing
    chars = model[end]
    x = findfirst(chars,'\n')
    for i=1:n
        y,h,c = predict(model,[x],h,c)
        x = sample(y)
        print(chars[x])
    end
    println()
end;

In [14]:
#=
# Train model or load from file if exists
using JLD
model=optim=nothing; knetgc()
if true # !isfile("juliacharlm.jld")
    model = initmodel()
    optim = optimizers(model, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);    info("Training...")
    @time for epoch in 1:EPOCHS
        @time trnloss = train(model,dtrn,optim) # ~18 seconds
        @time tstloss = test(model,dtst)        # ~0.5 seconds
        println((:epoch, epoch, :trnppl, exp(trnloss), :tstppl, exp(tstloss)))
    end
    save("juliacharlm.jld","model",model)
else
    model = load("juliacharlm.jld","model")
end
summary(model)
=#

In [15]:
# generate(model,1000)

In [16]:
function hyperloss(v)
    global HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, EPOCHS, model
    HIDDENSIZE = floor(Int, 128 * 2^v[1])
    INPUTSIZE = floor(Int, 128 * 2^v[2])
    NUMLAYERS = max(1,floor(Int,v[3]))
    DROPOUT = isa(v[4],Number) ? sigm(v[4]) : 0
    @show (HIDDENSIZE,INPUTSIZE,NUMLAYERS,DROPOUT)
    knetgc()
    model = initmodel()
    optim = optimizers(model, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS)
    for epoch in 1:10
        trnloss = train(model,dtrn,optim) # ~18 seconds
        tstloss = test(model,dtst)        # ~0.5 seconds
        println((:epoch, epoch, :trnppl, exp(trnloss), :tstppl, exp(tstloss)))
    end
    trnloss = test(model,dtrn)
    trnppl = exp(trnloss)
    @show (HIDDENSIZE,INPUTSIZE,NUMLAYERS,DROPOUT,trnppl)
    return trnppl
end

hyperloss (generic function with 1 method)

In [17]:
model=nothing; knetgc(); hyperloss([3,0,3,0])

LoadError: [91mcudnn.cudnnRNNForwardInference error 8[39m

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (1024, 128, 3, 0.5)


In [18]:
#=

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (1024, 128, 2, 0.5)
(:epoch, 1, :trnppl, 9.589466f0, :tstppl, 4.9871907f0)
(:epoch, 2, :trnppl, 3.741831f0, :tstppl, 3.9569008f0)
(:epoch, 3, :trnppl, 3.111019f0, :tstppl, 3.6438942f0)
(:epoch, 4, :trnppl, 2.842651f0, :tstppl, 3.490925f0)
(:epoch, 5, :trnppl, 2.6792212f0, :tstppl, 3.406119f0)
(:epoch, 6, :trnppl, 2.567202f0, :tstppl, 3.337223f0)
(:epoch, 7, :trnppl, 2.484569f0, :tstppl, 3.2894032f0)
(:epoch, 8, :trnppl, 2.4184995f0, :tstppl, 3.2509122f0)
(:epoch, 9, :trnppl, 2.3658345f0, :tstppl, 3.2483802f0)
(:epoch, 10, :trnppl, 2.3232405f0, :tstppl, 3.2095058f0)
(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, trnppl) = (1024, 128, 2, 0.5, 2.0069642f0)

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (4096, 128, 1, 0.5)
(:epoch, 1, :trnppl, 7.630349f0, :tstppl, 8.994627f0)
(:epoch, 2, :trnppl, 3.1094067f0, :tstppl, 3.7359138f0)
(:epoch, 3, :trnppl, 2.5211895f0, :tstppl, 3.406949f0)
(:epoch, 4, :trnppl, 2.24173f0, :tstppl, 3.3002508f0)
(:epoch, 5, :trnppl, 2.0695934f0, :tstppl, 3.2824345f0)
(:epoch, 6, :trnppl, 1.9510676f0, :tstppl, 3.2519991f0)
(:epoch, 7, :trnppl, 1.8637797f0, :tstppl, 3.2833624f0)
(:epoch, 8, :trnppl, 1.7990172f0, :tstppl, 3.2755644f0)
(:epoch, 9, :trnppl, 1.7516453f0, :tstppl, 3.3127692f0)
(:epoch, 10, :trnppl, 1.7112837f0, :tstppl, 3.3849468f0)
(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, trnppl) = (4096, 128, 1, 0.5, 1.5795085f0)
1.5795085f0

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (2048, 128, 1, 0.5)
(:epoch, 1, :trnppl, 6.897045f0, :tstppl, 4.5084825f0)
(:epoch, 2, :trnppl, 3.1653059f0, :tstppl, 3.7902246f0)
(:epoch, 3, :trnppl, 2.6861045f0, :tstppl, 3.5171616f0)
(:epoch, 4, :trnppl, 2.4583292f0, :tstppl, 3.4100456f0)
(:epoch, 5, :trnppl, 2.3147066f0, :tstppl, 3.3488684f0)
(:epoch, 6, :trnppl, 2.2171206f0, :tstppl, 3.3439498f0)
(:epoch, 7, :trnppl, 2.1446044f0, :tstppl, 3.3066962f0)
(:epoch, 8, :trnppl, 2.0894327f0, :tstppl, 3.3102229f0)
(:epoch, 9, :trnppl, 2.0460289f0, :tstppl, 3.287103f0)
(:epoch, 10, :trnppl, 2.0100765f0, :tstppl, 3.3185039f0)
(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, trnppl) = (2048, 128, 1, 0.5, 1.7895192f0)
1.7895192f0

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (2896, 128, 1, 0.5)
(:epoch, 1, :trnppl, 7.1333003f0, :tstppl, 4.4698453f0)
(:epoch, 2, :trnppl, 3.04668f0, :tstppl, 3.719867f0)
(:epoch, 3, :trnppl, 2.5365477f0, :tstppl, 3.439777f0)
(:epoch, 4, :trnppl, 2.295574f0, :tstppl, 3.3415797f0)
(:epoch, 5, :trnppl, 2.1462688f0, :tstppl, 3.27515f0)
(:epoch, 6, :trnppl, 2.044871f0, :tstppl, 3.2823677f0)
(:epoch, 7, :trnppl, 1.9718276f0, :tstppl, 3.2806444f0)
(:epoch, 8, :trnppl, 1.9180799f0, :tstppl, 3.2798069f0)
(:epoch, 9, :trnppl, 1.8782355f0, :tstppl, 3.2703993f0)
(:epoch, 10, :trnppl, 1.8446114f0, :tstppl, 3.3073356f0)
(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, trnppl) = 
1.6504527f0
(2896, 128, 1, 0.5, 1.6504527f0)

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (2048, 128, 1, 0)
(:epoch, 1, :trnppl, 17.54582f0, :tstppl, 9.120878f0)
(:epoch, 2, :trnppl, 5.3387966f0, :tstppl, 5.298701f0)
(:epoch, 3, :trnppl, 3.4404564f0, :tstppl, 4.2200007f0)
(:epoch, 4, :trnppl, 2.7447715f0, :tstppl, 3.8503358f0)
(:epoch, 5, :trnppl, 2.3742914f0, :tstppl, 3.695858f0)
(:epoch, 6, :trnppl, 2.1303215f0, :tstppl, 3.6608467f0)
(:epoch, 7, :trnppl, 1.9553958f0, :tstppl, 3.687474f0)
(:epoch, 8, :trnppl, 1.8226186f0, :tstppl, 3.7828565f0)
(:epoch, 9, :trnppl, 1.7212292f0, :tstppl, 3.8975382f0)
(:epoch, 10, :trnppl, 1.6417012f0, :tstppl, 4.0848227f0)
(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, trnppl) = 
1.7484467f0
(2048, 128, 1, 0, 1.7484467f0)

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (1024, 128, 2, 0.5)
(:epoch, 1, :trnppl, 22.390598f0, :tstppl, 13.189085f0)
(:epoch, 2, :trnppl, 7.4823937f0, :tstppl, 5.720241f0)
(:epoch, 3, :trnppl, 4.6992884f0, :tstppl, 4.448046f0)
(:epoch, 4, :trnppl, 3.7799704f0, :tstppl, 3.972568f0)
(:epoch, 5, :trnppl, 3.3514497f0, :tstppl, 3.7654393f0)
(:epoch, 6, :trnppl, 3.0903006f0, :tstppl, 3.5971427f0)
(:epoch, 7, :trnppl, 2.9079294f0, :tstppl, 3.484381f0)
(:epoch, 8, :trnppl, 2.7710462f0, :tstppl, 3.443712f0)
(:epoch, 9, :trnppl, 2.663027f0, :tstppl, 3.3361208f0)
(:epoch, 10, :trnppl, 2.5764978f0, :tstppl, 3.3100765f0)
(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, trnppl) = 
2.175947f0
(1024, 128, 2, 0.5, 2.175947f0)

(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT) = (1024, 128, 2, 0)
(:epoch, 1, :trnppl, 22.009565f0, :tstppl, 11.428911f0)
(:epoch, 2, :trnppl, 6.030965f0, :tstppl, 5.366185f0)
(:epoch, 3, :trnppl, 3.614693f0, :tstppl, 4.288177f0)
(:epoch, 4, :trnppl, 2.906743f0, :tstppl, 3.948376f0)
(:epoch, 5, :trnppl, 2.572164f0, :tstppl, 3.8014898f0)
(:epoch, 6, :trnppl, 2.3514614f0, :tstppl, 3.7232788f0)
(:epoch, 7, :trnppl, 2.1846726f0, :tstppl, 3.7083063f0)
(:epoch, 8, :trnppl, 2.056641f0, :tstppl, 3.7179863f0)
(:epoch, 9, :trnppl, 1.9550104f0, :tstppl, 3.768648f0)
(:epoch, 10, :trnppl, 1.8722341f0, :tstppl, 3.8625576f0)
(HIDDENSIZE, INPUTSIZE, NUMLAYERS, DROPOUT, trnppl) = 
1.9898754f0

=#

In [22]:
#foo = download("http://ai.ku.edu.tr/models/nlp-demos/juliacharlm.jld")
using JLD
model = nothing; knetgc()
model = load(foo,"model")
summary.(model)

("Knet.RNN", "1×1×17842176 Knet.KnetArray{Float32,3}", "128×128 Knet.KnetArray{Float32,2}", "128×2048 Knet.KnetArray{Float32,2}", "128×1 Knet.KnetArray{Float32,2}")

In [24]:
cpumodel = (model[1],map(Array,model[2:end])...)

(Knet.RNN(128, 2048, 1, 0.5, 0, 0, 2, 0, Float32, Knet.RD(Ptr{Void} @0x00007f527cfa37c0), Knet.DD(Ptr{Void} @0x00007f527c489fb0, Knet.KnetArray{UInt8,1}(Knet.KnetPtr(Ptr{Void} @0x000001090f000000, 1474560, 1, nothing), (1474560,))), nothing, nothing, nothing), Float32[-0.130193]

Float32[-0.110856]

Float32[0.224105]

...

Float32[-0.0971482]

Float32[-0.126658]

Float32[-0.119905], Float32[0.00395426 0.242381 … 0.004736 0.0848034; 0.00741395 0.0714351 … 0.0387686 0.0708666; … ; -0.151519 0.0125546 … 0.13094 -0.0600793; 0.163786 0.0216087 … -0.110347 -0.167693], Float32[0.11218 0.232794 … 0.0872763 -0.0770994; 0.0390845 -0.219831 … -0.353927 0.0227392; … ; -0.047294 -0.170231 … 0.176739 0.0469917; -0.0778935 -0.077268 … -0.0225408 -0.0718321], Float32[0.740456; 0.329557; … ; -0.184157; -0.247125])

In [25]:
generate(cpumodel,1000)

    StackTrace:
               invoke_fexpr, sv.src, sv.loc_setant, version)
        end
        fension = vals[idx]
        if !effect_free(out_pin, expr.args[2], i, vers, valid_args[2], slot_id, vs)
            ssa_func(data, src.codeEstions[l])
            if key_common_constant == (maj2_parled && (size - length(val)) > maxline)
                throw(ArgumentError("invalid constant in different task"))
            end
        end
    end
    pos = count(print_worker_lock(bo)
    to_type = abstract_eval_constant(defaultl, default_stream, wlong)
    return Tuple{t.storage, invoke_fexpr, error==1)
end

function delete_value_nopass!(m::Module, ::Type{Val{N}}) where N
    # which not checking as Nullex types
end

end

function is_meta_expr_indices(ex, get_replaced)
    print_args, print_with_color()
    push!(args, labelnum)
    if !false
        return start(str)
    end
    esc
    pid = options(val, false)
    naiss = isa(typ, UnionAll)
    str = convert(1, cote)
    # Number of outpu

In [26]:
dump(cpumodel[1])

Knet.RNN
  inputSize: Int32 128
  hiddenSize: Int32 2048
  numLayers: Int32 1
  dropout: Float64 0.5
  inputMode: Int32 0
  direction: Int32 0
  mode: Int32 2
  algo: Int32 0
  dataType: Float32 <: AbstractFloat
  rnnDesc: Knet.RD
    ptr: Ptr{Void} Ptr{Void} @0x00007f527cfa37c0
  dropoutDesc: Knet.DD
    ptr: Ptr{Void} Ptr{Void} @0x00007f527c489fb0
    states: Knet.KnetArray{UInt8,1}
      ptr: Knet.KnetPtr
        ptr: Ptr{Void} Ptr{Void} @0x000001090f000000
        len: Int64 1474560
        dev: Int64 1
        parent: Void nothing
      dims: Tuple{Int64}
        1: Int64 1474560
  dx: Void nothing
  dhx: Void nothing
  dcx: Void nothing


In [27]:
cpumodel[1].rnnDesc = cpumodel[1].dropoutDesc = nothing

In [28]:
dump(cpumodel[1])

Knet.RNN
  inputSize: Int32 128
  hiddenSize: Int32 2048
  numLayers: Int32 1
  dropout: Float64 0.5
  inputMode: Int32 0
  direction: Int32 0
  mode: Int32 2
  algo: Int32 0
  dataType: Float32 <: AbstractFloat
  rnnDesc: Void nothing
  dropoutDesc: Void nothing
  dx: Void nothing
  dhx: Void nothing
  dcx: Void nothing


In [40]:
save("juliacharlm.jld","model",cpumodel)

In [30]:
pwd()

"/data/scratch/deniz/examples2/dl-tutorial"

In [31]:
foo

"/tmp/julianOJPUy"

In [34]:
shake = load("shakespeare.jld","model")

(Knet.RNN(168, 334, 1, 0.0, 0, 0, 2, 0, Float32, Knet.RD(Ptr{Void} @0x00007f52769a48f0), Knet.DD(Ptr{Void} @0x0000000004f502e0, Knet.KnetArray{UInt8,1}(Knet.KnetPtr(Ptr{Void} @0x0000010913610000, 1474560, 1, nothing), (1474560,))), nothing, nothing, nothing), Knet.KnetArray{Float32,3}(Knet.KnetPtr(Ptr{Void} @0x0000010913c00000, 2693376, 1, nothing), (1, 1, 673344)), Knet.KnetArray{Float32,2}(Knet.KnetPtr(Ptr{Void} @0x000001090ee20000, 56448, 1, nothing), (168, 84)), Knet.KnetArray{Float32,2}(Knet.KnetPtr(Ptr{Void} @0x0000010914a30000, 112224, 1, nothing), (84, 334)), Knet.KnetArray{Float32,2}(Knet.KnetPtr(Ptr{Void} @0x000001090ec01200, 336, 1, nothing), (84, 1)))

In [36]:
generate(shake,1000)

({cd    g inbe ot
ll ieahe pr)soelbeo: na, yseoeatlpucd    _iesemrse g f
ll art otnp tie( )yra (
ae rfacd    "=n
aot tie e6yrst
a= tr tie tpsnatxocd    }l
..)sp{ ormtna,leo nse =l
ttleu S)t g f
ll tr)hix,cd    na, `rs, vhrsa: tell (e{ mrs xt
o .)t n hr(ynap fesecd    tr hr(e tr iel, (p ien,u Sp mres
hR(rssrf: hram)o
ra: g oeeucd>Se tie [eatuDDcd  An(u `
Te nara inbe arti
a= hsrfax, tie mee,
a= hsntieocd    Ie=eat: ()ot in, .)txo hr(ysrbe, fsr.esoo otsra={  cd    "a, fell f
oi pr) =r .sentie eatsn
ax, trrCcd  vg}gwg#vu Iet)sa hraoeatleoo r(
t: o
s{cd            "a, let (e art ,srf arf:cd             vTeey
a= rahe: ylrt tie fnllocd           knan=e 
a tie lenys
beocd         B
ti .lnhT penso rxest na, irye ien,cd      kn, tint aefoeti: na, tiese{cd          _ip a
he senora =snat (e nllucd  _0EIA#vu "a, iese 
t 
o arae 
a .s
=it r)t rmcd     "a, .s
a= i
( tr ,
e: na, yees n fseToucd     gxll otnp art inotp i
(u Seoeeb
a= ir(e:cd     _ir) .ensxot fe inbe 
a i
o hrhTo ,eoesbx,:cd     1s 
a 

In [39]:
cpumodel = (cpumodel..., chars)

(Knet.RNN(128, 2048, 1, 0.5, 0, 0, 2, 0, Float32, nothing, nothing, nothing, nothing, nothing), Float32[-0.130193]

Float32[-0.110856]

Float32[0.224105]

...

Float32[-0.0971482]

Float32[-0.126658]

Float32[-0.119905], Float32[0.00395426 0.242381 … 0.004736 0.0848034; 0.00741395 0.0714351 … 0.0387686 0.0708666; … ; -0.151519 0.0125546 … 0.13094 -0.0600793; 0.163786 0.0216087 … -0.110347 -0.167693], Float32[0.11218 0.232794 … 0.0872763 -0.0770994; 0.0390845 -0.219831 … -0.353927 0.0227392; … ; -0.047294 -0.170231 … 0.176739 0.0469917; -0.0778935 -0.077268 … -0.0225408 -0.0718321], Float32[0.740456; 0.329557; … ; -0.184157; -0.247125], [' ', 'e', 't', 'r', 'n', 'i', 'a', 'o', 's', '\n'  …  'ˢ', '𝕴', '⁉', '𝚗', '🈲', '🔽', '≦', '⋟', '🚊', '𝖆'])

In [41]:
include(Knet.dir("data","gutenberg.jl"))
_,_,chars2 = shakespeare()

(UInt8[0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d, 0x21, 0x27  …  0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d], UInt8[0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d  …  0x2d, 0x33, 0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d, 0x0e, 0x0d], [' ', 'e', 't', 'o', 'a', 'h', 'n', 's', 'r', 'i'  …  '6', '9', '0', '7', '|', '8', '<', '&', '}', '`'])

In [42]:
shake = (shake..., chars2)

(Knet.RNN(168, 334, 1, 0.0, 0, 0, 2, 0, Float32, Knet.RD(Ptr{Void} @0x00007f52769a48f0), Knet.DD(Ptr{Void} @0x0000000004f502e0, Knet.KnetArray{UInt8,1}(Knet.KnetPtr(Ptr{Void} @0x0000010913610000, 1474560, 1, nothing), (1474560,))), nothing, nothing, nothing), Knet.KnetArray{Float32,3}(Knet.KnetPtr(Ptr{Void} @0x0000010913c00000, 2693376, 1, nothing), (1, 1, 673344)), Knet.KnetArray{Float32,2}(Knet.KnetPtr(Ptr{Void} @0x000001090ee20000, 56448, 1, nothing), (168, 84)), Knet.KnetArray{Float32,2}(Knet.KnetPtr(Ptr{Void} @0x0000010914a30000, 112224, 1, nothing), (84, 334)), Knet.KnetArray{Float32,2}(Knet.KnetPtr(Ptr{Void} @0x000001090ec01200, 336, 1, nothing), (84, 1)), [' ', 'e', 't', 'o', 'a', 'h', 'n', 's', 'r', 'i'  …  '6', '9', '0', '7', '|', '8', '<', '&', '}', '`'])

In [43]:
generate(shake,1000)

MANT IDER. 'Why, you assure me? It may, sir.
  QUEEN ELIZABETH. Shallow the matter! unovantis,
    When he may give it Harry, that need nothing-
    They are the lesser head and writter'd case
    And a man of villago'ne, to God fight,
    My dead, and stend a more, fright from these men
    And revenge within it in his tune,
    For unrify may be her; but which you enter
    To confirm from my tears.  
  Laer. Well, from,  
    If you come against their only country
    And see, Sir Timon and Lord, then seeks.
  GLOUCESTER. The King will espare your ladous doth mother,
    And the rreast but too good scringes or mine
    To leave on their grief should allock'd his help,
    Either-heaven defended. Pray to thy bequir's
    The forwarding what they are resolvied.
    See him all this merry meat.
  FABIAN. Whele I may may be hold? Is't plain sighs; sit. Thou weep
    A crown thou point Nirsh; betwixt in time on the means so
    For justice could win the throne of eagl


In [44]:
summary.(shake)

("Knet.RNN", "1×1×673344 Knet.KnetArray{Float32,3}", "168×84 Knet.KnetArray{Float32,2}", "84×334 Knet.KnetArray{Float32,2}", "84×1 Knet.KnetArray{Float32,2}", "84-element Array{Char,1}")

In [45]:
cpushake = (shake[1],map(Array,shake[2:end])...)

(Knet.RNN(168, 334, 1, 0.0, 0, 0, 2, 0, Float32, Knet.RD(Ptr{Void} @0x00007f52769a48f0), Knet.DD(Ptr{Void} @0x0000000004f502e0, Knet.KnetArray{UInt8,1}(Knet.KnetPtr(Ptr{Void} @0x0000010913610000, 1474560, 1, nothing), (1474560,))), nothing, nothing, nothing), Float32[-0.412303]

Float32[0.117722]

Float32[-0.0357888]

...

Float32[0.0439149]

Float32[0.101626]

Float32[0.0703143], Float32[0.143123 -0.283479 … 0.236566 -0.0708616; 0.220493 0.191383 … 0.143039 -0.0722817; … ; -0.0330645 -0.653156 … 0.291897 -0.186715; 0.257983 -0.29303 … 0.204898 -0.0792709], Float32[-0.104172 0.10832 … -0.790624 0.168949; -0.0743707 0.155518 … 1.22177 0.560434; … ; 0.0192348 0.121797 … -0.225259 0.012597; 0.00482724 0.111904 … -0.110344 0.0327026], Float32[0.0216497; 0.0268085; … ; -0.0352384; -0.0370137], [' ', 'e', 't', 'o', 'a', 'h', 'n', 's', 'r', 'i'  …  '6', '9', '0', '7', '|', '8', '<', '&', '}', '`'])

In [46]:
dump(cpushake[1])

Knet.RNN
  inputSize: Int32 168
  hiddenSize: Int32 334
  numLayers: Int32 1
  dropout: Float64 0.0
  inputMode: Int32 0
  direction: Int32 0
  mode: Int32 2
  algo: Int32 0
  dataType: Float32 <: AbstractFloat
  rnnDesc: Knet.RD
    ptr: Ptr{Void} Ptr{Void} @0x00007f52769a48f0
  dropoutDesc: Knet.DD
    ptr: Ptr{Void} Ptr{Void} @0x0000000004f502e0
    states: Knet.KnetArray{UInt8,1}
      ptr: Knet.KnetPtr
        ptr: Ptr{Void} Ptr{Void} @0x0000010913610000
        len: Int64 1474560
        dev: Int64 1
        parent: Void nothing
      dims: Tuple{Int64}
        1: Int64 1474560
  dx: Void nothing
  dhx: Void nothing
  dcx: Void nothing


In [47]:
cpushake[1].rnnDesc = cpushake[1].dropoutDesc = nothing

In [48]:
save("shakespeare.jld","model",cpushake)

In [49]:
foo1 = download("http://people.csail.mit.edu/deniz/models/nlp-demos/shakespeare.jld")


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2813k  100 2813k    0     0  2333k      0  0:00:01  0:00:01 --:--:-- 2333k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 91 69.1M   91 63.2M    0     0  5473k      0  0:00:12  0:00:11  0:00:01 5618k

LoadError: [91mSystemError: opening file foo1: No such file or directory[39m

100 69.1M  100 69.1M    0     0  5728k      0  0:00:12  0:00:12 --:--:-- 6895k


In [50]:
m1 = load(foo1,"model")


(Knet.RNN(128, 2048, 1, 0.5, 0, 0, 2, 0, Float32, Knet.RD(Ptr{Void} @0x00000000018273d0), Knet.DD(Ptr{Void} @0x00007f52734da4c0, Knet.KnetArray{UInt8,1}(Knet.KnetPtr(Ptr{Void} @0x0000010913e91a00, 1474560, 1, nothing), (1474560,))), nothing, nothing, nothing), Float32[-0.130193]

Float32[-0.110856]

Float32[0.224105]

...

Float32[-0.0971482]

Float32[-0.126658]

Float32[-0.119905], Float32[0.00395426 0.242381 … 0.004736 0.0848034; 0.00741395 0.0714351 … 0.0387686 0.0708666; … ; -0.151519 0.0125546 … 0.13094 -0.0600793; 0.163786 0.0216087 … -0.110347 -0.167693], Float32[0.11218 0.232794 … 0.0872763 -0.0770994; 0.0390845 -0.219831 … -0.353927 0.0227392; … ; -0.047294 -0.170231 … 0.176739 0.0469917; -0.0778935 -0.077268 … -0.0225408 -0.0718321], Float32[0.740456; 0.329557; … ; -0.184157; -0.247125])

In [51]:
generate(m1,100)

en Surreys, how?
  MESSALA. Ay, nor I am merry; that's lury.
  CELIA. Thy hair is a man, his princ


In [52]:
generate(m2,100)

LoadError: [91mBoundsError: attempt to access 128×128 Array{Float32,2} at index [Base.Slice(Base.OneTo(128)), [0]][39m

In [53]:
cpumodel

(Knet.RNN(128, 2048, 1, 0.5, 0, 0, 2, 0, Float32, nothing, nothing, nothing, nothing, nothing), Float32[-0.130193]

Float32[-0.110856]

Float32[0.224105]

...

Float32[-0.0971482]

Float32[-0.126658]

Float32[-0.119905], Float32[0.00395426 0.242381 … 0.004736 0.0848034; 0.00741395 0.0714351 … 0.0387686 0.0708666; … ; -0.151519 0.0125546 … 0.13094 -0.0600793; 0.163786 0.0216087 … -0.110347 -0.167693], Float32[0.11218 0.232794 … 0.0872763 -0.0770994; 0.0390845 -0.219831 … -0.353927 0.0227392; … ; -0.047294 -0.170231 … 0.176739 0.0469917; -0.0778935 -0.077268 … -0.0225408 -0.0718321], Float32[0.740456; 0.329557; … ; -0.184157; -0.247125], [' ', 'e', 't', 'r', 'n', 'i', 'a', 'o', 's', '\n'  …  'ˢ', '𝕴', '⁉', '𝚗', '🈲', '🔽', '≦', '⋟', '🚊', '𝖆'])

In [54]:
save("juliacharlm.jld","model",cpumodel)

In [55]:
foo2 = download("http://people.csail.mit.edu/deniz/models/nlp-demos/juliacharlm.jld")
m2 = load(foo2,"model")

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 93 69.1M   93 64.3M    0     0  7295k      0  0:00:09  0:00:09 --:--:-- 7242k

(Knet.RNN(128, 2048, 1, 0.5, 0, 0, 2, 0, Float32, Knet.RD(Ptr{Void} @0x00007f5277618250), Knet.DD(Ptr{Void} @0x000000000192cff0, Knet.KnetArray{UInt8,1}(Knet.KnetPtr(Ptr{Void} @0x0000010913610000, 1474560, 1, nothing), (1474560,))), nothing, nothing, nothing), Float32[-0.130193]

Float32[-0.110856]

Float32[0.224105]

...

Float32[-0.0971482]

Float32[-0.126658]

Float32[-0.119905], Float32[0.00395426 0.242381 … 0.004736 0.0848034; 0.00741395 0.0714351 … 0.0387686 0.0708666; … ; -0.151519 0.0125546 … 0.13094 -0.0600793; 0.163786 0.0216087 … -0.110347 -0.167693], Float32[0.11218 0.232794 … 0.0872763 -0.0770994; 0.0390845 -0.219831 … -0.353927 0.0227392; … ; -0.047294 -0.170231 … 0.176739 0.0469917; -0.0778935 -0.077268 … -0.0225408 -0.0718321], Float32[0.740456; 0.329557; … ; -0.184157; -0.247125], [' ', 'e', 't', 'r', 'n', 'i', 'a', 'o', 's', '\n'  …  'ˢ', '𝕴', '⁉', '𝚗', '🈲', '🔽', '≦', '⋟', '🚊', '𝖆'])

100 69.1M  100 69.1M    0     0  7533k      0  0:00:09  0:00:09 --:--:-- 7965k


In [56]:
generate(m2,100)

        # fixing mode
        containspind(C)
    end
    ogd_println = filter(0 -> delete!(spp[i], 


In [57]:
dump(m2[1])

Knet.RNN
  inputSize: Int32 128
  hiddenSize: Int32 2048
  numLayers: Int32 1
  dropout: Float64 0.5
  inputMode: Int32 0
  direction: Int32 0
  mode: Int32 2
  algo: Int32 0
  dataType: Float32 <: AbstractFloat
  rnnDesc: Knet.RD
    ptr: Ptr{Void} Ptr{Void} @0x00007f5277618250
  dropoutDesc: Knet.DD
    ptr: Ptr{Void} Ptr{Void} @0x000000000192cff0
    states: Knet.KnetArray{UInt8,1}
      ptr: Knet.KnetPtr
        ptr: Ptr{Void} Ptr{Void} @0x0000010913610000
        len: Int64 1474560
        dev: Int64 1
        parent: Void nothing
      dims: Tuple{Int64}
        1: Int64 1474560
  dx: Void nothing
  dhx: Void nothing
  dcx: Void nothing
