In [1]:
using Knet, Random, Printf, Dates

In [2]:
# Vocabulary: maps each token string to a positive integer id, handed out in order
# of first appearance.
w2i = Dict{String,Int}()

# Register the unknown-word token first so that it receives the first id.
unk = get!(w2i, "<unk>", length(w2i) + 1)

# Return the id of `w`; a word not yet in the vocabulary is inserted with the next free id.
function w2i_f(w)
    return get!(w2i, w, length(w2i) + 1)
end

w2i_f (generic function with 1 method)

In [3]:
# Array type passed as `atype=` to every `param`/`param0` call in the layer constructors
# of this file. Its value in this run is shown in the cell output that follows.
atype = Knet.atype()
# Alternative kept for reference (disabled): atype = Array{Float32}

KnetArray{Float32,N} where N

In [4]:
# Read a labelled corpus from `filename`.
#
# Each non-blank line is expected to look like `TAG ||| word word ...`, where TAG is an
# integer. The line is stripped and lower-cased, the tag is shifted by +1, and every word
# is mapped to an integer id through the global `w2i_f`.
#
# Returns a `Vector{Tuple{Vector{Int},Int}}` of `(word_ids, tag)` pairs, one per line.
# Throws `ArgumentError` when a non-blank line does not contain the " ||| " separator.
function readdata(filename)
    data = Tuple{Vector{Int},Int}[]
    # eachline streams the file instead of materialising every line up front.
    for raw in eachline(filename)
        line = lowercase(strip(raw))
        # Blank lines (e.g. a trailing newline at end of file) carry no example.
        isempty(line) && continue
        parts = split(line, " ||| ")
        length(parts) >= 2 ||
            throw(ArgumentError("expected \"tag ||| words\" in $filename, got: $line"))
        tag = parse(Int, parts[1]) + 1
        words = Int[w2i_f(w) for w in split(parts[2])]
        push!(data, (words, tag))
    end
    return data
end

readdata (generic function with 1 method)

In [5]:
# Load the training set first: at this point w2i_f uses get!, so every new word read
# here is added to the vocabulary w2i.
trn_data = readdata("./train.txt")
# Freeze the vocabulary: w2i_f is redefined with the non-inserting get, so any word not
# already in w2i maps to `unk` from here on. This must happen before the test set is read.
w2i_f(w) = get(w2i, w, unk)
tst_data = readdata("./test.txt");

In [6]:
# Vocabulary size after the data has been read (the "<unk>" entry is included).
nwords = length(w2i)
# Number of output classes, hard-coded.
# NOTE(review): assumes the tags in the data files are 0..4 before readdata's +1 shift — confirm.
ntags = 5;

In [7]:
# Embedding layer: `w` is a 4-D table indexed along its 2nd dimension by word id.
struct Embed
    w
end

# Create a trainable embedding table of size (1, vocabsize, embedsize, 1) stored as `atype`.
Embed(vocabsize::Int, embedsize::Int) =
    Embed(param(1, vocabsize, embedsize, 1; atype=atype))

# Select the entries for the word ids `x`; all other dimensions are kept whole.
(l::Embed)(x) = l.w[:, x, :, :]

In [8]:
# Convolutional layer: weights `w`, bias `b`, activation `f`, dropout probability `p`.
struct Conv
    w
    b
    f
    p
end

# Forward pass: dropout on the input, convolution plus bias, global max pooling, activation.
function (c::Conv)(x)
    h = conv4(c.w, dropout(x, c.p); padding=(0, 1)) .+ c.b
    # The pooling window spans the full extent of the first two dimensions,
    # which makes the pooling global.
    h = pool(h; window=size(h)[1:2])
    return c.f.(h)
end

# Build a layer with a (w1, w2, cx, cy) filter bank and a (1, 1, cy, 1) bias, both as `atype`.
function Conv(w1::Int, w2::Int, cx::Int, cy::Int, f=relu; pdrop=0)
    weights = param(w1, w2, cx, cy; atype=atype)
    bias = param0(1, 1, cy, 1; atype=atype)
    return Conv(weights, bias, f, pdrop)
end

Conv

In [9]:
# Fully connected layer: weights `w`, bias `b`, activation `f`, dropout probability `p`.
struct Dense
    w
    b
    f
    p
end

# Forward pass. `mat` reshapes the 4-D tensor to a 2-D matrix so that matmul can be used.
function (d::Dense)(x)
    flat = mat(dropout(x, d.p))
    return d.f.(d.w * flat .+ d.b)
end

# Build a layer with an (o, i) weight matrix and a length-o bias, both as `atype`.
function Dense(i::Int, o::Int, f=relu; pdrop=0)
    return Dense(param(o, i; atype=atype), param0(o; atype=atype), f, pdrop)
end

Dense

In [10]:
# A chain of layers applied one after another.
struct Chain
    layers
    Chain(layers...) = new(layers)
end

# Forward pass: run `x` through every layer in order and return the final output.
#
# Inputs shorter than the global `WIN_SIZE` are padded at the end with id 1 (the id that
# "<unk>" receives when the vocabulary is created) up to length `WIN_SIZE`.
# The padding is applied to a copy: the caller's vector is never modified, so the
# examples stored in the dataset keep their original length.
function (c::Chain)(x)
    npad = WIN_SIZE - length(x)
    if npad > 0
        x = vcat(x, fill(1, npad))
    end
    for layer in c.layers
        x = layer(x)
    end
    return x
end

# Loss for a single example: negative log-likelihood of label `y` given the output for `x`.
(c::Chain)(x, y) = nll(c(x), [y])

In [11]:
# Hyperparameters used when the model is built.
# Size of the embedding dimension passed to Embed and used as Conv's input channel count.
EMB_SIZE = 64
# Width of the convolution filter; Chain also pads shorter inputs up to this length.
WIN_SIZE = 3
# Number of convolution output channels, which is also the input size of the Dense layer.
FILTER_SIZE = 64

64

In [12]:
# Classifier: word embeddings -> convolution over windows of WIN_SIZE words with global
# pooling -> linear layer producing one score per tag.
model = Chain(
    Embed(nwords, EMB_SIZE),
    Conv(1, WIN_SIZE, EMB_SIZE, FILTER_SIZE; pdrop=0.1),
    Dense(FILTER_SIZE, ntags, identity),
)

Chain((Embed(P(KnetArray{Float32,4}(1,16580,64,1))), Conv(P(KnetArray{Float32,4}(1,3,64,64)), P(KnetArray{Float32,4}(1,1,64,1)), NNlib.relu, 0.1), Dense(P(KnetArray{Float32,2}(5,64)), P(KnetArray{Float32,1}(5)), identity, 0)))

In [13]:
# Evaluate the global `model` on one example.
#
# Returns `(loss, correct)`: the negative log-likelihood of label `y` under the scores for
# `x`, and a Bool telling whether the first index component of the highest score equals `y`.
function acc_loss(x, y)
    logits = model(x)
    predicted = argmax(logits)[1]
    return nll(logits, [y]), predicted == y
end

acc_loss (generic function with 1 method)

In [14]:
# Mean loss and accuracy of `acc_loss` over `data`, whose elements are `(x, y)` pairs.
function mean_loss_acc(data)
    results = [acc_loss(example...) for example in data]
    n = length(data)
    return sum(first, results) / n, sum(last, results) / n
end

# Train for 10 epochs. After each epoch, report the training loss and accuracy, the test
# accuracy, and the wall-clock time elapsed since the previous report.
# The timer uses full DateTime values rather than time-of-day, so the elapsed time stays
# correct when a run crosses midnight; it is printed in milliseconds.
t = now()
for epoch in 0:9
    adam!(model, shuffle(trn_data))
    trn_loss, trn_acc = mean_loss_acc(trn_data)
    _, tst_acc = mean_loss_acc(tst_data)

    @printf(
        "iter %d: train loss/sent=%.4f, acc=%.4f, time=%s\n",
        epoch, trn_loss, trn_acc, now() - t
    )
    @printf("iter %d: test acc=%.4f\n", epoch, tst_acc)
    # `global` makes the reassignment target the top-level `t` under script scoping
    # rules as well as in a notebook.
    global t = now()
end

iter 0: train loss/sent=1.1496, acc=0.5497, time=64617000000 nanoseconds
iter 0: test acc=0.4086
iter 1: train loss/sent=0.6800, acc=0.7937, time=30418000000 nanoseconds
iter 1: test acc=0.3937
iter 2: train loss/sent=0.2924, acc=0.9198, time=32230000000 nanoseconds
iter 2: test acc=0.4077
iter 3: train loss/sent=0.1098, acc=0.9683, time=32069000000 nanoseconds
iter 3: test acc=0.3995

