In [1]:
using Knet, Random, Printf, Dates

In [2]:
# Vocabulary: maps every word to an integer id, handed out in order of first appearance.
w2i = Dict{String,Int}()
# "<unk>" is registered first, so it receives id 1.
unk = get!(w2i, "<unk>", length(w2i) + 1)
# Return the id of `w`, registering `w` under the next free id if it has not been seen yet.
function w2i_f(w)
    return get!(w2i, w) do
        length(w2i) + 1
    end
end

w2i_f (generic function with 1 method)

In [3]:
# Read a labelled corpus from `filename`.
#
# Each non-blank line must look like `<tag> ||| <word> <word> ...`. The line is
# lower-cased, the integer tag is shifted by one (so a 0-based tag becomes 1-based)
# and every word is mapped to an integer id through `w2i_f`.
#
# Returns a vector of `(word_ids, tag)` tuples, one per example.
# Throws `ArgumentError` if a non-blank line does not contain the " ||| " separator.
function readdata(filename)
    data = Tuple{Vector{Int},Int}[]
    for line in eachline(filename)
        text = strip(lowercase(line))
        # Blank lines (e.g. a trailing newline at the end of the file) carry no example.
        isempty(text) && continue
        fields = split(text, " ||| ")
        length(fields) >= 2 ||
            throw(ArgumentError("expected \"<tag> ||| <words>\", got: $(repr(line))"))
        tag = parse(Int, fields[1]) + 1
        # Typed comprehension keeps the element type Int even for an empty sentence.
        words = Int[w2i_f(w) for w in split(fields[2])]
        push!(data, (words, tag))
    end
    return data
end

readdata (generic function with 1 method)

In [4]:
# Read the training set first: at this point w2i_f still adds unseen words to w2i,
# so the vocabulary is built from the training data.
trn_data = readdata("./train.txt")
# Freeze the vocabulary: from here on w2i_f only looks words up and maps any word
# that is not in w2i to the "<unk>" id instead of registering it.
w2i_f(w) = get(w2i, w, unk)
# The test set is read with the frozen vocabulary.
tst_data = readdata("./test.txt");

In [5]:
# Model hyper-parameters.
nwords = length(w2i)      # vocabulary size (number of entries in w2i, including "<unk>")
ntags = 5                 # number of output classes (output size of the final Dense layer)
atype = Knet.atype()      # array type passed to param/param0 when creating parameters
EMB_SIZE = 64             # embedding size; also the first dimension of the convolution filter
WIN_SIZE = 3              # convolution window in words; also the minimum padded sentence length
FILTER_SIZE = 64          # number of convolution filters (input size of the final Dense layer)

64

In [6]:
# Embed layer: holds one embedding column per vocabulary entry in a 4-D array `w`
# of size (embedsize, vocabsize, 1, 1).
struct Embed; w; end

# Create a trainable embedding table for `vocabsize` words of dimension `embedsize`.
Embed(vocabsize::Int, embedsize::Int) =
    Embed(param(embedsize, vocabsize, 1, 1, atype=atype))

# Look up the columns selected by `x` (word indices) along the second dimension of `w`.
(l::Embed)(x) = l.w[:, x, :, :]

In [7]:
# Convolutional layer: filter `w`, bias `b`, activation `f` and input dropout probability `p`.
struct Conv; w; b; f; p; end

# Forward pass: dropout on the input, convolution plus bias, then pooling with a
# window that spans the whole first two dimensions of the convolved output
# (global max pooling), and finally the activation applied elementwise.
function (c::Conv)(x)
    features = conv4(c.w, dropout(x, c.p)) .+ c.b
    window = (size(features, 1), size(features, 2))
    pooled = pool(features; window=window)
    return c.f.(pooled)
end

# Create a layer with a (w1, w2, cx, cy) filter and a (1, 1, cy, 1) bias;
# `f` defaults to relu and `pdrop` is the dropout probability.
Conv(w1::Int, w2::Int, cx::Int, cy::Int, f=relu; pdrop=0) =
    Conv(param(w1, w2, cx, cy, atype=atype),
         param0(1, 1, cy, 1, atype=atype),
         f,
         pdrop)

Conv

In [8]:
# Dense (fully connected) layer: weight `w`, bias `b`, activation `f`, input dropout probability `p`.
struct Dense; w; b; f; p; end

# Forward pass: dropout on the input, flatten, affine transform, activation.
function (d::Dense)(x)
    # mat reshapes the 4-D tensor to a 2-D matrix so we can use matmul
    flat = mat(dropout(x, d.p))
    return d.f.(d.w * flat .+ d.b)
end

# Create a layer mapping `i` inputs to `o` outputs; `f` defaults to relu and
# `pdrop` is the dropout probability.
Dense(i::Int, o::Int, f=relu; pdrop=0) =
    Dense(param(o, i, atype=atype), param0(o, atype=atype), f, pdrop)

Dense

In [9]:
# Let's define a chain of layers
# A Chain stores its layers as a tuple and applies them one after another.
struct Chain
    layers
    Chain(layers...) = new(layers)
end

# Forward pass for one sentence given as a collection of word indices `x`.
# Sentences shorter than WIN_SIZE are right-padded with index 1 (the id that "<unk>"
# receives when the vocabulary is created) so the convolution window always fits.
# The padding is written to a fresh vector: the caller's `x` is never modified,
# so running the model does not alter the dataset.
function (c::Chain)(x)
    npad = WIN_SIZE - length(x)
    h = npad > 0 ? Int[x; fill(1, npad)] : x
    for layer in c.layers
        h = layer(h)
    end
    return h
end

# Loss for one example: negative log-likelihood of the gold tag `y` under the scores for `x`.
(c::Chain)(x,y) = nll(c(x),[y])

In [10]:
# Assemble the classifier: word embeddings -> convolution over WIN_SIZE-word windows
# with FILTER_SIZE filters (dropout 0.2) -> linear layer producing ntags scores (dropout 0.1).
model = Chain(Embed(nwords, EMB_SIZE),
              Conv(EMB_SIZE, WIN_SIZE, 1, FILTER_SIZE,pdrop=0.2),
              Dense(FILTER_SIZE,ntags,identity,pdrop=0.1))

Chain((Embed(P(KnetArray{Float32,4}(64,16580,1,1))), Conv(P(KnetArray{Float32,4}(64,3,1,64)), P(KnetArray{Float32,4}(1,1,64,1)), NNlib.relu, 0.2), Dense(P(KnetArray{Float32,2}(5,64)), P(KnetArray{Float32,1}(5)), identity, 0.1)))

In [11]:
# Evaluate the global `model` on a single example.
#
# `x` is the sentence (word indices) and `y` its gold tag. Returns `(loss, correct)`:
# the negative log-likelihood of `y` and whether the highest-scoring tag equals `y`.
function acc_loss(x, y)
    scores = model(x)
    # First component of the argmax index is the row, i.e. the predicted tag
    # (assumes scores has one row per tag -- TODO confirm).
    predicted = argmax(scores)[1]
    return nll(scores, [y]), predicted == y
end

acc_loss (generic function with 1 method)

In [12]:
# Train for 10 epochs. After each epoch report the mean training loss, the training
# and test accuracy, and the wall-clock time spent on the epoch (training + evaluation).
#
# Timing uses full DateTime values rather than time-of-day values, so the elapsed
# time stays correct when an epoch runs across midnight.
t = now()
for i in 1:10
    # `t` is the top-level variable assigned above; declaring it global makes the
    # reset at the bottom of the loop work when this is run as a script as well as
    # in a notebook.
    global t

    # Update the model with Adam over the shuffled training examples.
    adam!(model, shuffle(trn_data))

    # Each entry is a (loss, correct) tuple for one example.
    trn_stats = [acc_loss(x...) for x in trn_data]
    trn_loss = sum(first, trn_stats) / length(trn_data)
    trn_acc = sum(last, trn_stats) / length(trn_data)

    tst_stats = [acc_loss(x...) for x in tst_data]
    tst_acc = sum(last, tst_stats) / length(tst_data)

    @printf("iter %d: train loss/sent=%.4f, acc=%.4f, time=%s\n",i-1,trn_loss,trn_acc,now()-t)
    @printf("iter %d: test acc=%.4f\n",i-1,tst_acc)
    t = now()
end

iter 0: train loss/sent=1.1617, acc=0.5272, time=63915000000 nanoseconds
iter 0: test acc=0.4149
iter 1: train loss/sent=0.8810, acc=0.6593, time=33123000000 nanoseconds
iter 1: test acc=0.4086
iter 2: train loss/sent=0.4579, acc=0.8803, time=33093000000 nanoseconds
iter 2: test acc=0.4195

