In [1]:
using Pkg
# Activate the project environment that lives one directory above the notebook's
# working directory, so the packages loaded in later cells come from that project.
Pkg.activate("..")

[32m[1m  Activating[22m[39m project at `~/Code/1DI2153/RNN`


In [2]:
using JLD2
# Open the prepared IMDB dataset once and read every stored variable from the same
# handle, instead of re-opening and re-parsing the file for each key.
X_train, y_train, X_test, y_test, embeddings, vocab =
    jldopen("../data/imdb_dataset_prepared.jld2", "r") do file
        map(
            key -> file[key],
            ("X_train", "y_train", "X_test", "y_test", "embeddings", "vocab"),
        )
    end

# The embedding dimensionality is taken from the first axis of the embeddings array.
embedding_dim = size(embeddings, 1);

In [3]:
using Flux

# Width of the recurrent hidden state; shared by the RNN output and the Dense input.
hidden_dim = 16

# Binary sentiment classifier: token ids -> embeddings -> RNN -> sigmoid score.
model = Chain(
    # One embedding column per vocabulary entry.
    Flux.Embedding(length(vocab) => embedding_dim),
    # `return_state = true` makes the layer return its output together with the
    # final state (per the Flux recurrent-layer docs).
    Flux.RNN(embedding_dim => hidden_dim, relu; return_state = true),
    # Keep only the last element of what the RNN returned.
    x -> x[end],
    Flux.flatten,
    # Single sigmoid unit producing a score in (0, 1).
    Dense(hidden_dim => 1, σ),
)


Chain(
  Embedding(12849 => 50),               # 642_450 parameters
  RNN(50 => 16, relu),                  # 1_072 parameters
  var"#1#2"(),
  Flux.flatten,
  Dense(16 => 1, σ),                    # 17 parameters
)                   # Total: 6 arrays, 643_539 parameters, 2.455 MiB.

In [4]:
# Overwrite the Embedding layer's weight matrix in place with the pretrained
# embeddings loaded from the dataset file (GloVe vectors).
model[1].weight .= embeddings;

In [5]:
using Printf, Statistics

# Mini-batches of 128 (x, y) pairs drawn from the training split, shuffled.
dataset = Flux.DataLoader((X_train, y_train); batchsize=128, shuffle=true)

"""
    loss(m, x, y)

Return the binary cross-entropy between the predictions `m(x)` and the targets `y`.
"""
function loss(m, x, y)
    ŷ = m(x)
    return Flux.Losses.binarycrossentropy(ŷ, y)
end

"""
    accuracy(m, x, y)

Return the fraction of entries for which `m(x)` and `y` fall on the same side of 0.5.
"""
function accuracy(m, x, y)
    predicted = m(x) .> 0.5
    expected = y .> 0.5
    return mean(predicted .== expected)
end

# Optimiser state for `model`, using RMSProp with its default hyper-parameters.
opt = Optimisers.setup(RMSProp(), model)

"""
    loss_and_accuracy(m, x, y)

Run `m` on `x` once and return `(loss, accuracy)` computed from the same predictions:
the binary cross-entropy against `y`, and the fraction of entries where prediction
and target fall on the same side of 0.5.
"""
function loss_and_accuracy(m, x, y)
    ŷ = m(x)
    bce = Flux.Losses.binarycrossentropy(ŷ, y)
    acc = mean((ŷ .> 0.5) .== (y .> 0.5))
    return bce, acc
end

epochs = 20
for epoch in 1:epochs
    # Running sums weighted by batch size, so the epoch averages are per-sample
    # means even when the final batch is smaller than the others.
    total_loss = 0.0
    total_acc = 0.0
    num_samples = 0

    # The timing covers the training pass and the evaluation on the test split.
    t = @elapsed begin
        for (x, y) in dataset
            # NOTE(review): no `Flux.reset!` call here. The recurrent layers that accept
            # `return_state` are documented as stateless, making `reset!` a deprecated
            # no-op there; confirm against the installed Flux version.
            grads = Flux.gradient(model) do m
                loss(m, x, y)
            end
            Optimisers.update!(opt, model, grads[1])

            # Metrics are measured after the parameter update, with one forward pass.
            # Assumes the DataLoader batches along the last dimension of `x`.
            batch_size = size(x, ndims(x))
            batch_loss, batch_acc = loss_and_accuracy(model, x, y)
            total_loss += batch_loss * batch_size
            total_acc += batch_acc * batch_size
            num_samples += batch_size
        end

        train_loss = total_loss / num_samples
        train_acc = total_acc / num_samples

        # A single forward pass over the whole test split yields both metrics.
        test_loss, test_acc = loss_and_accuracy(model, X_test, y_test)
    end

    println(@sprintf("Epoch: %d (%.2fs) \tTrain: (l: %.2f, a: %.2f) \tTest: (l: %.2f, a: %.2f)", 
        epoch, t, train_loss, train_acc, test_loss, test_acc))
end

Epoch: 1 (13.97s) 	Train: (l: 0.70, a: 0.52) 	Test: (l: 0.69, a: 0.50)
Epoch: 2 (4.91s) 	Train: (l: 0.69, a: 0.53) 	Test: (l: 0.69, a: 0.50)
Epoch: 3 (4.91s) 	Train: (l: 0.63, a: 0.63) 	Test: (l: 0.59, a: 0.73)
Epoch: 4 (4.81s) 	Train: (l: 0.51, a: 0.77) 	Test: (l: 0.49, a: 0.78)
Epoch: 5 (4.90s) 	Train: (l: 0.44, a: 0.81) 	Test: (l: 0.46, a: 0.81)
Epoch: 6 (4.81s) 	Train: (l: 0.40, a: 0.84) 	Test: (l: 0.42, a: 0.82)
Epoch: 7 (4.81s) 	Train: (l: 0.37, a: 0.86) 	Test: (l: 0.40, a: 0.83)
Epoch: 8 (4.88s) 	Train: (l: 0.36, a: 0.86) 	Test: (l: 0.50, a: 0.81)
Epoch: 9 (4.91s) 	Train: (l: 0.32, a: 0.88) 	Test: (l: 0.41, a: 0.84)
Epoch: 10 (4.97s) 	Train: (l: 0.30, a: 0.89) 	Test: (l: 0.38, a: 0.85)
Epoch: 11 (4.94s) 	Train: (l: 0.27, a: 0.90) 	Test: (l: 0.36, a: 0.86)
Epoch: 12 (4.88s) 	Train: (l: 0.24, a: 0.91) 	Test: (l: 0.41, a: 0.86)
Epoch: 13 (4.90s) 	Train: (l: 0.21, a: 0.92) 	Test: (l: 0.32, a: 0.87)
Epoch: 14 (4.94s) 	Train: (l: 0.20, a: 0.92) 	Test: (l: 0.45, a: 0.83)
Epoch: 15 (4.9