In [1]:
# Activate the project environment located in the parent directory so that the
# packages loaded by the following cells are resolved from that project.
using Pkg
Pkg.activate("..")

[32m[1m  Activating[22m[39m project at `~/Code/1DI2153/RNN`


In [None]:
using JLD2

# Prepared IMDB dataset, given relative to the notebook's working directory.
dataset_path = "../data/imdb_dataset_prepared.jld2"

# Open the file once and read every array from the same handle instead of
# re-opening and re-parsing the file for each variable.
X_train, y_train, X_test, y_test, vocab, embeddings = jldopen(dataset_path, "r") do file
    map(
        name -> file[name],
        ("X_train", "y_train", "X_test", "y_test", "vocab", "embeddings"),
    )
end;

# The first dimension of `embeddings` is used as the embedding size of the model.
embedding_dim = size(embeddings, 1);

In [3]:
using Flux, Random
Random.seed!(0)  # seed the global RNG so repeated runs are reproducible

# Width of the recurrent hidden state. The RNN output size and the Dense input
# size must agree, so both are derived from this single value.
hidden_dim = 16

# Classifier: vocabulary indices -> embedding vectors -> RNN -> one sigmoid output.
model = Chain(
    Flux.Embedding(length(vocab) => embedding_dim),
    # With `return_state = true` the layer returns the final state together with
    # the per-step outputs (see the Flux recurrent-layer documentation).
    Flux.RNN(embedding_dim => hidden_dim, relu, return_state = true),
    # Keep only the last element of what the RNN returns, i.e. the final state.
    rnn_result -> rnn_result[end],
    Flux.flatten,
    Dense(hidden_dim => 1, σ),
)


Chain(
  Embedding(12849 => 50),               # 642_450 parameters
  RNN(50 => 16, relu),                  # 1_072 parameters
  var"#1#2"(),
  Flux.flatten,
  Dense(16 => 1, σ),                    # 17 parameters
)                   # Total: 6 arrays, 643_539 parameters, 2.455 MiB.

In [4]:
# Initialise the Embedding layer (first layer of the chain) with the pre-trained
# GloVe vectors. The shapes are compared first because `.=` broadcasts: an array
# of the wrong but broadcast-compatible shape would otherwise be silently
# repeated across the weight matrix instead of raising an error.
let weight = model.layers[1].weight
    size(weight) == size(embeddings) || throw(
        DimensionMismatch(
            "embeddings have size $(size(embeddings)), " *
            "but the Embedding weight has size $(size(weight))",
        ),
    )
    weight .= embeddings
end;

In [5]:
using Printf, Statistics

# Mini-batches of 128 observations, reshuffled on every pass over the data.
dataset = Flux.DataLoader((X_train, y_train), batchsize=128, shuffle=true)

# Mean binary cross-entropy of model `m` on inputs `x` with targets `y`.
loss(m, x, y) = Flux.Losses.binarycrossentropy(m(x), y)
# Fraction of predictions that fall on the same side of 0.5 as their target.
accuracy(m, x, y) = mean((m(x) .> 0.5) .== (y .> 0.5))

"""
    evaluate(m, x, y)

Return `(loss, accuracy)` of model `m` on `(x, y)`.

Both metrics are computed from a single forward pass, unlike calling `loss` and
`accuracy` separately, which each run the model once.
"""
function evaluate(m, x, y)
    preds = m(x)
    batch_loss = Flux.Losses.binarycrossentropy(preds, y)
    batch_acc = mean((preds .> 0.5) .== (y .> 0.5))
    return batch_loss, batch_acc
end

# `Flux.setup` / `Flux.update!` are used so the cell only depends on names that
# `using Flux` is guaranteed to provide.
opt = Flux.setup(RMSProp(), model)

epochs = 12
for epoch in 1:epochs
    loss_sum = 0.0   # sum of per-sample training losses seen this epoch
    acc_sum = 0.0    # number of correctly classified training samples this epoch
    seen = 0         # number of training samples processed this epoch

    # The timed region covers the training pass and the test-set evaluation.
    elapsed = @elapsed begin
        for (x, y) in dataset
            # No `Flux.reset!` call: the recurrent layer used here keeps no
            # internal state between calls, so there is nothing to reset.
            grads = Flux.gradient(model) do m
                loss(m, x, y)
            end
            Flux.update!(opt, model, grads[1])

            # Metrics are measured after the parameter update, on the same batch.
            batch_loss, batch_acc = evaluate(model, x, y)

            # Weight each batch by its size so a smaller final batch does not
            # skew the epoch averages. Assumes the observation axis is the last
            # dimension of `y`, which is what the DataLoader batches along.
            batch_n = size(y, ndims(y))
            loss_sum += batch_loss * batch_n
            acc_sum += batch_acc * batch_n
            seen += batch_n
        end

        test_loss, test_acc = evaluate(model, X_test, y_test)
    end

    train_loss = loss_sum / seen
    train_acc = acc_sum / seen

    @printf(
        "Epoch: %d (%.2fs) \tTrain: (l: %.2f, a: %.2f) \tTest: (l: %.2f, a: %.2f)\n",
        epoch, elapsed, train_loss, train_acc, test_loss, test_acc
    )
end

Epoch: 1 (14.20s) 	Train: (l: 0.69, a: 0.52) 	Test: (l: 0.69, a: 0.50)
Epoch: 2 (4.99s) 	Train: (l: 0.68, a: 0.54) 	Test: (l: 0.69, a: 0.51)
Epoch: 3 (4.88s) 	Train: (l: 0.57, a: 0.71) 	Test: (l: 0.55, a: 0.76)
Epoch: 4 (5.09s) 	Train: (l: 0.47, a: 0.79) 	Test: (l: 0.46, a: 0.79)
Epoch: 5 (4.96s) 	Train: (l: 0.41, a: 0.83) 	Test: (l: 0.43, a: 0.82)
Epoch: 6 (5.09s) 	Train: (l: 0.37, a: 0.85) 	Test: (l: 0.40, a: 0.83)
Epoch: 7 (5.03s) 	Train: (l: 0.34, a: 0.87) 	Test: (l: 0.83, a: 0.66)
Epoch: 8 (5.34s) 	Train: (l: 0.31, a: 0.88) 	Test: (l: 0.38, a: 0.84)
Epoch: 9 (5.02s) 	Train: (l: 0.28, a: 0.89) 	Test: (l: 0.37, a: 0.85)
Epoch: 10 (5.24s) 	Train: (l: 0.26, a: 0.91) 	Test: (l: 0.37, a: 0.85)
Epoch: 11 (5.01s) 	Train: (l: 0.24, a: 0.91) 	Test: (l: 0.37, a: 0.86)
Epoch: 12 (5.08s) 	Train: (l: 0.23, a: 0.92) 	Test: (l: 0.33, a: 0.87)
