In [1]:
using JLD2        
include("../src/networkFunctions.jl")
include("../src/ADLibrary/functions.jl")

Sigmoid (generic function with 3 methods)

In [2]:
# Read every prepared IMDB dataset in one pass: the JLD2 file is opened once
# instead of once per variable, and the path is written in a single place.
X_train, y_train, X_test, y_test, embeddings, vocab =
    jldopen("data_rnn/imdb_dataset_prepared.jld2", "r") do file
        names = ("X_train", "y_train", "X_test", "y_test", "embeddings", "vocab")
        # `map` over a tuple returns a tuple, in the same order as `names`.
        map(name -> file[name], names)
    end

# The labels are converted to Float32 before they are handed to training.
y_train = Float32.(y_train)
y_test = Float32.(y_test)

# Keep the notebook from echoing the last loaded value.
nothing

In [3]:
# --- Model Definition ---

"""
    loss_fun(y_pred, y_true; epsilon=1e-7)

Return the mean binary cross-entropy between the predictions `y_pred` and the
targets `y_true`.

# Keywords
- `epsilon`: small constant added inside both logarithms so that predictions of
  exactly 0 or 1 do not evaluate `log(0)`. The default `1e-7` is a `Float64`
  literal, so plain `Float32` array inputs are promoted to `Float64`; pass
  `epsilon=1f-7` to keep the computation in `Float32`.
"""
function loss_fun(y_pred, y_true; epsilon=1e-7)
    return -mean(
        y_true .* log.(y_pred .+ epsilon) .+
        (1 .- y_true) .* log.(1 .- y_pred .+ epsilon)
    )
end

# Layer sizes. The hidden width is a fixed choice; the other two are read from
# the loaded data (vocabulary length and first dimension of `embeddings`).
# NOTE(review): `embeddings` is only read for its size in this cell — confirm
# whether its values are meant to initialise the Embedding layer elsewhere.
hidden_size = 16
embedding_dim = size(embeddings, 1)
vocab_size = length(vocab)

# Layer stack handed to `Sequential`; the final Dense layer has a single
# output with a Sigmoid activation.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SimpleRNN(embedding_dim, hidden_size, ReLU),
    SelectLastTimestep(),
    Flatten(),
    Dense(hidden_size, 1, Sigmoid)
])

# Suppress the notebook echo of the model object.
nothing

In [4]:
# Train `model` with `loss_fun` on the training split; the test split is passed
# alongside it. `print_learning_data=true` enables the per-epoch log.
train!(
    model, loss_fun, X_train, y_train, X_test, y_test;
    epochs=12,
    lr=0.001,
    batchsize=128,
    optimizer=:Adam,
    clip_norm=1.0f0,
    decay_factor=0.5f0,
    decay_epochs=4,
    print_learning_data=true
)

Epoch 1 ▶ Train Loss=0.693, Train Acc=50.83%	│   Test Loss=0.6925, Test Acc=50.71%	|   Time=28.87
Epoch 2 ▶ Train Loss=0.6847, Train Acc=53.39%	│   Test Loss=0.6914, Test Acc=51.57%	|   Time=17.7
Epoch 3 ▶ Train Loss=0.6533, Train Acc=56.62%	│   Test Loss=0.682, Test Acc=54.83%	|   Time=25.13
Epoch 4 ▶ Train Loss=0.6164, Train Acc=58.23%	│   Test Loss=0.7259, Test Acc=51.78%	|   Time=25.19
Epoch 5 ▶ Train Loss=0.5883, Train Acc=59.53%	│   Test Loss=0.7506, Test Acc=52.17%	|   Time=26.51
Epoch 6 ▶ Train Loss=0.5712, Train Acc=62.37%	│   Test Loss=0.7477, Test Acc=53.6%	|   Time=21.71
Epoch 7 ▶ Train Loss=0.4429, Train Acc=78.7%	│   Test Loss=0.5165, Test Acc=80.49%	|   Time=19.23
Epoch 8 ▶ Train Loss=0.3576, Train Acc=85.76%	│   Test Loss=0.4739, Test Acc=81.48%	|   Time=18.88
Epoch 9 ▶ Train Loss=0.3008, Train Acc=88.39%	│   Test Loss=0.6023, Test Acc=81.94%	|   Time=19.36
Epoch 10 ▶ Train Loss=0.2815, Train Acc=89.96%	│   Test Loss=0.4733, Test Acc=82.03%	|   Time=19.29
Epoch 11 ▶ Tra

In [7]:
using BenchmarkTools

# Untrained model that serves as the template for every benchmark sample.
model = Sequential([
    Embedding(vocab_size, embedding_dim),
    SimpleRNN(embedding_dim, hidden_size, ReLU),
    SelectLastTimestep(),
    Flatten(),
    Dense(hidden_size, 1, Sigmoid)
])

# BenchmarkTools can execute the benchmarked expression more than once (warm-up,
# tuning, timed samples). `train!` presumably updates the model in place (the
# `!` suffix suggests so), so timing it on one shared object would measure
# training of an already-trained model. Therefore:
#   * `setup` gives every sample its own untrained copy `m` of the template,
#   * `evals=1` ensures a copy is never trained twice within one sample,
#   * `$` interpolation passes the notebook globals as values.
# Consequence: the global `model` is left untrained after this cell.
@benchmark train!(
    m, $loss_fun, $X_train, $y_train, $X_test, $y_test;
    epochs=12,
    lr=0.001,
    batchsize=128,
    optimizer=:Adam,
    clip_norm=1.0f0,
    decay_factor=0.5f0,
    decay_epochs=4,
    print_learning_data=false
) setup=(m = deepcopy($model)) evals=1

BenchmarkTools.Trial: 1 sample with 1 evaluation per sample.
 Single result which took 193.878 s (24.94% GC) to evaluate,
 with a memory estimate of 171.02 GiB, over 294470343 allocations.