In [1]:
# Activate the project environment located two directories above the current
# working directory; packages loaded afterwards come from that environment.
using Pkg
Pkg.activate("../..")

[32m[1m  Activating[22m[39m project at `~/AWID`


In [2]:
using JLD2

# Single source of truth for the dataset location, so every array below is
# guaranteed to be read from the same file.
dataset_path = "../../data/embeddings/imdb_dataset_prepared.jld2"

# Read all six entries with one `load` call (which returns them as a tuple in
# the order requested) instead of re-opening the file once per variable.
X_train, y_train, X_test, y_test, embeddings, vocab = load(
    dataset_path, "X_train", "y_train", "X_test", "y_test", "embeddings", "vocab"
)

# Embedding dimensionality is the size of the first axis of `embeddings`.
# As the last expression of the cell, its value is shown as the cell output.
embedding_dim = size(embeddings, 1)

50

In [3]:
using AWID.NeuralNetwork

# Architecture hyperparameters, named once so the layer definitions and the
# sizes derived from them cannot drift apart.
kernel_size = 3  # each convolution filter looks at 3 neighbouring words
num_filters = 8  # 8 different filters, each learning to detect a different pattern
pool_size = 8    # per filter, keep the strongest activation out of every 8 windows

seq_len = size(X_train, 1)
batch_size = 64

# Input width of the final Dense layer, derived from the sequence length rather
# than hard-coded. For the run recorded in this notebook (seq_len = 130) this is
# 8 * div(128, 8) = 128, which matches the shapes printed by `summary`.
# NOTE(review): assumes Conv1D shortens the sequence by `kernel_size - 1` and
# MaxPool1D divides its length by `pool_size`, rounding down — confirm against
# the AWID.NeuralNetwork layers before using other sequence lengths.
conv_out_len = seq_len - kernel_size + 1
dense_in_features = num_filters * div(conv_out_len, pool_size)

# Create the embedding layer and copy the `embeddings` array into its weights.
embedding_layer = Embedding(length(vocab), embedding_dim)
embedding_layer.weight.output .= embeddings

model = Chain(
    embedding_layer,
    Conv1D(kernel_size, embedding_dim => num_filters, relu),
    MaxPool1D(pool_size),
    Flatten(),
    Dense(dense_in_features, 1, sigmoid),
)

# Print the per-layer output shapes for one batch of token sequences.
input_shape = (seq_len, batch_size)
summary(model, input_shape)

Model Summary:
Input shape: (130, 64)
Layer 1: Embedding -> Output shape: (50, 130, 64)
Layer 2: Conv1D -> Output shape: (8, 128, 64)
Layer 3: MaxPool1D -> Output shape: (8, 16, 64)
Layer 4: Flatten -> Output shape: (128, 64)
Layer 5: Dense -> Output shape: (1, 64)


In [4]:
using Printf

# Obtain the two callables used by the training loop: one that trains on a
# single batch and one that evaluates on the test set passed in here.
# `model` and `batch_size` use the `; name` shorthand for `name=name`.
train_on_batch, test_loss_and_accuracy = setup_training_functions(;
    model,
    batch_size,
    optimizer=Adam(),
    loss_fn=binary_crossentropy,
    accuracy_fn=accuracy,
    x_test=X_test,
    y_test=y_test,
)

# Train for a fixed number of epochs. After each epoch, print the elapsed time,
# the mean training loss/accuracy over its batches, and the test loss/accuracy.
epochs = 5
for epoch in 1:epochs
    # Running totals over the batches processed in this epoch.
    loss_sum, acc_sum, batches_seen = 0.0f0, 0.0f0, 0

    # Batches are prepared before the timer starts.
    shuffled_batches = get_epoch_batches(
        X_train, y_train; batch_size=batch_size, do_shuffle=true
    )

    # The timed region covers the training pass, the averaging, and the
    # evaluation on the test set.
    epoch_seconds = @elapsed begin
        for (inputs, targets) in shuffled_batches
            step_loss, step_acc = train_on_batch(inputs, targets)

            loss_sum += step_loss
            acc_sum += step_acc
            batches_seen += 1
        end

        mean_train_loss = loss_sum / batches_seen
        mean_train_acc = acc_sum / batches_seen

        test_loss, test_acc = test_loss_and_accuracy()
    end

    summary_line = @sprintf(
        "Epoch: %d (%.2fs) \tTrain: (l: %.2f, a: %.2f) \tTest: (l: %.2f, a: %.2f)",
        epoch, epoch_seconds, mean_train_loss, mean_train_acc, test_loss, test_acc
    )
    println(summary_line)
end

Epoch: 1 (67.99s) 	Train: (l: 0.57, a: 0.69) 	Test: (l: 0.43, a: 0.80)
Epoch: 2 (45.78s) 	Train: (l: 0.35, a: 0.85) 	Test: (l: 0.34, a: 0.85)
Epoch: 3 (46.45s) 	Train: (l: 0.27, a: 0.89) 	Test: (l: 0.33, a: 0.86)
Epoch: 4 (47.67s) 	Train: (l: 0.21, a: 0.92) 	Test: (l: 0.33, a: 0.87)
Epoch: 5 (55.02s) 	Train: (l: 0.15, a: 0.95) 	Test: (l: 0.36, a: 0.86)
