In [1]:
include("../MyReverseDiff.jl")
include("../MyEmbedding.jl")
include("../MyMlp.jl")
include("../TensorOperations.jl")

using .MyReverseDiff
using .MyMlp
using .TensorOperations
using JLD2
using Printf
using Random

In [2]:
using BenchmarkTools

# Random inputs for the micro-benchmark of `MyReverseDiff.dif_convolution`.
x = randn(Float32, 50, 130, 64)
m = randn(Float32, 3, 1)
# All-ones array passed as the third argument (presumably the incoming
# gradient — confirm against `dif_convolution`); its second dimension is
# size(x, 2) * size(m, 2).
g = ones(Float32, size(x, 1), size(x, 2) * size(m, 2), size(x, 3))

# `x`, `m` and `g` are non-constant globals. Interpolating them with `$` makes
# BenchmarkTools pass them as local values, so the reported time and
# allocations belong to the kernel and not to global-variable access.
@btime MyReverseDiff.dif_convolution($x, $m, $g)
#@btime MyReverseDiff.multi_convolution($x, $m)

  62.341 ms (18260 allocations: 107.17 MiB)


(Float32[1.0645225 1.0645225 … 1.0645225 1.0645225; 0.16612083 0.16612083 … 0.16612083 0.16612083; … ; 0.07603661 0.07603661 … 0.07603661 0.07603661; 0.07603661 0.07603661 … 0.07603661 0.07603661;;; 1.0645225 1.0645225 … 1.0645225 1.0645225; 0.16612083 0.16612083 … 0.16612083 0.16612083; … ; 0.07603661 0.07603661 … 0.07603661 0.07603661; 0.07603661 0.07603661 … 0.07603661 0.07603661;;; 1.0645225 1.0645225 … 1.0645225 1.0645225; 0.16612083 0.16612083 … 0.16612083 0.16612083; … ; 0.07603661 0.07603661 … 0.07603661 0.07603661; 0.07603661 0.07603661 … 0.07603661 0.07603661;;; … ;;; 1.0645225 1.0645225 … 1.0645225 1.0645225; 0.16612083 0.16612083 … 0.16612083 0.16612083; … ; 0.07603661 0.07603661 … 0.07603661 0.07603661; 0.07603661 0.07603661 … 0.07603661 0.07603661;;; 1.0645225 1.0645225 … 1.0645225 1.0645225; 0.16612083 0.16612083 … 0.16612083 0.16612083; … ; 0.07603661 0.07603661 … 0.07603661 0.07603661; 0.07603661 0.07603661 … 0.07603661 0.07603661;;; 1.0645225 1.0645225 … 1.0645225 1.0

- 9.833 ms (416835 allocations: 25.48 MiB)
- 9.533 ms (416646 allocations: 23.85 MiB)
- 8.691 ms (416457 allocations: 19.16 MiB)
- 8.274 ms (416268 allocations: 16.03 MiB)
- 2.293 ms (268 allocations: 3.33 MiB)
- 2.170 ms (79 allocations: 1.76 MiB)

- 284.843 ms (851209 allocations: 545.02 MiB)
- 269.293 ms (851017 allocations: 541.87 MiB)
- 297.682 ms (19017 allocations: 516.48 MiB)
- 265.192 ms (18825 allocations: 514.88 MiB)
- 250.897 ms (18831 allocations: 518.13 MiB)
- 184.586 ms (18447 allocations: 310.23 MiB)
- 107.900 ms (18068 allocations: 105.58 MiB)

## Przygotowanie danych IMDB

In [2]:
# Read every required array from the prepared IMDB file in a single `load`
# call: the file is opened once instead of five times, and the path is written
# in one place. With several names `load` returns the values as a tuple in the
# requested order.
X_train, y_train, X_test, y_test, embeddings = load(
    "../../dataset/imdb_dataset_prepared.jld2",
    "X_train", "y_train", "X_test", "y_test", "embeddings",
);
# vocab = load("../../dataset/imdb_dataset_prepared.jld2", "vocab");

input_size = size(X_train, 1) # number of features (rows of X_train)
embedding_dim = size(embeddings, 1)

50

##  Trening modelu

In [3]:
batch_size = 64

# Layer stack handed to `Chain`. The commented-out `TransposeBlock` entries are
# kept so the transposed variant can be switched back on.
model = Chain(
    Embedding(embeddings; name="embedding"),
    #TransposeBlock(),
    ConvolutionBlock(1, 3; name="layer1"),
    #TransposeBlock(),
    Dense3D(embedding_dim, 8, relu; name="dense1"),
    PoolingBlock(8),
    FlattenBlock(; name="flatten"),
    Dense(input_size, 1, σ; name="softnet"),
)

# Initial Constant nodes for the input data and the labels; later cells copy
# each batch into their `output` arrays.
x_input_node = Constant(zeros(Float32, input_size, batch_size))
y_label_node = Constant(zeros(Float32, 1, batch_size))

# Build the training graph: returns the loss node, the model output node and
# the node list `order` that `forward!` / `backward!` are called with.
loss_node, model_output_node, order = build_graph!(
    model, binarycrossentropy, x_input_node, y_label_node;
    loss_name="loss",
)

optimizer_state = setup_optimizer(Adam(; a=0.003f0), model)

epochs = 5

5

In [None]:
# Weights (original note: "setting the weights"; the expressions below only read them).
# NOTE(review): the hard-coded positions index into `order` as returned by
# build_graph! — re-check them whenever the layer stack changes.
order[5].output             # convolution mask
order[7].output             # Dense3D weights
order[8].output             # Dense3D bias
order[14].inputs[1].output  # Dense weights
order[15].output            # Dense bias

In [5]:

println("--- Rozpoczynam profilowanie treningu ---")

for epoch in 1:epochs
    # Draw a fresh column order for this epoch; indexing with it creates
    # shuffled copies of the training data.
    sample_count = size(X_train, 2)
    shuffle_order = randperm(sample_count)
    X_epoch = X_train[:, shuffle_order]
    y_epoch = y_train[:, shuffle_order]
    num_batches = ceil(Int, sample_count / batch_size)

    loss_value = 0.0

    println("\nEpoch: $epoch")
    total_batch_time = 0.0
    total_batch_alloc = 0
    total_batch_gc_time = 0.0

    # Each `cols` is a contiguous column range holding at most `batch_size` samples.
    for cols in Iterators.partition(1:sample_count, batch_size)
        filled = 1:length(cols)
        # NOTE(review): when the last chunk is shorter than `batch_size`, only the
        # first `length(cols)` buffer columns are overwritten; the remaining
        # columns still hold the previous batch — confirm this is intended.
        view(x_input_node.output, :, filled) .= view(X_epoch, :, cols)
        view(y_label_node.output, :, filled) .= view(y_epoch, :, cols)

        # `@timed` returns a structure with the results (time, bytes, gctime);
        # `@time` would only report the time.
        stats = @timed begin
            forward!(order)
            backward!(order)
            step!(optimizer_state) # assumes `step!` is already implemented
        end
        # Make sure `loss_node.output` is refreshed by `forward!`.
        loss_value += loss_node.output

        total_batch_time += stats.time
        total_batch_alloc += stats.bytes
        total_batch_gc_time += stats.gctime
    end

    avg_loss_epoch = loss_value / num_batches

    epoch_report = @sprintf("Epoch: %d \tTrain: (l: %.4f) \tTotal Batch Time: %.4fs \tTotal Alloc: %s \tGC Time: %.4fs",
        epoch, avg_loss_epoch, total_batch_time, Base.format_bytes(total_batch_alloc), total_batch_gc_time)
    println(epoch_report)
end

println("\n--- Koniec profilowania treningu ---")

--- Rozpoczynam profilowanie treningu ---

Epoch: 1
Epoch: 1 	Train: (l: 0.7049) 	Total Batch Time: 84.8330s 	Total Alloc: 99.666 GiB 	GC Time: 17.8597s

Epoch: 2
Epoch: 2 	Train: (l: 0.6798) 	Total Batch Time: 77.4106s 	Total Alloc: 98.785 GiB 	GC Time: 16.8643s

Epoch: 3
Epoch: 3 	Train: (l: 0.6548) 	Total Batch Time: 79.1648s 	Total Alloc: 98.785 GiB 	GC Time: 16.7144s

Epoch: 4
Epoch: 4 	Train: (l: 0.5979) 	Total Batch Time: 77.8049s 	Total Alloc: 98.785 GiB 	GC Time: 16.0937s

Epoch: 5
Epoch: 5 	Train: (l: 0.4985) 	Total Batch Time: 87.1895s 	Total Alloc: 98.785 GiB 	GC Time: 17.7275s

--- Koniec profilowania treningu ---


In [6]:
# --- Test Evaluation ---
# Runs the forward pass over the test set in batches and reports the
# sample-weighted mean loss and the classification accuracy.

batch_size = 64
num_test_samples = size(X_test, 2)
num_batches = ceil(Int, num_test_samples / batch_size)
total_test_loss_sum = 0.0
total_correct_predictions = 0 # integer count of correctly classified samples

t_test = @elapsed begin
    for i in 1:num_batches

        start_idx = (i - 1) * batch_size + 1
        end_idx = min(i * batch_size, num_test_samples)
        # Views (as in the training loop) avoid copying every batch out of the
        # test arrays before it is copied again into the input buffers.
        x_batch_test = view(X_test, :, start_idx:end_idx)
        y_batch_test = view(y_test, :, start_idx:end_idx)

        # Number of samples in the current batch (may be smaller for the last batch).
        current_test_batch_size = size(x_batch_test, 2)

        view(x_input_node.output, :, 1:current_test_batch_size) .= x_batch_test
        view(y_label_node.output, :, 1:current_test_batch_size) .= y_batch_test

        forward!(order)

        predictions = view(model_output_node.output, :, 1:current_test_batch_size)

        # NOTE(review): for a short last batch the buffers still contain columns
        # from the previous batch; if the loss node reduces over the whole buffer,
        # this value includes those stale samples — confirm.
        batch_loss = loss_node.output

        # Weight the batch loss by the batch size so the final mean is per sample.
        total_test_loss_sum += batch_loss * current_test_batch_size

        # --- Accuracy on the current test batch ---
        # Binary classification with a 0.5 threshold. Counting the matches
        # directly keeps the tally exact instead of dividing by the batch size
        # and multiplying back.
        total_correct_predictions += count((predictions .> 0.5f0) .== y_batch_test)
    end
end

# --- Mean loss and accuracy over the whole test set ---
avg_test_loss = total_test_loss_sum / num_test_samples
avg_test_accuracy = total_correct_predictions / num_test_samples * 100.0

println(@sprintf("Test Loss (czas: %.2fs): %.4f", t_test, avg_test_loss))
println("Test Accuracy: $avg_test_accuracy %")

Test Loss (czas: 1.46s): 0.5637
Test Accuracy: 72.28 %


## Performance test

In [2]:
using Statistics  # provides `mean` and `var`, which `performance` calls
# Test parameters; `perform_test` reads `cut`, `batch_size` and `epochs` as globals.
s = 10            # scale factor for `cut`
cut = 64 * s      # `perform_test` keeps only training columns 1:cut
batch_size = 64   # column count of the input/label buffers built in `perform_test`
epochs = 5        # number of passes `perform_test` makes over the truncated training set

"""
    performance(k)

Call `perform_test()` `k` times, store each returned value as `Float32`, and
print the run count, the individual results, their mean and their variance.
Returns `nothing`.
"""
function performance(k)
    results = zeros(Float32, k)
    for slot in eachindex(results)
        results[slot] = perform_test()
    end

    println("K: ", k)
    println("Results: ", results)
    println("Avarage: ", mean(results))
    println("Var: ", var(results))
    return nothing
end

"""
    perform_test()

Build a fresh model, train it for `epochs` epochs on the first `cut` columns of
the prepared IMDB training set, and return the mean per-batch training loss of
the last epoch (`0.0f0` when `epochs` is zero).

Reads the globals `cut`, `batch_size` and `epochs`.
"""
function perform_test()
    # One `load` call opens the file once; only the arrays that are used
    # during training are read (the test split is not needed here).
    dataset_path = "../../dataset/imdb_dataset_prepared.jld2"
    X_all, y_all, embeddings = load(dataset_path, "X_train", "y_train", "embeddings")
    X_train = X_all[:, 1:cut]
    y_train = y_all[:, 1:cut]
    # vocab = load("../../dataset/imdb_dataset_prepared.jld2", "vocab");

    input_size = size(X_train, 1) # number of features (rows of X_train)
    embedding_dim = size(embeddings, 1)

    model = Chain(
        Embedding(embeddings, name="embedding"),
        ConvolutionBlock(1, 3, name="layer1"),
        Dense3D(embedding_dim, 8, relu, name="dense1"),
        PoolingBlock(8),
        FlattenBlock(name="flatten"),
        Dense(input_size, 1, σ, name="softnet")
    )

    # Initial Constant nodes for the input data and the labels; each batch is
    # copied into their `output` arrays below.
    x_input_node = Constant(zeros(Float32, input_size, batch_size))
    y_label_node = Constant(zeros(Float32, 1, batch_size))

    # Build the training graph.
    loss_node, model_output_node, order = build_graph!(model, binarycrossentropy, x_input_node, y_label_node; loss_name="loss")

    optimizer_state = setup_optimizer(Adam(), model)

    # The sample count does not change between epochs, so the batch count is
    # computed once.
    num_samples = size(X_train, 2)
    num_batches = ceil(Int, num_samples / batch_size)

    avg_loss_epoch = 0.0f0
    for epoch in 1:epochs
        # Fresh random column order for every epoch.
        permutation = randperm(num_samples)
        X_train_shuffled_epoch = X_train[:, permutation]
        y_train_shuffled_epoch = y_train[:, permutation]

        loss_value = 0.0

        for i in 1:num_batches
            start_idx = (i - 1) * batch_size + 1
            end_idx = min(i * batch_size, num_samples)
            x_batch_view = view(X_train_shuffled_epoch, :, start_idx:end_idx)
            y_batch_view = view(y_train_shuffled_epoch, :, start_idx:end_idx)

            # NOTE(review): a final batch shorter than `batch_size` overwrites only
            # the first columns of the buffers; the rest keep the previous batch.
            current_batch_size = size(x_batch_view, 2)
            view(x_input_node.output, :, 1:current_batch_size) .= x_batch_view
            view(y_label_node.output, :, 1:current_batch_size) .= y_batch_view

            # Timing statistics were collected here but never read, so the
            # training step is called directly.
            forward!(order)
            backward!(order)
            step!(optimizer_state)

            # Make sure `loss_node.output` is refreshed by `forward!`.
            loss_value += loss_node.output
        end

        avg_loss_epoch = loss_value / num_batches
    end
    return avg_loss_epoch
end


perform_test (generic function with 1 method)

In [3]:
performance(10)

K: 10
Results: Float32[0.9373993, 0.73377645, 0.7593586, 0.95225406, 0.8279859, 0.80444765, 0.9878423, 1.1164854, 0.88346535, 0.69636226]
Avarage: 0.8699378
Var: 0.017015807


### Normal
K: 10
Avarage: 0.6230623
Var: 0.0007078699
### Transpose block
K: 10
Avarage: 0.7378746
Var: 0.0013544216
### Batch resize
K: 10
Avarage: 0.6238495
Var: 0.0006545016
### No embedding training
K: 10
Avarage: 0.72589546
Var: 0.001047738
### Newest
K: 10
Avarage: 0.6180767
Var: 0.00036857463
### Learning
K: 10
Avarage: 0.4358875
Var: 0.003978955