In [1]:
include("../MyReverseDiff.jl")
include("../MyEmbedding.jl")
include("../MyMlp.jl")
include("../TensorOperations.jl")

using .MyReverseDiff
using .MyMlp
using JLD2
using Printf
using Random

In [2]:
using BenchmarkTools

# Random Float32 inputs for the micro-benchmark: a 50×130×64 array `x` and a 3×1 array `m`.
x = randn(Float32, 50, 130, 64)
m = randn(Float32, 3, 1)
# All-ones array consumed only by the (currently disabled) dif_convolution benchmark;
# presumably the incoming gradient for the backward pass — TODO confirm.
g = ones(Float32, size(x, 1), size(x, 2) * size(m, 2), size(x, 3))

# `x`, `m` and `g` are non-constant globals, so they are interpolated with `$`.
# Without interpolation the measurement also includes global-variable lookup and
# dynamic dispatch, which inflates both the time and the allocation count.
#@btime MyReverseDiff.dif_convolution($x, $m, $g)
@btime MyReverseDiff.multi_convolution($x, $m)

  8.460 ms (416268 allocations: 16.03 MiB)


50×130×64 Array{Float32, 3}:
[:, :, 1] =
 -3.04443     0.504028  -1.16704   …   1.13953    -1.68675     4.42868
  0.486429    0.89772   -2.84683       0.965314   -2.42314     6.73673
  0.377292   -1.1309    -1.28232       2.21007    -2.58822     2.32216
  3.48555     2.25395    0.651317      4.12646    -1.53565     2.9489
 -0.230689   -0.787347  -2.49989       3.45516     2.93407     5.79568
  1.00348    -0.243322   1.78666   …   7.31157    -4.01358     2.08687
 -1.80243    -2.03204   -1.44316       0.75721     1.64116     2.7814
  0.481479    1.87765   -1.94287       1.31597     3.42113     1.53179
 -1.33072     3.30427   -0.260654     -0.366261   -0.995645    1.52391
  2.05919     0.697039  -2.45927       2.81124    -2.98661     2.66104
  ⋮                                ⋱                          
  2.21238    -0.697261  -3.5851       -0.373332   -3.18817    -2.49045
 -0.0881289  -0.244008  -3.77782      -1.2236      0.0500635  -2.94695
 -0.877933    0.554768  -1.35706      -1.25992

- 9.833 ms (416835 allocations: 25.48 MiB)
- 9.533 ms (416646 allocations: 23.85 MiB)
- 8.691 ms (416457 allocations: 19.16 MiB)
- 8.274 ms (416268 allocations: 16.03 MiB)

- 284.843 ms (851209 allocations: 545.02 MiB)
- 269.293 ms (851017 allocations: 541.87 MiB)

## Przygotowanie danych IMDB

In [2]:
# Path of the prepared IMDB dataset, relative to this notebook.
dataset_path = "../../dataset/imdb_dataset_prepared.jld2"

# Read every required entry in a single call so the file is opened and parsed once
# instead of once per variable. `load` with several names returns the values as a
# tuple in the same order as the requested names.
X_train, y_train, X_test, y_test, embeddings =
    load(dataset_path, "X_train", "y_train", "X_test", "y_test", "embeddings");
# The file presumably also stores a "vocab" entry (not needed here) — TODO confirm:
# vocab = load(dataset_path, "vocab");

input_size = size(X_train, 1) # number of features (rows of X_train)
embedding_dim = size(embeddings, 1)

50

## Trening modelu

In [None]:
# Number of samples processed per forward/backward pass. The graph's input buffers
# below are allocated with exactly this many columns.
batch_size = 64

# Layer stack of the classifier; the final Dense layer produces one σ-activated output
# per sample.
model = Chain(
    Embedding(embeddings, name = "embedding"),
    ConvolutionBlock(1,3,name="layer1"),
    Dense3D(embedding_dim, 8, relu, name="dense1"),
    PoolingBlock(8),
    FlattenBlock(name="flatten"),
    Dense(input_size, 1, σ, name="softnet")
)

# Create the initial Constant nodes that hold the input data and the labels.
# They are fixed-size buffers that get overwritten in place for every batch.
x_input_node = Constant(zeros(Float32, input_size, batch_size))
y_label_node = Constant(zeros(Float32, 1, batch_size))

# Build the training graph.
loss_node, model_output_node, order = build_graph!(model, binarycrossentropy, x_input_node, y_label_node; loss_name="loss")

optimizer_state = setup_optimizer(Adam(), model)

epochs = 5

num_train_samples = size(X_train, 2)
num_train_samples >= batch_size || throw(ArgumentError(
    "need at least $batch_size training samples to fill one batch, got $num_train_samples"))

# Train on full batches only. The input buffers always hold `batch_size` columns and
# forward!/backward! take no batch-size argument, so a shorter final batch would leave
# stale samples from the previous batch in the tail of the buffers; those samples would
# be trained on a second time and would also be counted in the averaged loss.
# The data is reshuffled every epoch, so the dropped remainder differs between epochs.
num_batches = num_train_samples ÷ batch_size

println("--- Rozpoczynam profilowanie treningu ---")

for epoch in 1:epochs
    # Draw a fresh sample order for this epoch and materialize the shuffled copies once.
    permutation = randperm(num_train_samples)
    X_train_shuffled_epoch = X_train[:, permutation]
    y_train_shuffled_epoch = y_train[:, permutation]

    loss_value = 0.0

    println("\nEpoch: $epoch")
    total_batch_time = 0.0
    total_batch_alloc = 0
    total_batch_gc_time = 0.0

    for i in 1:num_batches
        batch_range = ((i - 1) * batch_size + 1):(i * batch_size)

        # Overwrite the graph's input buffers in place with the current batch.
        x_input_node.output .= view(X_train_shuffled_epoch, :, batch_range)
        y_label_node.output .= view(y_train_shuffled_epoch, :, batch_range)

        # `@timed` returns the measurements (time, bytes, gctime) as a value,
        # whereas `@time` only prints them.
        stats = @timed begin
            forward!(order)
            backward!(order)
            step!(optimizer_state) # assumes step! is already implemented
        end
        # NOTE(review): relies on loss_node.output being refreshed by forward! — confirm.
        loss_value += loss_node.output

        total_batch_time += stats.time
        total_batch_alloc += stats.bytes
        total_batch_gc_time += stats.gctime
    end

    avg_loss_epoch = loss_value / num_batches

    println(@sprintf("Epoch: %d \tTrain: (l: %.4f) \tTotal Batch Time: %.4fs \tTotal Alloc: %s \tGC Time: %.4fs", 
                     epoch, avg_loss_epoch, total_batch_time, Base.format_bytes(total_batch_alloc), total_batch_gc_time))
end

println("\n--- Koniec profilowania treningu ---")

In [None]:
# --- Test Evaluation ---

# The batch width is dictated by the graph's fixed-size input buffer, so read it from
# the node instead of repeating the literal used when the graph was built.
batch_size = size(x_input_node.output, 2)
num_test_samples = size(X_test, 2)
num_batches = cld(num_test_samples, batch_size)
total_test_loss_sum = 0.0
total_correct_predictions = 0

t_test = @elapsed begin
    for i in 1:num_batches

        start_idx = (i - 1) * batch_size + 1
        end_idx = min(i * batch_size, num_test_samples)
        # Views avoid copying each batch out of the test set.
        x_batch_test = view(X_test, :, start_idx:end_idx)
        y_batch_test = view(y_test, :, start_idx:end_idx)

        # Number of samples in this batch (may be smaller for the last batch).
        current_test_batch_size = size(x_batch_test, 2)

        view(x_input_node.output, :, 1:current_test_batch_size) .= x_batch_test
        view(y_label_node.output, :, 1:current_test_batch_size) .= y_batch_test

        forward!(order)

        # Only the leading columns belong to the current batch.
        predictions = view(model_output_node.output, :, 1:current_test_batch_size)

        # Weight the batch loss by the batch size so the final average is per sample.
        # NOTE(review): for a short final batch the tail of the input buffers still holds
        # samples from the previous batch; if the loss node averages over the whole
        # buffer, this term is slightly contaminated by them — confirm against the
        # loss implementation.
        total_test_loss_sum += loss_node.output * current_test_batch_size

        # --- Accuracy on the current test batch ---
        # Binary classification with a 0.5 decision threshold; count exact matches
        # directly as an integer.
        total_correct_predictions += count((predictions .> 0.5f0) .== y_batch_test)
    end
end

# --- Average loss and accuracy over the whole test set ---
avg_test_loss = total_test_loss_sum / num_test_samples
avg_test_accuracy = total_correct_predictions / num_test_samples * 100.0

println(@sprintf("Test Loss (czas: %.2fs): %.4f", t_test, avg_test_loss))
println("Test Accuracy: $avg_test_accuracy %")