#   Setup modułów

In [1]:
include("../MyReverseDiff.jl")
include("../MyEmbedding.jl")
include("../MyMlp.jl")

using .MyReverseDiff
using .MyMlp
using JLD2
using Printf
using Random
using Flux
using Zygote

#   Przygotowanie danych

In [2]:
# Read all five arrays in one pass instead of re-opening and re-parsing the
# same JLD2 file once per variable.
X_train, y_train, X_test, y_test, embeddings =
    jldopen("../../dataset/imdb_dataset_prepared.jld2", "r") do file
        map(name -> file[name], ("X_train", "y_train", "X_test", "y_test", "embeddings"))
    end;
# Left unterminated on purpose so the notebook displays the embedding matrix.
embeddings


50×12849 Matrix{Float32}:
  0.90951   -0.58014    0.27137   0.68397   …   0.45505     0.014323  0.0
 -0.20702   -1.1316     0.61347  -0.68729      -0.0014904  -0.74624   0.0
 -0.090611   0.44189   -0.52498   0.8797       -0.45487     0.35701   0.0
 -0.63721   -0.048199  -0.7617   -0.35249      -0.15543     0.75488   0.0
  0.051387  -0.11754    0.37252   0.82288      -1.2866      0.11551   0.0
 -0.26292    0.97308    0.21401  -0.17179   …  -0.10727    -0.37074   0.0
  0.14454    1.0075    -1.0817   -1.4887        0.37509     0.80859   0.0
  0.40134   -1.2014     0.16501   0.98021       0.85616    -0.64355   0.0
  0.17305   -1.2752    -0.45105   0.031865     -0.045315   -0.63822   0.0
 -0.23503    0.66842   -0.77013  -0.7007       -0.3152      0.74175   0.0
  ⋮                                         ⋱                         
 -0.2502     0.54529    0.8323   -0.28752      -0.057228    0.36188   0.0
  0.37538    0.68665   -0.63336   1.0756        1.1033     -0.79495   0.0
  1.1403     0.

In [3]:
# Use the first training example as a one-sample batch.
x_batch = X_train[:, 1]
y_batch = y_train[1]
# Token indices as an integer column matrix (sequence length × 1).
x_batch_matrix = reshape(Int64.(x_batch), :, 1)
# Label as a 1×1 Float32 matrix.
y_batch_matrix = fill(Float32(y_batch), 1, 1)

1×1 Matrix{Float32}:
 0.0

# Flux 1 batch, 1 przejście forward, backward

##  Przygotowanie modelu

In [22]:
# Reference Flux model: embedding → 1-D convolution → max-pooling → flatten →
# dense sigmoid. The dense input size is derived from the data rather than
# hard-coded as 128, so the cell keeps working when the sequence length or the
# layer widths change.
f_model = let
    embedding_dim, vocab_size = size(embeddings)
    kernel_width, n_filters, pool_width = 3, 8, 8
    # An unpadded convolution shortens the sequence by `kernel_width - 1`;
    # pooling then keeps one value per full window.
    conv_len = size(X_train, 1) - kernel_width + 1
    flat_len = (conv_len ÷ pool_width) * n_filters
    Flux.Chain(
        Flux.Embedding(vocab_size, embedding_dim),
        # Swap the first two axes so the sequence axis comes first for Conv.
        x -> permutedims(x, (2, 1, 3)),
        Flux.Conv((kernel_width,), embedding_dim => n_filters, Flux.relu),
        Flux.MaxPool((pool_width,)),
        Flux.flatten,
        Flux.Dense(flat_len => 1, Flux.σ),
    )
end

# Copy the loaded `embeddings` matrix into the embedding layer's weights.
f_model.layers[1].weight .= embeddings;

##  Wyciągnięcie zainicjowanych wag modelu

In [23]:
# Read the initialised parameters straight from the layers that own them.
# This avoids the deprecated implicit `Flux.params` collection and does not
# depend on the order in which parameters happen to be enumerated.
f_embedding_weights = f_model[1].weight  # embedding table
f_conv_weights = f_model[3].weight       # convolution kernels
f_conv_bias = f_model[3].bias            # convolution bias
f_dense_weights = f_model[6].weight      # dense weights
f_dense_bias = f_model[6].bias           # dense bias

1-element Vector{Float32}:
 0.0

##  Przejście forward, backward

In [6]:
# Forward pass kept separate from the backward pass for clarity.
println("=== FORWARD PASS ===")
f_prediction = x_batch_matrix |> f_model
println("Predykcja: ", f_prediction)

# Binary cross-entropy of that prediction; no gradients are computed here.
f_loss = Flux.Losses.binarycrossentropy(f_prediction, y_batch_matrix)
println("Loss: ", f_loss)

=== FORWARD PASS ===
Predykcja: Float32[0.8154277;;]
Loss: 1.6897135


In [9]:
println("\n=== BACKWARD PASS ===")
# The closure re-runs the forward pass so the loss can be differentiated with
# respect to every array in `Flux.params(f_model)`.
# NOTE(review): this is the implicit-parameter form that Flux warns about; it is
# kept because the result is later indexed by parameter array.
gradients = Flux.gradient(
    () -> Flux.Losses.binarycrossentropy(f_model(x_batch_matrix), y_batch_matrix),
    Flux.params(f_model),
)


=== BACKWARD PASS ===


│ Please see the docs for new explicit form.
│   caller = top-level scope at jl_notebook_cell_df34fa98e69747e1a8f8a730347b8e2f_X15sZmlsZQ==.jl:2
└ @ Core /home/oliwier/Repos/AWiD/MyMlp/src/notebooks/jl_notebook_cell_df34fa98e69747e1a8f8a730347b8e2f_X15sZmlsZQ==.jl:2


Grads(...)

In [10]:
    # Fallback: compute the loss gradient w.r.t. the convolution output by hand.
    function get_conv_gradient()
        # Run layers 1-3 of the Flux chain outside the differentiated closure.
        conv_activation = f_model[3](f_model[2](f_model[1](x_batch_matrix)))

        # Differentiate layers 4-6 plus the loss with respect to that activation;
        # `Zygote.gradient` returns a tuple with one entry per argument.
        conv_grad, = Zygote.gradient(conv_activation) do act
            pred = f_model[6](f_model[5](f_model[4](act)))
            return Flux.Losses.binarycrossentropy(pred, y_batch_matrix)
        end

        return conv_grad
    end

    gradient_from_conv = get_conv_gradient()

128×8×1 Array{Float32, 3}:
[:, :, 1] =
  0.0         0.0        -0.153315  …  0.139678    0.0         0.0
  0.0        -0.0782701   0.0          0.0         0.0         0.0
  0.0         0.0         0.0          0.0         0.0         0.0
  0.0         0.0         0.0          0.0         0.0         0.0
  0.139928    0.0         0.0          0.0        -0.153387    0.0
  0.0         0.0         0.0       …  0.0         0.0        -0.0603627
  0.0         0.0         0.0          0.0         0.0         0.0
  0.0         0.0         0.0          0.0         0.0         0.0
  0.0         0.0         0.0          0.0         0.0         0.0
  0.0         0.0         0.0          0.0         0.0         0.0
  ⋮                                 ⋱  ⋮                      
  0.0         0.0         0.0          0.0         0.0         0.0
 -0.0340818   0.132149    0.17002   …  0.0323642  -0.0579769  -0.0406392
  0.0         0.0         0.0          0.0         0.0         0.0
  0.0         0

In [11]:
# Look up the gradient of each parameter array in `gradients`.
params_list = collect(Flux.params(f_model))
f_grad_embedding = gradients[params_list[1]]
f_grad_conv_weights = gradients[params_list[2]]

# Lay the convolution-bias gradient out as a single row (1 × n).
f_grad_conv_bias = reshape(gradients[params_list[3]], 1, :)

f_grad_dense_weights = gradients[params_list[4]]
f_grad_dense_bias = gradients[params_list[5]]

1-element Vector{Float32}:
 0.8154272

# Nasz CNN 1 batch, 1 przejście forward, backward

##  Przygotowanie modelu

In [4]:
# Hand-written counterpart of the Flux reference model. Layer sizes are derived
# from the data: a dense input size of `size(X_train, 1) - 2` is really the
# convolution output length, and it matches the flattened size only while the
# pooling width equals the number of filters.
model = let
    embedding_dim = size(embeddings, 1)
    kernel_width, n_filters, pool_width = 3, 8, 8
    conv_len = size(X_train, 1) - kernel_width + 1
    # assumes pooling keeps `conv_len ÷ pool_width` positions per filter, as the
    # Flux reference model does — TODO confirm for lengths not divisible by it
    flat_len = (conv_len ÷ pool_width) * n_filters
    MyMlp.Chain(
        MyMlp.Embedding(embeddings, name="embedding"),
        MyMlp.TransposeBlock(),
        MyMlp.ConvolutionBlock(kernel_width, embedding_dim, n_filters, name="layer1"),
        MyMlp.PoolingBlock(pool_width),
        MyMlp.FlattenBlock(name="flatten"),
        MyMlp.Dense(flat_len, 1, MyMlp.σ, name="softnet"),
    )
end

# Wrap the single-sample batch and its label as constant graph nodes.
x_input_node = Constant(Float32.(x_batch_matrix))
y_label_node = Constant(y_batch_matrix)


# Build the training graph; `order` is the node list the later cells index into.
loss_node, model_output_node, order = build_graph!(model, binarycrossentropy, x_input_node, y_label_node; loss_name="loss")

(op loss(typeof(Main.MyReverseDiff.binary_cross_entropy_loss_impl)), op.softnet_sigmoid(typeof(Main.MyReverseDiff.σ)), GraphNode[var softnet_w
 ┣━ ^ 1×128 Matrix{Float32}
 ┗━ ∇ 1×128 Matrix{Float32}, var embedding_W
 ┣━ ^ 50×12849 Matrix{Float32}
 ┗━ ∇ 50×12849 Matrix{Float32}, const Float32[6391.0; 143.0; … ; 12849.0; 12849.0;;], op.embedding_output(typeof(Main.MyEmbedding.embedding)), op.transposition(typeof(Main.MyReverseDiff.transpose)), var layer1_masks_w
 ┣━ ^ 3×50×8 Array{Float32, 3}
 ┗━ ∇ 3×50×8 Array{Float32, 3}, op.layer1_conv(typeof(Main.MyReverseDiff.conv)), var layer1_masks_b
 ┣━ ^ 1×8 Matrix{Float32}
 ┗━ ∇ 1×8 Matrix{Float32}, op.layer1_bias(typeof(+)), op.layer1_activation(typeof(Main.MyReverseDiff.relu)), const Float32[8.0;;], op.pooling_pool(typeof(max_pool)), op.flatten(typeof(flatten)), op.softnet_mul(typeof(LinearAlgebra.mul!)), var softnet_b
 ┣━ ^ 1×1 Matrix{Float32}
 ┗━ ∇ 1×1 Matrix{Float32}, op.softnet_add(typeof(+)), op.softnet_sigmoid(typeof(Main.MyReverseDiff.

##  Przeniesienie wag z Flux

In [13]:
# Overwrite the graph variables with the weights taken from the Flux model.
# Positions in `order`: 1 = softnet_w, 2 = embedding_W, 6 = layer1_masks_w,
# 8 = layer1_masks_b, 15 = softnet_b.
order[1].output = f_dense_weights
order[2].output = f_embedding_weights

# The kernel is flipped along its first dimension.
# NOTE(review): presumably the two conv implementations use opposite kernel
# orientation — confirm against MyReverseDiff.conv.
order[6].output = reverse(f_conv_weights; dims=1)

# Biases are stored as rows: reshape to a column, then take the adjoint.
order[8].output = adjoint(reshape(f_conv_bias, :, 1))
order[15].output = adjoint(reshape(f_dense_bias, :, 1))

1×1 adjoint(::Matrix{Float32}) with eltype Float32:
 0.0

In [14]:
# Confirm that each graph variable now holds the corresponding Flux weights
# (undoing the kernel flip and the bias transposition before comparing).
weight_checks = (
    "Embedding: " => (order[2].output ≈ f_embedding_weights),
    "Conv weights: " => (reverse(order[6].output; dims=1) ≈ f_conv_weights),
    "Conv bias: " => (order[8].output' ≈ f_conv_bias),
    "Dense weights: " => (order[1].output ≈ f_dense_weights),
    "Dense bias: " => (order[15].output' ≈ f_dense_bias),
)
for (label, matches) in weight_checks
    println(label, matches)
end

Embedding: true
Conv weights: true
Conv bias: true
Dense weights: true
Dense bias: true


##  Przejście forward i backward

In [5]:
# Run the forward pass over the ordered graph; the returned value is kept as the
# loss and later compared with the Flux loss.
loss = forward!(order)
# The prediction is read from the model's output node after the forward pass.
prediction = model_output_node.output

1×1 Matrix{Float32}:
 0.22793055

In [6]:
# Run the backward pass over the same ordered node list; later cells read the
# `.gradient` fields of its nodes.
backward!(order)

In [7]:
# Gradient stored on order[4] (the embedding output op): slice over the first
# axis at indices (1, 1) of the remaining two axes.
order[4].gradient[:, 1, 1]

50-element Vector{Float32}:
 -0.003311243
 -0.0021821815
  0.00012774358
  0.016228314
 -0.004325994
 -0.0004145885
 -0.0029961865
 -0.01029325
 -0.0070450525
  0.0076419823
  ⋮
  0.004769509
 -0.0019003567
 -0.00490231
 -0.0032707406
 -0.0087365005
  0.0036606088
  0.0031178193
  0.002676794
  0.0027534144

In [8]:
# Gradient stored on order[5] (the transposition op): the same slice with the
# first two axes swapped, so it should equal the order[4] slice.
order[5].gradient[1, :, 1]

50-element Vector{Float32}:
 -0.003311243
 -0.0021821815
  0.00012774358
  0.016228314
 -0.004325994
 -0.0004145885
 -0.0029961865
 -0.01029325
 -0.0070450525
  0.0076419823
  ⋮
  0.004769509
 -0.0019003567
 -0.00490231
 -0.0032707406
 -0.0087365005
  0.0036606088
  0.0031178193
  0.002676794
  0.0027534144

#   Porównanie forward dla Flux i naszego

In [14]:
# Scalar comparison of the Flux prediction with the prediction of our model.
f_prediction[1] ≈ prediction[1]

true

In [15]:
# Feed the sample through the Flux layers one at a time, keeping every
# intermediate result for comparison with our graph.
f_x = reshape(Int.(x_batch), :, 1)

# Layer 1: embedding
f_x1 = f_x |> f_model[1]
println("Po Embedding: ", size(f_x1))

# Layer 2: permutedims
f_x2 = f_x1 |> f_model[2]
println("Po permutedims: ", size(f_x2))

# Layer 3: convolution
f_x3 = f_x2 |> f_model[3]
println("Po Conv: ", size(f_x3))

# Layer 4: max-pooling
f_x4 = f_x3 |> f_model[4]
println("Po MaxPool: ", size(f_x4))

# Layer 5: flatten
f_x5 = f_x4 |> f_model[5]
println("Po flatten: ", size(f_x5))

# Layer 6: dense
f_output = f_x5 |> f_model[6]
println("Final output: ", f_output)

Po Embedding: (50, 130, 1)
Po permutedims: (130, 50, 1)
Po Conv: (128, 8, 1)
Po MaxPool: (16, 8, 1)
Po flatten: (128, 1)
Final output: Float32[0.8352198;;]


In [16]:
# Compare each intermediate output of our graph with the matching Flux layer
# output, then compare the two losses.
output_checks = (
    "Embedding output: " => (order[4].output ≈ f_x1),
    "Permutedims output: " => (order[5].output ≈ f_x2),
    "Conv output: " => (order[10].output ≈ f_x3),
    "MaxPool output: " => (order[12].output ≈ f_x4),
    "Flatten output: " => (order[13].output ≈ f_x5),
    "Dense output: " => (order[17].output ≈ f_output),
    "Loss output: " => (loss ≈ f_loss),
)
for (label, matches) in output_checks
    println(label, matches)
end

Embedding output: true
Permutedims output: true
Conv output: true
MaxPool output: true
Flatten output: true
Dense output: true
Loss output: true


# Porównanie backward dla Flux i naszego

In [17]:
# Collect the gradients stored on the graph variables.
grad_dense_weights = order[1].gradient
grad_embedding = order[2].gradient
grad_dense_bias = order[15].gradient
# Flipped along the first dimension, mirroring the flip used for the weights.
grad_conv_weights = reverse(order[6].gradient; dims=1)
grad_conv_bias = order[8].gradient

1×8 Matrix{Float32}:
 0.195691  -0.315396  -0.218871  -0.7107  …  0.677441  0.64355  0.19432

In [18]:
# Compare every parameter gradient of our graph with the Flux gradient.
# NOTE(review): the recorded run prints `false` for the embedding gradient; one
# thing to check is how MyEmbedding accumulates gradients for token indices that
# repeat within a sequence — unverified from this notebook.
gradient_checks = (
    "Dense bias grad: " => (grad_dense_bias ≈ f_grad_dense_bias),
    "Dense weights grad: " => (grad_dense_weights ≈ f_grad_dense_weights),
    "Conv weights grad: " => (grad_conv_weights ≈ f_grad_conv_weights),
    "Conv bias grad: " => (grad_conv_bias ≈ f_grad_conv_bias),
    "Embedding grad: " => (grad_embedding ≈ f_grad_embedding),
)
for (label, matches) in gradient_checks
    println(label, matches)
end

Dense bias grad: true
Dense weights grad: true
Conv weights grad: true
Conv bias grad: true
Embedding grad: false
