In [1]:
using Pkg
# Move to the parent of this file's directory; the relative paths used afterwards
# (the project activated below and the `data/...` files loaded later) are
# resolved against that directory.
cd(@__DIR__)
cd("..")
# Activate the project environment located in the new working directory.
Pkg.activate(".")


using MyDiffMLP
using MyDiffMLP.MyAD
using MyDiffMLP.MyNN

[32m[1m  Activating[22m[39m project at `e:\Proga\AWID\MyDiffMLP`


In [None]:
# data = prepare_dataset(10000, 0.8)

# X_train = data.X_train
# y_train = data.y_train
# X_test = data.X_test
# y_test = data.y_test

    
# nothing

In [5]:
using JLD2

# Location of the prepared dataset, relative to the current working directory.
dataset_path = "data/imdb_dataset_prepared.jld2"

# Ask for every entry in a single `load` call so the file is opened and parsed
# once instead of once per variable. With several names, `load` returns the
# values as a tuple in the order the names were given.
X_train, y_train, X_test, y_test, embeddings, vocab = load(
    dataset_path, "X_train", "y_train", "X_test", "y_test", "embeddings", "vocab"
)

# The first dimension of `embeddings` is taken as the embedding width.
embedding_dim = size(embeddings, 1)
vocab_size = length(vocab)


12849

In [None]:
using Printf, Statistics, Random
using .MyAD
using .MyNN

# === Define model ===
# Network definition. The layer types come from the project modules imported
# above; the meaning of their positional arguments is not visible in this file,
# so the notes below are assumptions to be confirmed.
model = Chain(
    # Embedding sized by the vocabulary and initialised from the loaded `embeddings`.
    Embedding(vocab_size, embedding_dim; pretrained_weights=embeddings),
    # NOTE(review): presumably embedding_dim input channels, 8 filters, kernel width 3 — confirm.
    Conv1D(embedding_dim, 8, 3, relu),
    # NOTE(review): presumably pooling window 8 with stride 8 — confirm.
    MaxPool1D(8, 8),
    flatten_last_two_dims,
    # NOTE(review): the hard-coded 128 input features presumably have to match the
    # flattened pooling output, which would depend on the input sequence length — confirm.
    Dense(128, 1, sigmoid)
)


# === Define loss and accuracy ===
"""
    bce(ŷ, y)

Return the mean binary cross-entropy between predictions `ŷ` and targets `y`.

Predictions are clamped to `[1e-7, 1 - 1e-7]` before the logarithms are taken,
so saturated predictions (exactly 0 or 1) yield a large but finite loss.
"""
function bce(ŷ, y)
    lo = 1e-7
    hi = 1 - lo
    p = clamp.(ŷ, lo, hi)
    # Per-element log-likelihood; the loss is its negated mean.
    loglik = @. y * log(p) + (1 - y) * log(1 - p)
    return -mean(loglik)
end

"""
    bce_grad(ŷ, y)

Return the gradient of the mean binary cross-entropy with respect to the
predictions `ŷ`, for targets `y`.

Element-wise this is `(ŷ - y) / (ŷ * (1 - ŷ))`, divided by the total number of
targets so that it matches a loss averaged over every element of `y`. For a
`1×N` target matrix this equals dividing by the batch size `N`.
"""
function bce_grad(ŷ, y)
    ϵ = 1e-7
    # Keep the denominator away from zero for saturated predictions (ŷ ≈ 0 or 1).
    denom = clamp.(ŷ .* (1 .- ŷ), ϵ, 1.0)
    # Normalise by the element count rather than `size(y, 2)`: the latter is 1
    # for vector targets and ignores extra rows, which would not match a mean
    # taken over all elements.
    return (ŷ .- y) ./ denom ./ length(y)
end

"""
    accuracy(ŷ, y)

Return the fraction of entries where prediction and target fall on the same
side of the 0.5 threshold (values strictly greater than 0.5 count as positive).
"""
function accuracy(ŷ, y)
    predicted_positive = ŷ .> 0.5
    actual_positive = y .> 0.5
    return mean(predicted_positive .== actual_positive)
end

# === Training settings ===
# Hyperparameters read by the training loop.
batch_size = 16
epochs = 5
η = 1e-3

# Collect the model's parameters and build the Adam state for them.
params = parameters(model)
state = AdamState(params)

"""
    create_batches(X, Y; batchsize=64, shuffle=true, rng=Random.default_rng())

Split the columns of `X` and `Y` into mini-batches of at most `batchsize` columns.

# Arguments
- `X`, `Y`: two-dimensional arrays indexed as `A[:, columns]`; they must have
  the same number of columns.

# Keywords
- `batchsize`: maximum number of columns per batch; the last batch may be smaller.
- `shuffle`: when `true`, the column order is permuted before batching.
- `rng`: random number generator used for the permutation. The default is the
  task-local default generator; pass a seeded generator for reproducible batches.

# Returns
A vector of `(X_batch, Y_batch)` tuples. Each element holds copies of the
selected columns. The vector is empty when `X` has no columns.

# Throws
- `ArgumentError` if `batchsize` is not positive.
- `DimensionMismatch` if `X` and `Y` have different numbers of columns.
"""
function create_batches(X, Y; batchsize=64, shuffle=true, rng=Random.default_rng())
    batchsize > 0 || throw(ArgumentError("batchsize must be positive, got $batchsize"))
    n = size(X, 2)
    size(Y, 2) == n || throw(
        DimensionMismatch("X has $n columns but Y has $(size(Y, 2))")
    )

    idxs = collect(1:n)
    if shuffle
        Random.shuffle!(rng, idxs)
    end

    return map(1:batchsize:n) do start
        # Compute the column selection once and use it for both arrays.
        cols = idxs[start:min(start + batchsize - 1, n)]
        (X[:, cols], Y[:, cols])
    end
end

# === Training loop ===
for epoch in 1:epochs
    println("=== Epoch $epoch ===")
    total_loss = 0.0
    total_acc = 0.0
    num_batches = 0

    # Batches are rebuilt at the start of every epoch.
    batches = create_batches(X_train, y_train, batchsize=batch_size)
    println("  → Training on $(length(batches)) batches of size $batch_size...")

    # `t` is the wall-clock time of the whole epoch's training pass (evaluation excluded).
    t = @elapsed begin
        for (i, (x, y)) in enumerate(batches)
            println("  → Batch $i")

            # Build the graph for this batch and order its nodes for evaluation.
            out = model(x)
            graph = topological_sort(out)

            println("    Forward pass...")
            forward!(graph)

            ŷ = out.output
            loss = bce(ŷ, y)
            acc = accuracy(ŷ, y)

            total_loss += loss
            total_acc += acc
            num_batches += 1

            @printf("    Train loss: %.4f | acc: %.4f\n", loss, acc)

            # Reset gradients before seeding the output node, so the seed assigned
            # below cannot be affected by the reset.
            # NOTE(review): assumes zero_gradients! only resets gradient buffers — confirm.
            zero_gradients!(model)
            out.gradient = bce_grad(ŷ, y)
            println("    Backward pass...")
            @time backward!(graph, out.gradient)

            update_adam!(state, params, η)
        end
    end

    # Averages of the per-batch values; this equals the per-sample average only
    # when every batch has the same size.
    train_loss = total_loss / num_batches
    train_acc = total_acc / num_batches

    println("  → Evaluation on test set...")
    # The whole test set goes through the model in a single forward pass.
    out_eval = model(X_test)
    forward!(topological_sort(out_eval))
    test_pred = out_eval.output
    test_loss = bce(test_pred, y_test)
    test_acc = accuracy(test_pred, y_test)

    @printf("✅ Epoch %d finished in %.2fs\n", epoch, t)
    @printf("🏋️  Train: loss = %.4f, acc = %.4f\n", train_loss, train_acc)
    @printf("🧪  Test : loss = %.4f, acc = %.4f\n\n", test_loss, test_acc)
end



=== Epoch 1 ===
  → Training on 2500 batches of size 16...
  → Batch 1
    Forward pass...
    Train loss: 2.6626 | acc: 0.5000
    Backward pass...
  4.512027 seconds (3.01 M allocations: 244.081 MiB, 1.11% gc time, 77.44% compilation time)
  → Batch 2
    Forward pass...
    Train loss: 1.7524 | acc: 0.5625
    Backward pass...
  0.872652 seconds (445.98 k allocations: 114.724 MiB, 1.74% gc time)
  → Batch 3
    Forward pass...
    Train loss: 2.6889 | acc: 0.3750
    Backward pass...
  0.910604 seconds (445.98 k allocations: 114.724 MiB, 1.68% gc time)
  → Batch 4
    Forward pass...
    Train loss: 1.0215 | acc: 0.7500
    Backward pass...
  0.894780 seconds (445.98 k allocations: 114.723 MiB, 1.79% gc time)
  → Batch 5
    Forward pass...
    Train loss: 3.8225 | acc: 0.3125
    Backward pass...
  0.940852 seconds (445.98 k allocations: 114.722 MiB, 1.76% gc time)
  → Batch 6
    Forward pass...
    Train loss: 3.5404 | acc: 0.3750
    Backward pass...
  0.871189 seconds (445.98 k