In [1]:
# Switch the working directory to the parent of this file's directory and
# activate the Julia project found there. Relative paths used later in the
# notebook (e.g. "data/...") are resolved against this directory.
using Pkg
cd(@__DIR__)
cd("..")
Pkg.activate(".")


# Load the project package, then bring its MyAD and MyNN submodules into scope.
using NeuDiff
using .NeuDiff.MyAD
using .NeuDiff.MyNN

[32m[1m  Activating[22m[39m project at `d:\Proga\AWID\MyDiffMLP`


In [2]:
# data = prepare_dataset(10000, 0.8)

# X_train = data.X_train
# y_train = data.y_train
# X_test = data.X_test
# y_test = data.y_test

    
# nothing

In [3]:
using JLD2
# Read every stored array in a single pass: the file is opened once and only
# the requested entries are read, instead of re-opening and re-parsing the
# whole file once per variable.
dataset_path = "data/imdb_dataset_prepared.jld2"
dataset_keys = ("X_train", "y_train", "X_test", "y_test", "embeddings", "vocab")
X_train, y_train, X_test, y_test, embeddings, vocab = jldopen(dataset_path, "r") do file
    # `map` over a tuple returns a tuple, so the values come back in key order.
    map(key -> file[key], dataset_keys)
end

# Sizes handed to the embedding layer: the first dimension of the embedding
# matrix and the number of vocabulary entries.
embedding_dim = size(embeddings, 1)
vocab_size = length(vocab)


12849

In [4]:
using JLD2, Printf, Statistics, Random
using TimerOutputs, LinearAlgebra

# === Model ===
# Layers are applied in the order listed. The numeric arguments are passed
# straight to the NeuDiff layer constructors; their meaning (channel counts,
# kernel width, pooling window/stride) is presumed from the constructor names —
# TODO confirm against the MyNN definitions.
model = Chain(
    Embedding(vocab_size, embedding_dim),
    x -> PermuteDimsOp(x, (2, 1, 3)),  # swap the first two axes: (L, C, B) -> (C, L, B)
    Conv1D(embedding_dim, 8, 3, relu),
    MaxPool1D(8, 8),
    flatten_last_two_dims,
    # NOTE(review): 128 is hard-coded; presumably it must equal the flattened
    # feature count produced by the layers above — verify if the input length
    # or the conv/pool settings change.
    Dense(128, 1, sigmoid)
)
# Replace the value stored in the first layer's weight with the `embeddings`
# array loaded from the dataset file.
model.layers[1].weight.output = embeddings


# === Loss and accuracy ===
"""
    bce(ŷ, y)

Return the mean binary cross-entropy between predictions `ŷ` and targets `y`.

Predictions are clamped to `[1e-7, 1 - 1e-7]` before taking logarithms so that
exact 0 or 1 predictions do not produce infinite values.
"""
function bce(ŷ, y)
    lower = 1e-7
    upper = 1 .- lower
    p = clamp.(ŷ, lower, upper)
    # Element-wise log-likelihood of each target under the clamped prediction.
    log_likelihood = @. y * log(p) + (1 - y) * log(1 - p)
    return -mean(log_likelihood)
end

"""
    bce_grad(ŷ, y)

Return the gradient of the mean binary cross-entropy (`bce`) with respect to
the predictions `ŷ`, for targets `y`.

Predictions are clamped to `[1e-7, 1 - 1e-7]`, matching `bce`, so the
denominator `ŷ * (1 - ŷ)` never reaches zero. The result has the same shape
as `ŷ`.
"""
function bce_grad(ŷ, y)
    ϵ = 1e-7
    ŷ_clipped = clamp.(ŷ, ϵ, 1 .- ϵ)
    # `bce` averages over every element via `mean`, so the gradient must be
    # normalised by the element count. For a (1, batch) output this is the
    # same value as size(ŷ, 2), but it also stays correct for vector outputs.
    n = length(ŷ)
    return (ŷ_clipped .- y) ./ (ŷ_clipped .* (1 .- ŷ_clipped) .* n)
end

"""
    accuracy(ŷ, y)

Return the fraction of entries where prediction `ŷ` and target `y` fall on the
same side of the 0.5 threshold (strictly greater than 0.5 counts as positive).
"""
function accuracy(ŷ, y)
    predicted_positive = ŷ .> 0.5
    actual_positive = y .> 0.5
    return mean(predicted_positive .== actual_positive)
end

# === Optimizer ===
# Training hyper-parameters.
η = 0.001          # step size passed to update_adam!
epochs = 5         # number of passes over the training set
batch_size = 64    # samples per mini-batch

# Collect the model's parameters and create the Adam state for them.
params = parameters(model)
state = AdamState(params)

# === Mini-batch generator ===
"""
    create_batches(X, Y; batchsize=64, shuffle=true, rng=Random.default_rng())

Split `X` and `Y` column-wise into mini-batches and return them as a vector of
`(x, y)` tuples. Each tuple holds copies of the selected columns.

# Keywords
- `batchsize`: maximum number of columns per batch; the last batch may be
  shorter.
- `shuffle`: when `true`, the column order is permuted before batching.
- `rng`: random number generator used for shuffling; pass a seeded generator
  for reproducible batches.

# Throws
- `ArgumentError` if `batchsize` is not positive.
- `DimensionMismatch` if `X` and `Y` have a different number of columns.
"""
function create_batches(X, Y; batchsize=64, shuffle=true, rng=Random.default_rng())
    batchsize > 0 ||
        throw(ArgumentError("batchsize must be positive, got $batchsize"))
    n = size(X, 2)
    size(Y, 2) == n || throw(
        DimensionMismatch("X has $n columns but Y has $(size(Y, 2))"),
    )

    idxs = collect(1:n)
    if shuffle
        # `Random.` is spelled out because the keyword `shuffle` shadows the
        # function of the same name inside this body.
        Random.shuffle!(rng, idxs)
    end
    # `partition` yields consecutive chunks of at most `batchsize` indices,
    # with a shorter final chunk when `n` is not a multiple of `batchsize`.
    return [(X[:, chunk], Y[:, chunk]) for chunk in Iterators.partition(idxs, batchsize)]
end


# === Training loop ===
# Per epoch: re-batch the training data, then for each batch run a forward
# pass, compute BCE loss and accuracy, back-propagate and apply an Adam update.
# After the batches, evaluate once on the whole test set and print a summary.
for epoch in 1:epochs
    println("=== Epoch $epoch ===")
    # Running sums are weighted by batch sample count so that a shorter final
    # batch does not skew the epoch averages.
    total_loss, total_acc, num_samples = 0.0, 0.0, 0
    batches = create_batches(X_train, y_train, batchsize=batch_size)
    num_batches = length(batches)
    println("  → Training on $num_batches batches of size $batch_size...")

    # Only the training pass is timed; evaluation below is excluded.
    t = @elapsed begin
        for (i, (x, y)) in enumerate(batches)
            out = model(x)
            graph = topological_sort(out)
            forward!(graph)

            ŷ = out.output
            loss = bce(ŷ, y)
            acc = accuracy(ŷ, y)

            # create_batches slices along dimension 2, so this is the number
            # of samples in the current batch.
            batch_len = size(x, 2)
            total_loss += loss * batch_len
            total_acc += acc * batch_len
            num_samples += batch_len

            zero_gradients!(model)
            out.gradient = bce_grad(ŷ, y)
            backward!(graph, out.gradient)
            update_adam!(state, params, η)

            # Report every 100th batch and always the last one.
            if i % 100 == 0 || i == num_batches
                println(@sprintf("    Batch %d/%d: loss = %.4f, acc = %.4f", i, num_batches, loss, acc))
            end
        end
    end

    train_loss = total_loss / num_samples
    train_acc = total_acc / num_samples

    # === Evaluation ===
    # The entire test set goes through the model in one forward pass.
    println("  → Evaluation on test set...")
    out_eval = model(X_test)
    forward!(topological_sort(out_eval))
    test_pred = out_eval.output
    test_loss = bce(test_pred, y_test)
    test_acc = accuracy(test_pred, y_test)

    println(@sprintf("✅ Epoch %d finished in %.2fs", epoch, t))
    println(@sprintf("🏋️  Train: loss = %.4f, acc = %.4f", train_loss, train_acc))
    println(@sprintf("🧪  Test : loss = %.4f, acc = %.4f\n", test_loss, test_acc))
end

=== Epoch 1 ===
  → Training on 625 batches of size 64...
    Batch 100/625: loss = 0.6912, acc = 0.5625
    Batch 200/625: loss = 0.6163, acc = 0.7188
    Batch 300/625: loss = 0.5702, acc = 0.7500
    Batch 400/625: loss = 0.6218, acc = 0.6250
    Batch 500/625: loss = 0.5425, acc = 0.7031
    Batch 600/625: loss = 0.3847, acc = 0.8281
    Batch 625/625: loss = 0.4253, acc = 0.7969
  → Evaluation on test set...
✅ Epoch 1 finished in 43.34s
🏋️  Train: loss = 0.5812, acc = 0.6783
🧪  Test : loss = 0.4144, acc = 0.8176

=== Epoch 2 ===
  → Training on 625 batches of size 64...
    Batch 100/625: loss = 0.4191, acc = 0.8281
    Batch 200/625: loss = 0.4974, acc = 0.7656
    Batch 300/625: loss = 0.3311, acc = 0.8594
    Batch 400/625: loss = 0.2289, acc = 0.9531
    Batch 500/625: loss = 0.2632, acc = 0.8750
    Batch 600/625: loss = 0.4147, acc = 0.7656
    Batch 625/625: loss = 0.3525, acc = 0.8281
  → Evaluation on test set...
✅ Epoch 2 finished in 30.98s
🏋️  Train: loss = 0.3459, acc 