In [1]:
using Pkg
# Make the parent of this file's directory the working directory, so that
# relative paths used later (e.g. "data/...") resolve against that directory.
cd(@__DIR__)
cd("..")
# Activate the project environment located in the (new) working directory.
Pkg.activate(".")


using NeuDiff
using .NeuDiff.MyAD
using .NeuDiff.MyNN

[32m[1m  Activating[22m[39m project at `d:\Proga\AWID\MyDiffMLP`


In [None]:
# data = prepare_dataset(10000, 0.8)

# X_train = data.X_train
# y_train = data.y_train
# X_test = data.X_test
# y_test = data.y_test

    
# nothing

In [2]:
using JLD2

# Location of the prepared dataset, relative to the current working directory.
dataset_path = "data/imdb_dataset_prepared.jld2"

# Read every required entry with a single `load` call: the file is opened once
# instead of once per variable, and the path is written in exactly one place.
# With several names, `load` returns the values as a tuple in the same order.
X_train, y_train, X_test, y_test, embeddings, vocab = load(
    dataset_path, "X_train", "y_train", "X_test", "y_test", "embeddings", "vocab"
)

# The first dimension of the embedding matrix is the embedding size; the
# vocabulary size is the number of entries in `vocab`.
embedding_dim = size(embeddings, 1)
vocab_size = length(vocab)


12849

In [4]:
using JLD2, Printf, Statistics, Random
using TimerOutputs, LinearAlgebra

# === Model ===
# Classifier assembled as a chain of layers: embedding lookup, axis permutation,
# 1-D convolution, max-pooling, flattening, and a single sigmoid output unit.
model = Chain(
    Embedding(vocab_size, embedding_dim),
    x -> PermuteDimsOp(x, (2, 1, 3)),  # (L, C, B) -> (C, L, B)
    # NOTE(review): arguments presumably are (in_channels, out_channels,
    # kernel_size, activation) — confirm against the Conv1D definition.
    Conv1D(embedding_dim, 8, 3, relu),
    # NOTE(review): presumably (window, stride) — confirm against MaxPool1D.
    MaxPool1D(8, 8),
    flatten_last_two_dims,
    # NOTE(review): the input size 128 is hard-coded; it presumably has to equal
    # the number of features left after pooling and flattening, which would
    # depend on the input sequence length — confirm.
    Dense(128, 1, sigmoid)
)
# Replace the value stored in the first layer's weight (the Embedding layer)
# with the `embeddings` array loaded from the dataset file.
model.layers[1].weight.output = embeddings


# === Loss and accuracy ===
"""
    bce(ŷ, y)

Return the binary cross-entropy between predictions `ŷ` and targets `y`,
averaged over all elements.

Predictions are clamped to `[1e-7, 1 - 1e-7]` before taking logarithms so that
`log(0)` is never evaluated.
"""
function bce(ŷ, y)
    lower = 1e-7
    upper = 1 - lower
    p = clamp.(ŷ, lower, upper)
    # Per-element log-likelihood of the targets under the clamped predictions.
    loglik = @. y * log(p) + (1 - y) * log(1 - p)
    return -mean(loglik)
end

"""
    bce_grad(ŷ, y)

Return the gradient of the mean binary cross-entropy with respect to the
predictions `ŷ`, given targets `y`. The result has the same shape as `ŷ`.

Predictions are clamped to `[1e-7, 1 - 1e-7]` first, so the denominator
`p * (1 - p)` can never be zero.

The gradient is normalised by `length(ŷ)`, i.e. by the number of elements a
mean over `ŷ` averages across. For a `1×B` prediction matrix this equals the
batch size `size(ŷ, 2)`; for vectors or `C×B` matrices with `C > 1` it keeps
the gradient consistent with a loss that is a mean over every element.
"""
function bce_grad(ŷ, y)
    ϵ = 1e-7
    p = clamp.(ŷ, ϵ, 1 - ϵ)
    # d/dp of -(y*log(p) + (1-y)*log(1-p)) is (p - y) / (p * (1 - p));
    # dividing by the element count accounts for the mean.
    n = length(ŷ)
    return @. (p - y) / (p * (1 - p) * n)
end

"""
    accuracy(ŷ, y)

Return the fraction of elements for which the prediction `ŷ` and the target `y`
fall on the same side of the 0.5 threshold.
"""
function accuracy(ŷ, y)
    predicted_positive = ŷ .> 0.5
    actual_positive = y .> 0.5
    return mean(predicted_positive .== actual_positive)
end

# === Optimizer ===
# Hyperparameters used by the training loop.
epochs = 5
batch_size = 64
Œ∑ = 1e-3  # step size handed to `update_adam!`

# Collect the model's parameters and build the Adam state from them.
params = parameters(model)
state = AdamState(params)

# === Mini-batch generator ===
"""
    create_batches(X, Y; batchsize=64, shuffle=true)

Split `X` and `Y` column-wise into mini-batches and return them as a vector of
`(X_batch, Y_batch)` tuples.

Samples are taken along the second dimension of `X` and `Y`. Each batch holds
`batchsize` columns, except the last one, which holds whatever remains.

# Keywords
- `batchsize`: number of columns per batch.
- `shuffle`: when `true`, the column order is randomly permuted (using the
  default RNG) before splitting; `X` and `Y` are permuted identically.
"""
function create_batches(X, Y; batchsize=64, shuffle=true)
    order = collect(1:size(X, 2))
    shuffle && Random.shuffle!(order)
    total = length(order)
    return map(1:batchsize:total) do first_col
        # The last batch may be shorter than `batchsize`.
        last_col = min(first_col + batchsize - 1, total)
        cols = order[first_col:last_col]
        (X[:, cols], Y[:, cols])
    end
end


# === Training loop ===
# Train for `epochs` passes over the training data; after each pass, evaluate
# on the full test set and print a summary.
for epoch in 1:epochs
    println("=== Epoch $epoch ===")
    # Running sums are weighted by batch size so that the epoch averages are
    # exact per-sample means even when the final batch is smaller than the rest.
    total_loss, total_acc, num_samples = 0.0, 0.0, 0
    batches = create_batches(X_train, y_train, batchsize=batch_size)
    println("  → Training on $(length(batches)) batches of size $batch_size...")

    # `t` measures only the training part of the epoch, not the evaluation.
    t = @elapsed begin
        for (i, (x, y)) in enumerate(batches)
            # Build the computation graph for this batch and evaluate it;
            # the prediction is read from `out.output` afterwards.
            out = model(x)
            graph = topological_sort(out)
            forward!(graph)

            ŷ = out.output
            loss = bce(ŷ, y)
            acc = accuracy(ŷ, y)

            # Number of samples in this batch (batches are split along dim 2).
            batch_samples = size(x, 2)
            total_loss += loss * batch_samples
            total_acc += acc * batch_samples
            num_samples += batch_samples

            # Reset gradients, seed the output gradient with dLoss/dŷ,
            # back-propagate through the graph and apply one Adam step.
            zero_gradients!(model)
            out.gradient = bce_grad(ŷ, y)
            backward!(graph, out.gradient)
            update_adam!(state, params, Œ∑)

            # Progress report every 100 batches and on the last batch.
            if i % 100 == 0 || i == length(batches)
                println(@sprintf("    Batch %d/%d: loss = %.4f, acc = %.4f", i, length(batches), loss, acc))
            end
        end
    end

    train_loss = total_loss / num_samples
    train_acc = total_acc / num_samples

    # === Evaluation ===
    # The whole test set is pushed through the model in a single forward pass.
    println("  → Evaluation on test set...")
    out_eval = model(X_test)
    forward!(topological_sort(out_eval))
    test_pred = out_eval.output
    test_loss = bce(test_pred, y_test)
    test_acc = accuracy(test_pred, y_test)

    println(@sprintf("✅ Epoch %d finished in %.2fs", epoch, t))
    println(@sprintf("🏋️  Train: loss = %.4f, acc = %.4f", train_loss, train_acc))
    println(@sprintf("🧪  Test : loss = %.4f, acc = %.4f\n", test_loss, test_acc))
end

=== Epoch 1 ===
  → Training on 625 batches of size 64...
    Batch 100/625: loss = 0.6693, acc = 0.5781
    Batch 200/625: loss = 0.6016, acc = 0.6406
    Batch 300/625: loss = 0.5918, acc = 0.7031
    Batch 400/625: loss = 0.4715, acc = 0.7656
    Batch 500/625: loss = 0.4302, acc = 0.7656
    Batch 600/625: loss = 0.4234, acc = 0.7812
    Batch 625/625: loss = 0.4388, acc = 0.7969
  → Evaluation on test set...
✅ Epoch 1 finished in 42.66s
🏋️  Train: loss = 0.5430, acc = 0.7142
🧪  Test : loss = 0.3842, acc = 0.8312

=== Epoch 2 ===
  → Training on 625 batches of size 64...
    Batch 100/625: loss = 0.5510, acc = 0.7969
    Batch 200/625: loss = 0.3785, acc = 0.8125
    Batch 300/625: loss = 0.3820, acc = 0.8438
    Batch 400/625: loss = 0.3482, acc = 0.8594
    Batch 500/625: loss = 0.3089, acc = 0.8750
    Batch 600/625: loss = 0.3648, acc = 0.7812
    Batch 625/625: loss = 0.3542, acc = 0.7969
  → Evaluation on test set...
✅ Epoch 2 finished in 32.52s
üèãÔ∏è  T