In [5]:
# Environment setup: change into the parent of the directory reported by
# `@__DIR__`, activate the project found there, then load the package and
# two of its submodules.
using Pkg
cd(@__DIR__)  # start from the directory `@__DIR__` expands to
cd("..")  # step up one level (presumably the project root — confirm)
Pkg.activate(".")  # activate the project in the current working directory
using MyDiffMLP
using MyDiffMLP.MyAD  # NOTE(review): presumably the autodiff submodule — confirm
using MyDiffMLP.MyNN  # NOTE(review): presumably the layers/optimizer submodule — confirm

[32m[1m  Activating[22m[39m project at `d:\Proga\AWID\MyDiffMLP`
[92m[1mPrecompiling[22m[39m project...
   3003.5 ms[33m  ✓ [39mMyDiffMLP
  1 dependency successfully precompiled in 4 seconds. 214 already precompiled.
  [33m1[39m dependency precompiled but a different version is currently loaded. Restart julia to access the new version. Otherwise, loading dependents of this package may trigger further precompilation to work with the unexpected version.


In [14]:
# Build the dataset with `prepare_dataset(10000, 0.8)` and expose its four
# splits as notebook-level variables.
# NOTE(review): the arguments look like a size limit and a train fraction —
# confirm against `prepare_dataset`.
data = prepare_dataset(10000, 0.8)

X_train, y_train = data.X_train, data.y_train
X_test, y_test = data.X_test, data.y_test

nothing  # end the cell on `nothing` so no value is echoed

┌ Info: Loading raw dataset...
└ @ MyDiffMLP.PrepareData d:\Proga\AWID\MyDiffMLP\src\data_prep.jl:10
┌ Info: Data preparation...
└ @ MyDiffMLP.PrepareData d:\Proga\AWID\MyDiffMLP\src\data_prep.jl:17


In [None]:
# # If the dataset has already been prepared, you can load it directly instead
# using JLD2
# X_train = load("data/km2/imdb_dataset_prepared.jld2", "X_train")
# y_train = load("data/km2/imdb_dataset_prepared.jld2", "y_train")
# X_test = load("data/km2/imdb_dataset_prepared.jld2", "X_test")
# y_test = load("data/km2/imdb_dataset_prepared.jld2", "y_test")
# nothing

In [17]:
using Printf, Statistics, Random

"""
    create_batches(X, Y; batchsize=64, shuffle=true, rng=Random.default_rng())

Split the columns of `X` and `Y` into mini-batches of at most `batchsize` columns.

Returns a vector of `(X_batch, Y_batch)` tuples. Each batch is a copy (not a view) of
the selected columns; the final batch may hold fewer than `batchsize` columns.

# Keywords
- `batchsize`: maximum number of columns per batch; must be positive.
- `shuffle`: when `true`, columns are visited in a random order.
- `rng`: random number generator used for shuffling; pass a seeded generator for
  reproducible batches.

# Throws
- `ArgumentError` if `batchsize` is not positive.
- `DimensionMismatch` if `X` and `Y` do not have the same number of columns.
"""
function create_batches(X, Y; batchsize=64, shuffle=true, rng=Random.default_rng())
    batchsize > 0 ||
        throw(ArgumentError("batchsize must be positive, got $batchsize"))
    nsamples = size(X, 2)
    # Without this check, surplus columns of `Y` would be dropped silently.
    size(Y, 2) == nsamples || throw(
        DimensionMismatch("X has $nsamples columns but Y has $(size(Y, 2))"),
    )

    idxs = collect(1:nsamples)
    if shuffle
        Random.shuffle!(rng, idxs)
    end

    # `partition` yields consecutive index chunks; only the last may be short.
    return [(X[:, cols], Y[:, cols]) for cols in Iterators.partition(idxs, batchsize)]
end
# Network definition: two `Dense` layers composed with `Chain`. The first is
# sized from the number of rows of `X_train` and uses `relu`; the second uses
# `sigmoid`.
# NOTE(review): assumes `Dense(in, out, activation)` argument order, i.e. a
# 32-unit hidden layer and a single output — confirm in MyNN.
model = Chain(
    Dense(size(X_train, 1), 32, relu),
    Dense(32, 1, sigmoid)
)

"""
    bce(ŷ, y)

Return the binary cross-entropy between predictions `ŷ` and targets `y`, averaged
over all elements.

Predictions are clipped to `[1e-7, 1 - 1e-7]` before the logarithms are taken, so
exact zeros and ones in `ŷ` give a finite loss.
"""
function bce(ŷ, y)
    lo = 1e-7
    hi = 1 - lo
    p = clamp.(ŷ, lo, hi)
    loglik = @. y * log(p) + (1 - y) * log(1 - p)
    return -mean(loglik)
end

"""
    bce_grad(ŷ, y)

Return the gradient of the mean binary cross-entropy with respect to the
predictions `ŷ`, as an array with the same shape as `ŷ`.

Each element is `(ŷ - y) / (ŷ * (1 - ŷ))` divided by the total number of elements
of `y`, which matches the `mean` reduction over all elements used by `bce`. For a
`1 × N` target this equals dividing by the batch size `N`.

The denominator `ŷ * (1 - ŷ)` is clamped to at least `1e-7` so saturated
predictions (exactly 0 or 1) do not cause a division by zero.
"""
function bce_grad(ŷ, y)
    ϵ = 1e-7
    denom = clamp.(ŷ .* (1 .- ŷ), ϵ, 1.0)
    # Normalize by the element count, not just the column count, so that
    # multi-row outputs are scaled consistently with a mean over all elements.
    return (ŷ .- y) ./ denom ./ length(y)
end

"""
    accuracy(ŷ, y)

Return the fraction of elements where `ŷ` and `y` fall on the same side of the
0.5 threshold (values strictly greater than 0.5 count as positive).
"""
function accuracy(ŷ, y)
    predicted = ŷ .> 0.5
    actual = y .> 0.5
    return mean(predicted .== actual)
end

# Training hyperparameters used by the loop below.
epochs = 5  # number of iterations of the outer `for epoch` loop
batchsize = 64  # forwarded to `create_batches` as the batch size
η = 0.001  # passed to `update_adam!` (presumably the learning rate — confirm)

# Parameters collected from `model` and the optimizer state built from them;
# both are handed to `update_adam!` after every batch.
params = parameters(model)
state = AdamState(params)

# Training loop. For every epoch: iterate over freshly built mini-batches, run
# the forward pass, accumulate loss/accuracy, seed and run the backward pass,
# and apply an Adam update; then evaluate on the full test set and print one
# summary line.
for epoch in 1:epochs
    # Per-epoch running sums; divided by `num_batches` after the batch loop.
    total_loss = 0.0
    total_acc = 0.0
    num_batches = 0

    # Batches are rebuilt each epoch; `create_batches` shuffles by default.
    batches = create_batches(X_train, y_train; batchsize=batchsize)

    # `t` measures only the batch loop; the evaluation below is not timed.
    t = @elapsed begin
        for (x, y) in batches
    
            # Wrap the batch as a Variable; the second argument is a Float32
            # zero array of the same size (presumably its gradient buffer — confirm).
            x_var = Variable(x, zeros(Float32, size(x)))  # Wrap x as a Variable
            # NOTE(review): calling the model appears to only build the graph node;
            # values are computed by `forward!` on the sorted graph — confirm.
            out = model(x_var)
            graph = topological_sort(out)
            forward!(graph)

            # Loss and accuracy are computed on the raw output array.
            ŷ = out.output
            l = bce(ŷ, y)
            total_loss += l
            total_acc += accuracy(ŷ, y)
            num_batches += 1

            # Seed the backward pass with the loss gradient w.r.t. the output.
            out.gradient = bce_grad(ŷ, y)

            # NOTE(review): `zero_gradients!` runs after the seed is stored;
            # presumably it resets only the model's parameter gradients and
            # leaves `out.gradient` intact — confirm.
            zero_gradients!(model)
            backward!(graph, out.gradient)
            update_adam!(state, params, η)

        end
    end

    # Unweighted means over batches: a shorter final batch counts as much as a
    # full one.
    train_loss = total_loss / num_batches
    train_acc = total_acc / num_batches

    # --- Evaluation ---
    # The whole test set goes through the model in a single forward pass.
    x_test_var = Variable(X_test, zeros(Float32, size(X_test)))  # Wrap test data as a Variable
    out_eval = model(x_test_var)
    forward!(topological_sort(out_eval))
    test_pred = out_eval.output
    test_loss = bce(test_pred, y_test)
    test_acc = accuracy(test_pred, y_test)

    # One line per epoch: epoch index, training time, train and test metrics.
    println(@sprintf("Epoch: %d (%.2fs) \tTrain: (l: %.2f, a: %.2f) \tTest: (l: %.2f, a: %.2f)",
        epoch, t, train_loss, train_acc, test_loss, test_acc))
end


Epoch: 1 (2.45s) 	Train: (l: 0.65, a: 0.81) 	Test: (l: 0.59, a: 0.85)
Epoch: 2 (2.16s) 	Train: (l: 0.47, a: 0.92) 	Test: (l: 0.44, a: 0.86)
Epoch: 3 (2.27s) 	Train: (l: 0.30, a: 0.94) 	Test: (l: 0.37, a: 0.87)
Epoch: 4 (2.32s) 	Train: (l: 0.21, a: 0.96) 	Test: (l: 0.33, a: 0.87)
Epoch: 5 (2.03s) 	Train: (l: 0.15, a: 0.97) 	Test: (l: 0.32, a: 0.87)
