ADAMW not stable

```Julia
# Classifier: embedding lookup -> 1-D convolution -> flatten -> softmax.
# The assembled chain is moved to the GPU as a whole.
m = gpu(
    Chain(
        Flux.Embedding(501102, 32),   # lookup table with 501102 entries, 32 features each
        Flux.Conv((32,), 64 => 235),  # kernel width 32, 64 input channels, 235 output channels
        Flux.flatten,                 # collapse everything except the batch dimension
        Flux.softmax,                 # normalise along the first dimension
    ),
)

"""
    evalcb()

Run the global model `m` over every batch of `dl_test` and print the fraction of
samples whose predicted class equals the label.

Takes no arguments, so it can be handed to the training loop as a callback.
"""
function evalcb()
    # Keep running counters instead of appending to untyped `[]` vectors: no
    # `Vector{Any}` is built and nothing has to be held until the end of the pass.
    ncorrect = 0
    ntotal = 0
    for (d, l) in dl_test
        probs = m(Flux.batch(map(random_select_sect, d)) |> gpu) |> cpu
        # `argmax(...; dims=1)` gives one CartesianIndex per column; its first
        # component is the row index, which is used as the predicted class.
        predicted = map(idx -> idx[1], vec(argmax(probs; dims=1)))
        # Pair predictions with labels in iteration order, as appending both to
        # flat vectors would.
        ncorrect += count(p == t for (p, t) in zip(predicted, l))
        ntotal += length(predicted)
    end
    acc = ncorrect / ntotal
    println("accurate: $(acc)")
end

"""
    loss(x, y)

Focal loss of the global model `m` on one batch.

Each element of `x` is passed through `random_select_sect` and the results are
concatenated column-wise; `y` is one-hot encoded against the label set `1:235`.
Both are moved to the GPU before the loss is computed.
"""
function loss(x, y)
    predictions = m(reduce(hcat, map(random_select_sect, x)) |> gpu)
    targets = Flux.onehotbatch(y, 1:235) |> gpu
    return Flux.Losses.focal_loss(predictions, targets)
end

# Build the optimiser once, outside `Flux.@epochs`. The macro re-evaluates its whole
# expression on every epoch, so constructing ADAMW inline creates a fresh optimiser
# (and throws away its accumulated moment estimates) at the start of each epoch.
opt = Flux.Optimise.ADAMW(0.0001, (0.9, 0.999), 0.01)  # learning rate, (β1, β2), weight decay

Flux.@epochs 50 Flux.Optimise.train!(
    loss,
    params(m),
    dl_train,
    opt;
    # Run the evaluation callback at most once every 5 seconds.
    cb=Flux.throttle(evalcb, 5),
)
```
The loss becomes NaN while training on the WiLI-2018 dataset, at about epoch 40, when test-set accuracy is above 0.8.
I am sure it's a problem, because it runs well in PyTorch.

By the way, `hcat(map(x...))` is very slow!

`Flux.batch(map(f, d))` is very fast, but it causes the error `Mutating arrays not supported`.


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

ADAMW not stable #1920

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Uh oh!

ADAMW not stable #1920

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions