Document custom rrule trick for ComponentArray constructor #67

Closed
JinraeKim opened this issue Jun 25, 2023 · 7 comments
Labels: documentation (Improvements or additions to documentation)
@JinraeKim commented Jun 25, 2023

Regarding #23, I was trying to re-implement this in Julia.

I found that ComponentArrays may not work properly with Zygote when the optimization parameters come from another source (here, a neural network), as shown below (with v0.5.0-DEV).

I think this is related to jonniedie/ComponentArrays.jl#176. It may therefore be better to document this limitation of ComponentArrays in the multiple-argument case (or just avoid it for now).

Case 1: with ComponentArrays (not working)

Code

using ImplicitDifferentiation
using Flux
using Convex, ECOS
using ComponentArrays
using Random


Random.seed!(2024)
i_max = 10
n = 3
m = 2
model = Chain(
              Dense(3, 64, Flux.leakyrelu),
              Dense(64, i_max*(m+1)),
             )
u_min = -2*ones(m)
u_max = +2*ones(m)


function minimise_logsumexp(θ; T, u_min, u_max, initial_guess, solver=() -> ECOS.Optimizer())
    # A = θ[:, 1:end-1]
    # B = θ[:, end]
    (; A, B) = θ
    m = size(A, 2)
    u = Convex.Variable(m)
    if initial_guess !== nothing
        u.value = initial_guess
    end
    obj = T * Convex.logsumexp((1/T)*(A*u + B))
    prob = Convex.minimize(obj)
    prob.constraints += [u >= u_min]
    prob.constraints += [u <= u_max]
    solve!(prob, solver(), silent_solver=true, verbose=false)
    minimiser = u.value isa Number ? [u.value] : u.value[:]  # ensure the result is a vector
    return minimiser
end


function forward_cstr_optim(θ; kwargs...)
    u = minimise_logsumexp(θ; kwargs...)
    return u
end


function proj_hypercube(p; u_min, u_max)
    return max.(u_min, min.(u_max, p))
end

function conditions_cstr_optim(θ, u; kwargs...)
    (; A, B) = θ
    # A = θ[:, 1:end-1]
    # B = θ[:, end]
    ∇₂f = A' * Flux.softmax(A*u+B)
    η = 0.1
    return u .- proj_hypercube(u .- η .* ∇₂f; kwargs...)
end


function implicit_cstr_optim_func(θ; T, u_min, u_max, initial_guess, solver)
    tmp = ImplicitFunction(
                           θ -> forward_cstr_optim(θ; T, u_min, u_max, initial_guess, solver),
                           (θ, u) -> conditions_cstr_optim(θ, u; u_min, u_max),
                          )
    tmp(θ)
end

x = 100*(2*rand(n) .- 1)
val, grads = Flux.withgradient(model) do _model
    AB = reshape(_model(x), i_max, m+1)
    A = AB[:, 1:m]
    B = AB[:, m+1]
    θ = ComponentArray(A=A, B=B)
    # θ = reshape(_model(x), i_max, m+1)
    u_star = implicit_cstr_optim_func(θ; T=1, u_min=-ones(m), u_max=+ones(m), initial_guess=nothing, solver=() -> ECOS.Optimizer())
    sum(u_star)
end

Result

ERROR: Mutating arrays is not supported -- called push!(Vector{Any}, ...)
This error occurs when you ask Zygote to differentiate operations that change
the elements of arrays in place (e.g. setting values with x .= ...)

Possible fixes:
- avoid mutating operations (preferred)
- or read the documentation and solutions for this error
  https://fluxml.ai/Zygote.jl/latest/limitations

Case 2: without ComponentArrays (working)

Code

using ImplicitDifferentiation
using Flux
using Convex, ECOS
using ComponentArrays
using Random


Random.seed!(2024)
i_max = 10
n = 3
m = 2
model = Chain(
              Dense(3, 64, Flux.leakyrelu),
              Dense(64, i_max*(m+1)),
             )
u_min = -2*ones(m)
u_max = +2*ones(m)


function minimise_logsumexp(θ; T, u_min, u_max, initial_guess, solver=() -> ECOS.Optimizer())
    # A = θ[:, 1:end-1]
    # B = θ[:, end]
    (; A, B) = θ
    m = size(A, 2)
    u = Convex.Variable(m)
    if initial_guess !== nothing
        u.value = initial_guess
    end
    obj = T * Convex.logsumexp((1/T)*(A*u + B))
    prob = Convex.minimize(obj)
    prob.constraints += [u >= u_min]
    prob.constraints += [u <= u_max]
    solve!(prob, solver(), silent_solver=true, verbose=false)
    minimiser = u.value isa Number ? [u.value] : u.value[:]  # ensure the result is a vector
    return minimiser
end


function forward_cstr_optim(θ; kwargs...)
    u = minimise_logsumexp(θ; kwargs...)
    return u
end


function proj_hypercube(p; u_min, u_max)
    return max.(u_min, min.(u_max, p))
end

function conditions_cstr_optim(θ, u; kwargs...)
    # (; A, B) = θ
    A = θ[:, 1:end-1]
    B = θ[:, end]
    ∇₂f = A' * Flux.softmax(A*u+B)
    η = 0.1
    return u .- proj_hypercube(u .- η .* ∇₂f; kwargs...)
end


function implicit_cstr_optim_func(θ; T, u_min, u_max, initial_guess, solver)
    tmp = ImplicitFunction(
                           θ -> forward_cstr_optim(θ; T, u_min, u_max, initial_guess, solver),
                           (θ, u) -> conditions_cstr_optim(θ, u; u_min, u_max),
                          )
    tmp(θ)
end

x = 100*(2*rand(n) .- 1)
val, grads = Flux.withgradient(model) do _model
    AB = reshape(_model(x), i_max, m+1)
    # A = AB[:, 1:m]
    # B = AB[:, m+1]
    # θ = ComponentArray(A=A, B=B)
    θ = reshape(_model(x), i_max, m+1)
    u_star = implicit_cstr_optim_func(θ; T=1, u_min=-ones(m), u_max=+ones(m), initial_guess=nothing, solver=() -> ECOS.Optimizer())
    sum(u_star)
end

Result

(val = 1.2111089906277515, grad = ((layers = ((weight = Float32[-1.9894879 0.016161848 -1.9257156; -0.009999948 8.12358f-5 -0.009679403; … ; 0.31266153 -0.0025399441 0.30263928; -1.2044753 0.009784702 -1.1658663], bias = Float32[-0.026939616, -0.0001354091, 0.0067554815, -0.012562656, -0.0116912015, -0.0072106766, -0.019949805, -0.012255933, -0.008887393, -0.00019493952  …  8.364284f-6, 0.0029845645, 0.00020423913, -0.00016008505, -0.00011763882, 0.01743382, 0.015867122, 0.013238616, 0.0042337435, -0.016309775], σ = nothing), (weight = Float32[2.7479534f-31 -8.035564f-33 … 6.655762f-31 8.295855f-31; 4.978378f-17 -1.455777f-18 … 1.2058026f-16 1.502933f-16; … ; 5.054912f-10 -1.478157f-11 … 1.2243399f-9 1.5260381f-9; 3.615537f-16 -1.0572551f-17 … 8.757118f-16 1.0915021f-15], bias = Float32[2.8529254f-32, 5.168552f-18, 1.702376f-20, -0.011397834, 0.012743557, 3.4305703f-10, 2.0157113f-23, -0.010961587, 9.4864516f-12, 7.707587f-18  …  1.4720016f-31, 2.3545986f-17, 7.705814f-20, -0.035908565, 0.057037998, 1.6192033f-9, 9.8762665f-23, -0.021129435, 5.2480097f-11, 3.7536507f-17], σ = nothing)),),))
@mohamed82008 (Collaborator)
Try defining a function like https://github.com/mohamed82008/DifferentiableFactorizations.jl/blob/main/src/DifferentiableFactorizations.jl#L68 to work around the ComponentArrays issue.
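
For illustration, a minimal sketch of that pattern, adapted to the A/B layout used above (the name comp_vec follows the linked file, but this is an assumed sketch, not the exact code in DifferentiableFactorizations.jl):

using ComponentArrays
using ChainRulesCore

# Wrap the ComponentVector constructor in a function with a single method,
# so there is exactly one thing to attach a custom rule to.
comp_vec(A, B) = ComponentVector(; A, B)

# Custom rrule: the pullback splits the cotangent of the ComponentVector
# back into its A and B blocks, so Zygote never has to differentiate
# through the constructor itself.
function ChainRulesCore.rrule(::typeof(comp_vec), A, B)
    v = comp_vec(A, B)
    function comp_vec_pullback(Δ)
        Δv = ComponentVector(collect(Δ), getaxes(v)...)  # reattach axes in case Δ is a plain vector
        return NoTangent(), Δv.A, Δv.B
    end
    return v, comp_vec_pullback
end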

@gdalle (Member) commented Jun 25, 2023

@mohamed82008 Can you explain why this works? It might be a good idea to document it.

@mohamed82008 (Collaborator)
I have an rrule defined for it.

@JinraeKim (Author)
@mohamed82008
The following code gave me the same error.

Did I do something wrong...?

using ImplicitDifferentiation
using Flux
using Convex, ECOS
using ComponentArrays
using Random


Random.seed!(2024)
i_max = 10
n = 3
m = 2
model = Chain(
              Dense(3, 64, Flux.leakyrelu),
              Dense(64, i_max*(m+1)),
             )
u_min = -2*ones(m)
u_max = +2*ones(m)


function minimise_logsumexp(θ; T, u_min, u_max, initial_guess, solver=() -> ECOS.Optimizer())
    # A = θ[:, 1:end-1]
    # B = θ[:, end]
    (; A, B) = θ
    m = size(A, 2)
    u = Convex.Variable(m)
    if initial_guess !== nothing
        u.value = initial_guess
    end
    obj = T * Convex.logsumexp((1/T)*(A*u + B))
    prob = Convex.minimize(obj)
    prob.constraints += [u >= u_min]
    prob.constraints += [u <= u_max]
    solve!(prob, solver(), silent_solver=true, verbose=false)
    minimiser = u.value isa Number ? [u.value] : u.value[:]  # ensure the result is a vector
    return minimiser
end


function forward_cstr_optim(θ; kwargs...)
    u = minimise_logsumexp(θ; kwargs...)
    z = 0
    return u, z
end


function proj_hypercube(u; u_min, u_max)
    return max.(u_min, min.(u_max, u))
end

function conditions_cstr_optim(θ, u, z; kwargs...)
    (; A, B) = θ
    # A = θ[:, 1:end-1]
    # B = θ[:, end]
    ∇₂f = A' * Flux.softmax(A*u+B)
    η = 0.1
    return u .- proj_hypercube(u .- η .* ∇₂f; kwargs...)
end


function implicit_cstr_optim_func(θ; T, u_min, u_max, initial_guess, solver)
    tmp = ImplicitFunction(
                           θ -> forward_cstr_optim(θ; T, u_min, u_max, initial_guess, solver),
                           (θ, u, z) -> conditions_cstr_optim(θ, u, z; u_min, u_max),
                          )
    tmp(θ)[1]
end

x = 100*(2*rand(n) .- 1)
val, grads = Flux.withgradient(model) do _model
    AB = reshape(_model(x), i_max, m+1)
    A = AB[:, 1:m]
    B = AB[:, m+1]
    θ = ComponentVector(; A=A, B=B)
    # θ = reshape(_model(x), i_max, m+1)
    u_star = implicit_cstr_optim_func(θ; T=1, u_min=-ones(m), u_max=+ones(m), initial_guess=nothing, solver=() -> ECOS.Optimizer())
    sum(u_star)
end

@gdalle (Member) commented Jun 26, 2023

I can't seem to find exactly what you changed, but you still have the constructor ComponentVector in the function you try to differentiate with Zygote.jl. Mohamed's workaround involves defining a function comp_vec that calls this constructor in one specific case, and then defining an rrule for comp_vec so that Zygote.jl stops complaining.
Of course you could also define an rrule directly for the constructor, but

  • this would be type piracy
  • you would probably have to deal with lots of different methods, whereas comp_vec only has one
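
Concretely, assuming a comp_vec defined as in the sketch above, the only change needed in the failing example is the constructor call inside the closure passed to Flux.withgradient:

val, grads = Flux.withgradient(model) do _model
    AB = reshape(_model(x), i_max, m+1)
    A = AB[:, 1:m]
    B = AB[:, m+1]
    θ = comp_vec(A, B)  # wrapped constructor with its own rrule, instead of ComponentVector(; A=A, B=B)
    u_star = implicit_cstr_optim_func(θ; T=1, u_min=-ones(m), u_max=+ones(m), initial_guess=nothing, solver=() -> ECOS.Optimizer())
    sum(u_star)
end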

@JinraeKim (Author)
@gdalle Ah, I see. Thank you for your explanation!
I had assumed that a comp_vec-like workaround was already implemented out of the box in this package.
As you pointed out, it would be great to document this, since the plain ComponentArray constructor is introduced here for the multiple-argument case and may trigger this kind of error for future readers (like me) :>

@gdalle changed the title from "Multiple arguments using ComponentArrays may not work when parsing optimization parameters given from other neural network" to "Document custom rrule trick for ComponentArray constructor" on Jun 26, 2023
@gdalle reopened this on Jun 26, 2023
@gdalle added the documentation (Improvements or additions to documentation) label on Jul 30, 2023
@gdalle added this to the v0.5.0 milestone on Aug 4, 2023
@gdalle (Member) commented Aug 9, 2023

Fixed by #100

@gdalle closed this as completed on Aug 9, 2023