Skip to content

Potential gradient issues with Flux chains when changing parameter type #533

@ChrisRackauckas

Description

@ChrisRackauckas

MWE:

using DiffEqFlux, Flux, NeuralPDE, ModelingToolkit, DomainSets, Optimization, OptimizationFlux, Test

# Independent variables x, y and the unknown field u.
@parameters x y
@variables u(..)
# Second-derivative operators in each coordinate.
Dxx = Differential(x)^2
Dyy = Differential(y)^2

# 2D PDE
eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y)

# Boundary conditions on the four edges of the unit square
# (there is no time variable in this problem, hence no initial condition).
bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y),
       u(x, 0) ~ 0.0, u(x, 1) ~ -sin(pi * x) * sin(pi * 1)]

# Space domains. Each entry has the form `variable ∈ Interval(lo, hi)`;
# without the `∈` operator the array literal is not valid Julia.
domains = [x ∈ Interval(0.0, 1.0),
           y ∈ Interval(0.0, 1.0)]

@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)])

# Two networks with the same layer sizes (2 -> 12 -> 12 -> 1) and σ on the
# first two layers: one built with FastChain/FastDense, one with Chain/Dense.
fastchain = FastChain(FastDense(2,12,Flux.σ),FastDense(12,12,Flux.σ),FastDense(12,1))
fluxchain = Chain(Dense(2,12,Flux.σ),Dense(12,12,Flux.σ),Dense(12,1))
# Initial parameters are taken from `fastchain` and converted element-wise to
# Float64; the same vector is passed to BOTH discretizations below.
initθ = Float64.(DiffEqFlux.initial_params(fastchain))
grid_strategy = NeuralPDE.GridTraining(0.1)

# NOTE(review): `p` and `re` are not used anywhere else in the visible snippet.
p,re = Flux.destructure(fluxchain)

# Discretization using the FastChain network.
discretization1 = NeuralPDE.PhysicsInformedNN(fastchain,
                                             grid_strategy;
                                             init_params = initθ)

# Discretization using the Flux Chain network, with the same strategy and the
# same `init_params` as above.
discretization2 = NeuralPDE.PhysicsInformedNN(fluxchain,
                                             grid_strategy;
                                             init_params = initθ)


prob1 = NeuralPDE.discretize(pde_system,discretization1)
prob2 = NeuralPDE.discretize(pde_system,discretization2)
# NOTE(review): `sym_prob` is not used anywhere else in the visible snippet.
sym_prob = NeuralPDE.symbolic_discretize(pde_system,discretization1)

# Gradient of each problem's objective `f` with respect to the parameters,
# evaluated at the same point `initθ`. This comparison is the subject of the issue.
# NOTE(review): `Zygote` does not appear in the `using` list visible in this
# snippet - confirm it is in scope before running.
Zygote.gradient((x)->prob1.f(x,nothing),initθ)
Zygote.gradient((x)->prob2.f(x,nothing),initθ) # Very very different???

# Optimizer callback: print the current loss `l` and return `false`.
# The parameter argument `p` is accepted but not used.
function callback(p, l)
    @show l; return false
end
# Optimize the FastChain-based problem with ADAM for up to 1000 iterations.
res = Optimization.solve(prob1, ADAM(0.1); callback=callback, maxiters=1000)
phi = discretization1.phi

# Evaluation grid with 0.01 spacing between the bounds of each domain.
xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
# Closed-form reference solution used for comparison.
analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)

# Network prediction and reference values on the grid. Both arrays are built
# with the same comprehension order and reshape, so entries correspond.
u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys],
                    (length(xs), length(ys)))
u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
                 (length(xs), length(ys)))
diff_u = abs.(u_predict .- u_real)

@show maximum(abs2, u_predict - u_real)
# Approximate comparison: the `≈` operator is required here; `atol` is passed
# through to `isapprox` by `@test`.
@test u_predict ≈ u_real atol = 2.0

# Optimize the Flux-Chain-based problem with ADAM for up to 1000 iterations.
res = Optimization.solve(prob2, ADAM(0.1); callback=callback, maxiters=1000)
phi = discretization2.phi

# Evaluation grid with 0.01 spacing between the bounds of each domain.
xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains]
# Closed-form reference solution used for comparison.
analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2)

# Network prediction and reference values on the grid. Both arrays are built
# with the same comprehension order and reshape, so entries correspond.
u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys],
                    (length(xs), length(ys)))
u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys],
                 (length(xs), length(ys)))
diff_u = abs.(u_predict .- u_real)

@show maximum(abs2, u_predict - u_real)
# Approximate comparison: the `≈` operator is required here; `atol` is passed
# through to `isapprox` by `@test`.
@test u_predict ≈ u_real atol = 2.0

Note that the `fluxchain` version fails, and its gradient is off compared with the `fastchain` version.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions