In [None]:
import Pkg
Pkg.activate(".")
Pkg.status()

In [None]:
import HeatEquation as HE
import LinearAlgebra as LA
import ForwardDiff as FD
import ImplicitAD as IAD
import ReverseDiff as RD

In [None]:
using Plots
using Plots.PlotMeasures

In [None]:
# Scalar conductivity: the requested element type `T` is ignored and `kappa`
# is handed back exactly as it was passed in (no conversion happens here).
HE.convert_kappa(::Type{T}, kappa::Real) where {T<:Real} = kappa

# Whatever real element type `T` is requested, forward to the `Float64` call of
# `HE.build_2d_heat_csc` with `kappa`, `dt`, `Nx`, `Ny` passed through unchanged.
# NOTE(review): relies on HE providing a method specific to `Float64`; otherwise
# this call would re-enter this method -- confirm against the HeatEquation package.
HE.build_2d_heat_csc(::Type{T}, kappa, dt, Nx, Ny) where {T<:Real} =
    HE.build_2d_heat_csc(Float64, kappa, dt, Nx, Ny)

# ReverseDiff tracked reals: compute the grid point using the underlying value
# type `RD.valtype(T)` instead of the tracked type itself.
HE.gridpoint(dim::I, k::I, N::I, ::Type{T}) where {I<:Integer, T<:RD.TrackedReal} =
    HE.gridpoint(dim, k, N, RD.valtype(T))

# ForwardDiff dual numbers: compute the grid point using the underlying value
# type `FD.valtype(T)` instead of the dual type itself.
HE.gridpoint(dim::I, k::I, N::I, ::Type{T}) where {I<:Integer, T<:FD.Dual} =
    HE.gridpoint(dim, k, N, FD.valtype(T))

# Linear solve routed through ImplicitAD: `IAD.implicit_linear` is called with
# the matrix `A`, right-hand side `b` and the factorization `A_fact` (keyword
# `Af`); its result is written element-wise into `u_sol`, which is returned.
function HE.my_linear_solve!(u_sol::AbstractVector, A, A_fact, b::AbstractVector)
    solution = IAD.implicit_linear(A, b; Af=A_fact)
    u_sol .= solution
    return u_sol
end

In [None]:
# Cone-shaped heat source evaluated at the point (xi, yj).
#
# The source is centred at (a, b), has height `h` at the centre and decays
# linearly with distance, reaching zero at radius `r`; outside that radius the
# value is clamped to 0.0. The time argument `tk` is accepted but not used.
function my_source(tk::T, xi::T, yj::T, a::S, b::S, r::S, h::S) where {T,S}
    # Euclidean distance from the evaluation point to the source centre.
    dist = sqrt((xi - a)^2 + (yj - b)^2)
    cone = h*(1.0 - dist / r)
    return max(0.0, cone)
end

In [None]:
# Problem setup for ForwardDiff runs.
#
# `tf` and `dt` are converted to `FD.valtype(eltype(u0))`, `kappa` is passed
# through `HE.convert_kappa` with that same type, and everything is then
# forwarded to `HE.heat_setup`, whose result is returned.
function heat_setup_fd_cpu(
    u0::Matrix,
    kappa,
    interior::Function,
    tf::R,
    dt::R,
    Nx::I,
    Ny::I,
    save_rate::I,
    format::Symbol,
) where {I<:Integer, R<:Real}
    V = FD.valtype(eltype(u0))
    final_time = convert(V, tf)
    step = convert(V, dt)
    conductivity = HE.convert_kappa(V, kappa)
    rate = convert(I, save_rate)
    return HE.heat_setup(
        u0, conductivity, interior, final_time, step, Nx, Ny, rate, format
    )
end
    
# Problem setup for ReverseDiff runs.
#
# `tf` and `dt` are converted to `RD.valtype(eltype(u0))`, `kappa` is passed
# through `HE.convert_kappa` with that same type, and everything is then
# forwarded to `HE.heat_setup`, whose result is returned.
function heat_setup_rd_cpu(
    u0::Matrix,
    kappa,
    interior::Function,
    tf::R,
    dt::R,
    Nx::I,
    Ny::I,
    save_rate::I,
    format::Symbol,
) where {I<:Integer, R<:Real}
    V = RD.valtype(eltype(u0))
    final_time = convert(V, tf)
    step = convert(V, dt)
    conductivity = HE.convert_kappa(V, kappa)
    rate = convert(I, save_rate)
    return HE.heat_setup(
        u0, conductivity, interior, final_time, step, Nx, Ny, rate, format
    )
end

# Run the heat solver for the source parameters `x = (a, b, r, h)` and the
# settings `p = (kappa, tf, dt, N, ...)`, returning the solver's `uk` field.
#
# The initial state is an N-by-N zero matrix with the element type of `x`; the
# setup routine is picked from that element type (plain floats, ForwardDiff
# duals, anything else goes to the ReverseDiff setup).
function simulate_heat(x, p)
    a, b, r, h = x
    kappa, tf, dt, N = p
    source = (t, xq, yq) -> my_source(t, xq, yq, a, b, r, h)

    E = eltype(x)
    u0 = zeros(E, N, N)
    setup = if E <: AbstractFloat
        HE.heat_setup_cpu
    elseif E <: FD.Dual
        heat_setup_fd_cpu
    else
        heat_setup_rd_cpu
    end

    # save_rate = -1 and the :csc format are fixed for every run.
    prob = setup(u0, kappa, source, tf, dt, N, N, -1, :csc)
    HE.heat_loop(prob, nothing; progress=false)
    return prob.uk
end

# Penalty on the design vector: half the squared 2-norm of `x` minus the
# logarithm of its third entry. `p` is accepted but not used.
function cost_x(x, p)
    quadratic = 0.5*LA.norm(x,2)^2
    barrier = log(x[3])
    return quadratic - barrier
end

# Average of the 2-by-2 patch of `u` whose first row/column index is
# floor(N / 2). `dx` is accepted but not used.
function cost_u(u, dx, N)
    first_idx = floor(Int, N / 2)
    patch = first_idx:first_idx+1
    return 0.25 * sum(u[patch, patch])
end

# Combine the two cost terms: the field term `cu` enters with weight -1 and the
# design penalty `cx` with weight 0.1.
function cost(cu, cx)
    weight_u = -1e0
    weight_x = 1e-1
    weighted_u = weight_u*cu
    weighted_x = weight_x*cx
    return weighted_u + weighted_x
end

# Objective for design `x` with settings `p = (kappa, tf, dt, N)`: simulate the
# field, then combine the field cost and the design penalty through `cost`.
function obj(x, p)
    field = simulate_heat(x, p)
    (_, _, _, N) = p
    spacing = HE.gridsize(1, N, Float64)
    field_cost = cost_u(field, spacing, N)
    design_cost = cost_x(x, p)
    return cost(field_cost, design_cost)
end

# Objective evaluated on an SVD-truncated copy of the simulated field.
#
# `p = (kappa, tf, dt, N, cutoff)`. The field returned by `simulate_heat` is
# decomposed with `LA.svd`; every singular value strictly below `cutoff` is set
# to zero, the field is rebuilt from the remaining ones, and the same cost as
# in `obj` is evaluated on the rebuilt field.
function obj_svd(x, p)

    u_sol = simulate_heat(x, p)
    (_, _, _, N, cutoff) = p
    dx = HE.gridsize(1, N, Float64)

    my_svd = LA.svd(u_sol)
    # Zero out the discarded singular values, keep the others unchanged.
    Sigma = map(s -> s < cutoff ? zero(s) : s, my_svd.S)
    # `Diagonal` scales the columns of U directly; the dense `diagm` matrix and
    # the extra dense matrix product it forced are not needed. The grouping
    # (U * Sigma) * Vt is written out explicitly.
    u_svd = (my_svd.U * LA.Diagonal(Sigma)) * my_svd.Vt

    return cost(cost_u(u_svd, dx, N), cost_x(x, p))

end

# Evaluate SVD Compression

In [None]:
# Problem settings shared by the cells below: grid size N, time step dt, final
# time tf, conductivity kappa and an N-by-N zero initial field.
N = 256
dt = 5e-2
# tf = 100*dt
tf = 10.0
kappa = 1.0
u0 = zeros(N,N)
# x0 = [0.0, 0.0, 0.95505] # Approximate solution
# Design vector, unpacked by simulate_heat as (a, b, r, h).
x0 = [0.9, -0.2, 0.5, 1.0]
# Finite-difference step; the same step is used for every component.
ds = 1e-6
da = ds
db = ds
dr = ds
dh = ds
# x1..x4 are copies of x0 with exactly one component shifted by the step.
x1 = copy(x0)
x1[1] += da
x2 = copy(x0)
x2[2] += db
x3 = copy(x0)
x3[3] += dr
x4 = copy(x0)
x4[4] += dh
;

In [None]:
# Evaluate the objective at x0 on grids of size 2^6 ... 2^9 and print it next
# to the design penalty cost_x(x0) for each grid size.
for k in 6:9
    @show 2^k
    f0 = obj(x0, (kappa, tf, dt, 2^k))
    c0 = cost_x(x0, ())
    @show f0, c0
end

In [None]:
# Plot the heat source for the design x0 on the N-by-N grid.
# Axis coordinates: N equally spaced points strictly inside the endpoints of
# each dimension.
(a,b) = HE.endpoints(1, Float64)
xgrid = (b - a) .* (1:N) ./ (N + 1) .+ a
(a,b) = HE.endpoints(2, Float64)
ygrid = (b - a) .* (1:N) ./ (N + 1) .+ a

my_heat_source = zeros(N, N)
for j in 1:N, i in 1:N
    xi = HE.gridpoint(1, i, N, Float64)
    # The y coordinate belongs to dimension 2, matching the endpoints used for
    # ygrid above (it was previously taken from dimension 1).
    yj = HE.gridpoint(2, j, N, Float64)
    # my_heat_source[j,i] = HeatEquation.interior(0.0, xi, yj)
    # Row index j follows y and column index i follows x.
    my_heat_source[j,i] = my_source(0.0, xi, yj, x0...)
end
heatmap(xgrid, ygrid, my_heat_source, size=(450,400))

In [None]:
# f(t, x, y) = my_source(t, x, y, x0...)
# @time usol = HE.run_heat_cpu(u0, kappa, tf, dt, N, N; f=f, save_rate=-1)
# @time usol[:]
# ;

In [None]:
# Field produced by the solver for the initial design x0 on the N-by-N grid.
u_sol = simulate_heat(x0, (kappa, tf, dt, N));

In [None]:
heatmap(xgrid, ygrid, u_sol')

In [None]:
# dx = 1 / N
# 0.5*dx^2*LA.norm(u_sol[:], 2)^2
# Print the field cost, the design penalty and their combination at x0.
dx = HE.gridsize(1, N, Float64)
@show cost_u(u_sol, dx, N)
@show cost_x(x0, ())
@show cost(cost_u(u_sol, dx, N), cost_x(x0, ()))
;

In [None]:
# # dx = 1 / N
# # 0.5*dx^2*LA.norm(u_sol[:], 2)^2
# xf = [0.0, 0.0, 100.0, 100.0]
# dx = HE.gridsize(1, N, Float64)
# u_sol = simulate_heat(xf, (kappa, tf, dt, N));
# @show cost_u(u_sol, dx, N)
# @show cost_x(xf, ())
# @show cost(cost_u(u_sol, dx, N), cost_x(xf, p))
# ;

In [None]:
FD.gradient(x->cost_x(x, ()), x0)

In [None]:
# One-sided finite-difference gradient of obj at x0: one extra objective
# evaluation per perturbed design x1..x4, each divided by its step.
f0 = obj(x0, (kappa, tf, dt, N))
f1 = obj(x1, (kappa, tf, dt, N))
f2 = obj(x2, (kappa, tf, dt, N))
f3 = obj(x3, (kappa, tf, dt, N))
f4 = obj(x4, (kappa, tf, dt, N))
@show (f1 - f0) / da
@show (f2 - f0) / db
@show (f3 - f0) / dr
@show (f4 - f0) / dh
grad_fd = [(f1 - f0) / da, (f2 - f0) / db, (f3 - f0) / dr, (f4 - f0) / dh]
;

In [None]:
# Same one-sided finite-difference gradient, but for the SVD-truncated
# objective obj_svd with the singular-value cutoff fixed at 1e0.
fs0 = obj_svd(x0, (kappa, tf, dt, N, 1e0))
fs1 = obj_svd(x1, (kappa, tf, dt, N, 1e0))
fs2 = obj_svd(x2, (kappa, tf, dt, N, 1e0))
fs3 = obj_svd(x3, (kappa, tf, dt, N, 1e0))
fs4 = obj_svd(x4, (kappa, tf, dt, N, 1e0))
@show (fs1 - fs0) / da
@show (fs2 - fs0) / db
@show (fs3 - fs0) / dr
@show (fs4 - fs0) / dh
grad_svd = [(fs1 - fs0) / da, (fs2 - fs0) / db, (fs3 - fs0) / dr, (fs4 - fs0) / dh]
;

In [None]:
# Forward-mode (ForwardDiff) gradient of obj at x0; the cutoff sweep below
# measures the SVD finite-difference gradients against this value.
grad_for = FD.gradient(x->obj(x, (kappa, tf, dt, N)), x0)

In [None]:
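# Storage needed for a rank-k truncated SVD of the N-by-N field:
#   k singular values
#   k*N entries for the retained U vectors
#   k*N entries for the retained Vt vectors
#   total: 2kN + k = k(2N + 1)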

In [None]:
# Sweep the singular-value cutoff over 1e-6 ... 1e2. For each cutoff, form the
# one-sided finite-difference gradient of obj_svd and record its distance to
# the ForwardDiff gradient grad_for in the infinity norm and in the 2-norm.
cutoffs = [10.0^k for k in -6:1:2]
grad_err = zeros(length(cutoffs))
grad_err_l2 = zeros(length(cutoffs))
for (k,cutoff) in enumerate(cutoffs)
    println("**** cutoff = $cutoff ****")
    fs0 = obj_svd(x0, (kappa, tf, dt, N, cutoff))
    fs1 = obj_svd(x1, (kappa, tf, dt, N, cutoff))
    fs2 = obj_svd(x2, (kappa, tf, dt, N, cutoff))
    fs3 = obj_svd(x3, (kappa, tf, dt, N, cutoff))
    fs4 = obj_svd(x4, (kappa, tf, dt, N, cutoff))
    grad_svd = [(fs1 - fs0) / da, (fs2 - fs0) / db, (fs3 - fs0) / dr, (fs4 - fs0) / dh]
    grad_err[k] = LA.norm(grad_for - grad_svd, Inf)
    grad_err_l2[k] = LA.norm(grad_for - grad_svd, 2)
end

In [None]:
grad_err

In [None]:
dx = HE.gridsize(1, N, Float64);

In [None]:
LA.norm(grad_for,2)

In [None]:
LA.norm(grad_for, Inf)

In [None]:
# Infinity-norm gradient error versus cutoff on log-log axes: absolute error
# and error relative to the infinity norm of grad_for.
p = scatter(cutoffs, grad_err, xscale=:log10, yscale=:log10, label="Abs", legend=:topleft)
scatter!(p, cutoffs, grad_err / LA.norm(grad_for,Inf), xscale=:log10, yscale=:log10, label="Rel")
# Short vertical marks at x = dx and x = dx^2.
plot!(p, fill(dx, 2), [1e-7, 2e-7], label=nothing)
plot!(p, fill(dx^2, 2), [1e-7, 2e-7], label=nothing)
# Reference line y = x.
plot!(p, cutoffs, cutoffs, label=nothing)

In [None]:
N

In [None]:
grad_err_l2

In [None]:
# 2-norm gradient error versus cutoff on log-log axes: absolute error and
# error relative to the 2-norm of grad_for.
p = scatter(cutoffs, grad_err_l2, xscale=:log10, yscale=:log10, label="Abs", legend=:topleft)
scatter!(p, cutoffs, grad_err_l2 / LA.norm(grad_for,2), xscale=:log10, yscale=:log10, label="Rel")
# Short vertical marks at x = dx and x = dx^2.
plot!(p, fill(dx, 2), [1e-7, 2e-7], label=nothing)
plot!(p, fill(dx^2, 2), [1e-7, 2e-7], label=nothing)
# Reference line y = x.
plot!(p, cutoffs, cutoffs, label=nothing)

In [None]:
# FD.gradient(x->obj_svd(x, (kappa, tf, dt, N, 1e-3)), x0)

In [None]:
# RD.gradient(x->obj(x, (kappa, tf, dt, N)), x0)

In [None]:
# jtp = RD.JacobianTape(x->simulate_heat(x, (kappa, tf, dt, N)), x0)
# # RD.jacobian!(jtp, x0)

In [None]:
# jtp.tape

In [None]:
# sizeof(jtp.tape) / 1024^2

# Optimize

In [None]:
import Optim

In [None]:
# Optimizer configuration: one tolerance is used for all four gradient
# tolerances, and the trace is stored (extended) and printed. The plotting
# helpers below read the iterates from metadata["x"] of that stored trace.
tol = 1e-5
my_options = Optim.Options(
    g_abstol=tol,
    g_reltol=tol,
    outer_g_abstol=tol,
    outer_g_reltol=tol,
    store_trace=true,
    extended_trace=true,
    show_trace=true
)
# Box bounds for (a, b, r, h): a and b lie in [-1, 1]; r and h are bounded
# below by 0 and unbounded above.
lb = [-1.0, -1.0, 0.0, 0.0]
ub = [1.0, 1.0, Inf, Inf]
my_params = (kappa, tf, dt, N)
# The fifth entry is the singular-value cutoff read by obj_svd.
my_svd_params = (kappa, tf, dt, N, 1e-3)
;

In [None]:
# Box-constrained minimization of obj starting from x0, using BFGS inside
# Fminbox with gradients requested through forward-mode autodiff.
my_objective(x) = obj(x, my_params)

res = Optim.optimize(
    my_objective,
    lb,
    ub,
    x0,
    Optim.Fminbox(Optim.BFGS()),
    my_options;
    autodiff = :forward, # uses ForwardDiff.jl
)
@show Optim.converged(res)
@show Optim.minimum(res)
;

In [None]:
# Report the optimization result and keep the minimizer in x_sol.
@show Optim.converged(res)
@show Optim.minimum(res)
@show Optim.minimizer(res)
x_sol = Optim.minimizer(res)
;

In [None]:
# Split an optimizer trace into one Float64 vector per design variable.
#
# Each entry of `my_trace` must provide `metadata["x"]` holding the iterate
# (a, b, r, h). Returns `(n, a, b, r, h)`, where `n` is the number of trace
# entries and the vectors hold the four components per entry.
function split_trace_variables(my_trace)
    n = length(my_trace)
    iterates = [my_trace[ll].metadata["x"] for ll in 1:n]
    component(k) = Float64[x[k] for x in iterates]
    return (n, component(1), component(2), component(3), component(4))
end

In [None]:
# Plot the four design variables against the iteration number, drawing
# iterations 0 through `iter`. The axis limits are taken from the whole trace,
# so they do not depend on `iter`.
function make_variable_plot(optim_trace, iter)
    (niter, a, b, r, h) = split_trace_variables(optim_trace)
    @assert(iter < niter)
    series = (a, b, r, h)
    ymin = floor(minimum(minimum, series))
    ymax = ceil(maximum(maximum, series))
    last_iter = niter - 1
    p = plot(
        xticks=0:2:last_iter,
        xrange=(0, last_iter),
        yrange=(ymin, ymax),
        legend=:topleft,
    )
    for (vals, name) in zip(series, ("a", "b", "r", "h"))
        plot!(p, 0:iter, vals[1:iter+1], label=name)
    end
    return p
end

# Plot the `g_norm` field of the trace entries for iterations 0 through `iter`
# on a log-scaled y axis running from 0.1*tol up to the rounded-up largest
# value found in the whole trace.
function make_residual_plot(optim_trace, iter, tol)
    niter = length(optim_trace)
    gnorms = map(state -> getfield(state, :g_norm), optim_trace)
    rmax = ceil(maximum(gnorms))
    @assert(iter < niter)
    last_iter = niter - 1
    p = plot(
        xticks=0:2:last_iter,
        xrange=(0, last_iter),
        yrange=(1e-1*tol, rmax),
        yscale=:log10,
        legend=false,
    )
    plot!(p, 0:iter, gnorms[1:iter+1])
    return p
end

# Heat map of the source `my_source` for the design `x0 = (a, b, r, h)` on an
# N-by-N grid, with the colour range fixed to (0, hmax).
function make_source_plot(x0, N, hmax)

    # Axis coordinates: N equally spaced points strictly inside the endpoints
    # of each dimension.
    (a,b) = HE.endpoints(1, Float64)
    xgrid = (b - a) .* (1:N) ./ (N + 1) .+ a
    (a,b) = HE.endpoints(2, Float64)
    ygrid = (b - a) .* (1:N) ./ (N + 1) .+ a

    my_heat_source = zeros(N, N)
    for j in 1:N, i in 1:N
        xi = HE.gridpoint(1, i, N, Float64)
        # The y coordinate belongs to dimension 2, matching the endpoints used
        # for ygrid above (it was previously taken from dimension 1).
        yj = HE.gridpoint(2, j, N, Float64)
        # Row index j follows y and column index i follows x.
        my_heat_source[j,i] = my_source(0.0, xi, yj, x0...)
    end

    p = plot(clim=(0.0, hmax))
    return heatmap!(p, xgrid, ygrid, my_heat_source)

end

# Source heat map for iteration `iter` (0-based) of `optim_trace`. The iterate
# is read from the trace passed in, not from the global `res`.
function make_source_plot(optim_trace, iter, N, hmax)
    x_iter = optim_trace[iter+1].metadata["x"]
    return make_source_plot(x_iter, N, hmax)
end

# Heat map of the simulated field for the design `x0`, with the colour range
# fixed to (0, umax). The field is transposed before plotting.
function make_stationary_plot(x0, N, params, umax)
    # N equally spaced points strictly inside the endpoints of dimension `dim`.
    function axis_nodes(dim)
        (lo, hi) = HE.endpoints(dim, Float64)
        return (hi - lo) .* (1:N) ./ (N + 1) .+ lo
    end
    xgrid = axis_nodes(1)
    ygrid = axis_nodes(2)

    field = simulate_heat(x0, params)
    return heatmap(xgrid, ygrid, field', clim=(0.0, umax))
end

# Field heat map for iteration `iter` (0-based) of `optim_trace`. The iterate
# is read from the trace passed in, not from the global `res`.
function make_stationary_plot(optim_trace, iter, N, params, umax)
    x_iter = optim_trace[iter+1].metadata["x"]
    return make_stationary_plot(x_iter, N, params, umax)
end

In [None]:
# Build the 2-by-2 summary figure for iteration `iter` (0-based) of
# `optim_trace`: design variables, gradient norm, source heat map and
# simulated field.
#
# Colour ranges are fixed from the whole trace (largest `h`) and from the field
# of the last trace entry, so they do not change with `iter`. Everything is
# derived from the arguments `optim_trace` and `params`; the globals `res` and
# `my_params` are no longer read.
function make_plot_group(optim_trace, iter, params, tol)

    (_, _, _, N) = params

    (niter, a, b, r, h) = split_trace_variables(optim_trace)
    hmax = ceil(maximum(h))

    # Field of the final iterate sets the colour range of the field panel.
    u_sol = simulate_heat(optim_trace[end].metadata["x"], params)
    umax = ceil(maximum(u_sol))

    vp = make_variable_plot(optim_trace, iter)
    rp = make_residual_plot(optim_trace, iter, tol)
    srp = make_source_plot(optim_trace, iter, N, hmax)
    stp = make_stationary_plot(optim_trace, iter, N, params, umax)

    p = plot(
        vp, rp, srp, stp,
        layout=(2,2),
        size=(1050,800),
        suptitle="Iteration: $(iter)",
        left_margin=[3mm 0mm],
        right_margin=[3mm 3mm 3mm],
        bottom_margin=[3mm 3mm],
    )

    return p

end

In [None]:
# Render one summary figure per trace entry and save the sequence as
# "<gif_name>.gif" at `fps` frames per second.
function make_gif(optim_trace, gif_name::AbstractString, params, tol; fps::Int=2)
    last_iter = length(optim_trace) - 1
    frames = @animate for iter in 0:last_iter
        make_plot_group(optim_trace, iter, params, tol)
    end
    return gif(frames, gif_name * ".gif", fps=fps)
end

In [None]:
make_gif(res.trace, "test", my_params, tol)

In [None]:
make_plot_group(res.trace, 12, my_params, tol)

In [None]:
# make_variable_plot(res.trace, 8)

In [None]:
# (niter, a, b, r, h) = split_trace_variables(res.trace)
# hmax = ceil(maximum(h))
# @show hmax
# make_source_plot(res.trace, 0, N, hmax)

In [None]:
# u_sol = simulate_heat(res.trace[end].metadata["x"], my_params)
# umax = ceil(maximum(u_sol))
# @show umax
# make_stationary_plot(res.trace, 0, N, my_params, umax)

In [None]:
# x64 = copy(x_sol)
# my_params = (kappa, tf, dt, 2*N)
# my_objective(x) = obj(x, my_params)

# res = Optim.optimize(
#     my_objective,
#     lb,
#     ub,
#     x64,
#     Optim.Fminbox(Optim.BFGS()),
#     my_options;
#     autodiff = :forward, # uses ForwardDiff.jl
# )
# @show Optim.converged(res)
# @show Optim.minimum(res)

# Optimize with SVD

In [None]:
# tol = 1e-5
# my_options = Optim.Options(
#     g_abstol=tol,
#     g_reltol=tol,
#     outer_g_abstol=tol,
#     outer_g_reltol=tol,
#     store_trace=true,
#     extended_trace=true,
#     show_trace=true
# )
# lb = [-1.0, -1.0, 0.0, 0.0]
# ub = [1.0, 1.0, Inf, Inf]
# my_params = (kappa, tf, dt, N)
# Settings for the SVD objective; the fifth entry is the singular-value cutoff.
my_svd_params = (kappa, tf, dt, N, 1e-3)
;

In [None]:
# Same box-constrained BFGS run as for obj, now on the SVD-truncated objective.
# This rebinds both my_objective and res from the earlier optimization cell.
# The autodiff keyword is commented out, so Optim's default gradient is used
# here (presumably finite differences -- confirm against the Optim docs).
my_objective(x) = obj_svd(x, my_svd_params)

res = Optim.optimize(
    my_objective,
    lb,
    ub,
    x0,
    Optim.Fminbox(Optim.BFGS()),
    my_options;
    # autodiff = :forward, # uses ForwardDiff.jl
)
@show Optim.converged(res)
@show Optim.minimum(res)
;

In [None]:
# x: [-1.0380537110616575e-7, 3.069467587899339e-6, 2.3777241964186038, 3.7649327152879044]  # NOTE(review): looks like pasted optimizer output, kept as a comment -- confirm

In [None]:
# Report the result of the SVD-objective run and keep its minimizer in x_sol
# (this overwrites the x_sol stored after the first optimization).
@show Optim.converged(res)
@show Optim.minimum(res)
@show Optim.minimizer(res)
x_sol = Optim.minimizer(res)
;