# Try autodiff with multivariate GWAS

1. We want to estimate a general covariance matrix $\Gamma = LL^t$
2. The loglikelihood therefore is a function of $\beta$ and $L$
3. We need to be able to:
    + autodiff a logl function that calls BLAS internally and has in-place operations
    + autodiff a logl function that takes $L$ (cholesky factor) as input

In [18]:
using Enzyme
using LinearAlgebra
using BenchmarkTools

"""
    vech!(v::AbstractVector, A::AbstractVecOrMat)
    vech!(v::AbstractVector, A::Cholesky)

Overwrite vector `v` by the entries from lower triangular part of `A`. 
Source = https://github.com/OpenMendel/WiSER.jl/blob/77e723b4769eb54f9eaa72aab038b4b5366365cd/src/multivariate_calculus.jl#L2
"""
function vech!(v::AbstractVector, A::AbstractVecOrMat)
    m, n = size(A, 1), size(A, 2)
    idx = 1
    @inbounds for j in 1:n, i in j:m
        v[idx] = A[i, j]
        idx += 1
    end
    v
end
function vech!(v::AbstractVector, L::Cholesky)
    Ldata = L.factors
    if L.uplo === 'L'
        vech!(v, Ldata)
    else
        error("L.uplo !== 'L'! Construct cholesky factors using cholesky(x, :L)")
    end
    return v
end

"""
    un_vech!(A::AbstractMatrix, v::AbstractVector)
    un_vech!(A::Cholesky, v::AbstractVector)

Overwrite lower triangular part of `A` by the entries from `v`. Upper triangular
part of `A` is untouched.  
"""
function un_vech!(A::AbstractMatrix, v::AbstractVector)
    m, n = size(A, 1), size(A, 2)
    idx = 1
    @inbounds for j in 1:n, i in j:m
        A[i, j] = v[idx]
        idx += 1
    end
    A
end
function un_vech!(L::Cholesky, v::AbstractVector)
    un_vech!(L.factors, v)
end

"""
    vech(A::AbstractVecOrMat) -> AbstractVector

Return the entries from lower triangular part of `A` as a vector.
Source = https://github.com/OpenMendel/WiSER.jl/blob/77e723b4769eb54f9eaa72aab038b4b5366365cd/src/multivariate_calculus.jl#L2
"""
function vech(A::AbstractVecOrMat)
    m, n = size(A, 1), size(A, 2)
    vech!(similar(A, n * m - (n * (n - 1)) >> 1), A)
end


vech

## `Enzyme.jl` with BLAS

In [3]:
# objective = 0.5 || y - X*beta ||^2
function ols(y, X, beta, storage=zeros(size(X, 1)))
    mul!(storage, X, beta)
    storage .= y .- storage
    return 0.5 * sum(abs2, storage)
end

# simulate data
n = 10000
p = 50
X = randn(n, p)
y = randn(n)
beta = randn(p)
storage = zeros(n)
ols(y, X, beta)

# autodiff grad (precompile)
grad_storage = zeros(length(beta))
Enzyme.autodiff(
    Reverse, ols, 
    Const(y), 
    Const(X), 
    Duplicated(beta, grad_storage), 
    Duplicated(storage, zero(storage))
)
grad_storage .= 0

# time
@time Enzyme.autodiff(
    Reverse, ols, 
    Const(y), 
    Const(X), 
    Duplicated(beta, grad_storage), 
    Duplicated(storage, zero(storage))
)

# analytical grad
@time true_grad = -X' * (y - X*beta);

# check answers
[true_grad grad_storage]

  0.000588 seconds (6 allocations: 78.266 KiB)
  0.000427 seconds (9 allocations: 3.968 MiB)


[33m[1m└ [22m[39m[90m@ Enzyme.Compiler ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:59[39m


50×2 Matrix{Float64}:
  10394.5    10394.5
  -9499.72   -9499.72
   1758.3     1758.3
  -9888.78   -9888.78
  -3352.47   -3352.47
   2688.75    2688.75
  -4977.85   -4977.85
  -1718.61   -1718.61
   2680.03    2680.03
  -3743.46   -3743.46
  -2161.22   -2161.22
  14151.1    14151.1
   1875.09    1875.09
      ⋮     
   1365.16    1365.16
 -11158.0   -11158.0
 -12188.4   -12188.4
  -8963.68   -8963.68
   3879.96    3879.96
   3234.32    3234.32
  -9421.35   -9421.35
  -9140.11   -9140.11
 -13256.7   -13256.7
   2129.72    2129.72
 -18850.8   -18850.8
  -3807.75   -3807.75

## DifferentianInterface with BLAS

In [4]:
# objective = 0.5 || y - X*beta ||^2
function ols(y, X, beta, storage=zeros(size(X, 1)))
    mul!(storage, X, beta)
    storage .= y .- storage
    return 0.5 * sum(abs2, storage)
end

# simulate data
n = 10000
p = 50
X = randn(n, p)
y = randn(n)
beta = randn(p)
storage = zeros(n)
ols(y, X, beta)

# differention interface
ols(β) = ols(y, X, β, storage)
beta = zeros(p)
DifferentiationInterface.gradient(ols, AutoEnzyme(), beta)

[33m[1m└ [22m[39m[90m@ Enzyme.Compiler ~/.julia/packages/GPUCompiler/kqxyC/src/utils.jl:59[39m


50-element Vector{Float64}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

## `Enzyme.jl` with cholesky inputs

Suppose our objective is

$$f(\beta, L) = \frac{1}{2}\|y - X\beta\|^2_2 + \|vech(L)\|^2$$

where $L$ represents the cholesky factorization of some symmetric PD matrix.

In [2]:
# objective = 0.5 || y - X*beta ||^2
function f(y, X, vechL, beta, storage=zeros(size(X, 1)))
    mul!(storage, X, beta)
    storage .= y .- storage
    return 0.5 * sum(abs2, storage) + sum(abs2, vechL)
end

# helper functions
function vech!(v::AbstractVector, A::AbstractVecOrMat)
    m, n = size(A, 1), size(A, 2)
    idx = 1
    @inbounds for j in 1:n, i in j:m
        v[idx] = A[i, j]
        idx += 1
    end
    v
end
function vech!(v::AbstractVector, L::Cholesky)
    Ldata = L.factors
    if L.uplo === 'L'
        vech!(v, Ldata)
    else
        error("L.uplo !== 'L'! Construct cholesky factors using cholesky(x, :L)")
    end
    return v
end

# simulate data
n = 10000
p = 50
X = randn(n, p)
y = randn(n)
beta = randn(p)
storage = zeros(n)
L = cholesky(Symmetric(X'*X, :L))
vechL = zeros((p * (p+1) >> 1))
vech!(vechL, L)
f(y, X, vechL, beta)

787870.1077377998

In [18]:
# autodiff grad wrt beta
grad_storage = zeros(length(beta))
@time Enzyme.autodiff(
    Reverse, f, 
    Const(y), 
    Const(X), 
    Const(vechL), 
    Duplicated(beta, grad_storage), 
    Duplicated(storage, zero(storage))
)

# analytical grad
@time true_grad = -X' * (y - X*beta);

# check answers
[true_grad grad_storage]

  0.000831 seconds (7 allocations: 78.281 KiB)
  0.000591 seconds (9 allocations: 3.968 MiB)


50×2 Matrix{Float64}:
  14890.3    14890.3
  10285.8    10285.8
 -26034.3   -26034.3
   6992.24    6992.24
  -1519.94   -1519.94
  -6055.18   -6055.18
 -13556.5   -13556.5
  28973.6    28973.6
  -7394.89   -7394.89
 -14834.1   -14834.1
 -20203.1   -20203.1
   5781.82    5781.82
  22452.7    22452.7
      ⋮     
 -12271.6   -12271.6
   5718.2     5718.2
  -9557.47   -9557.47
 -12149.0   -12149.0
  15954.4    15954.4
   2491.68    2491.68
   3878.5     3878.5
  -1308.08   -1308.08
  14682.4    14682.4
  10457.0    10457.0
 -11036.1   -11036.1
  -1376.77   -1376.77

In [19]:
# autodiff grad wrt L
gradL = zeros((p * (p+1) >> 1))

@time Enzyme.autodiff(
    Reverse, f, 
    Const(y), 
    Const(X), 
    Duplicated(vechL, gradL), 
    Const(beta), 
    Const(storage)
)
gradL

  0.000178 seconds (5 allocations: 96 bytes)


1250-element Vector{Float64}:
 198.978438493076
  -3.005542319968958
  -0.0825790062683518
   1.0329102936395052
  -0.6551968970842044
  -0.5547129753000754
  -0.06393618603027097
  -2.4727939986170666
   1.3388437722889768
   0.753763500042582
  -1.757953505419059
  -4.740126646347488
   2.1198570600669284
   ⋮
  -4.861655513503298
 200.05821860257626
   0.6786321432915907
   0.09306695899661727
  -3.3575663500732804
  -1.6330792230109883
   1.7992931683189446
   0.8905176836865358
   1.5859112930741317
 198.08436377096575
   2.7783077526059974
   0.41758512181906965

### Rewrite objective to use only 1 input `par`

In [22]:
# objective = 0.5 || y - X*beta ||^2 + ||vech(L)||^2
function f(y, X, par, storage=zeros(size(X, 1)))
    n, p = size(X)
    beta = @view(par[1:p])
    vechL = @view(par[p+1:end])
    mul!(storage, X, beta)
    storage .= y .- storage
    return 0.5 * sum(abs2, storage) + sum(abs2, vechL)
end

f (generic function with 3 methods)

In [24]:
# autodiff grad wrt beta & vechL
grad_storage = zeros(length(beta) + length(vechL))
par = [beta; vechL]
@time Enzyme.autodiff(
    Reverse, f, 
    Const(y), 
    Const(X), 
    Duplicated(par, grad_storage), 
    Duplicated(storage, zero(storage))
)
grad_storage[1:p] ≈ true_grad

  0.000797 seconds (6 allocations: 78.266 KiB)


true

In [25]:
grad_storage[p+1:end] ≈ gradL

true

## Iterating lower triangular matrix

In [23]:
# hua's code
function vech(A::AbstractVecOrMat)
    m, n = size(A, 1), size(A, 2)
    vech!(similar(A, n * m - (n * (n - 1)) >> 1), A)
end
function vech!(v::AbstractVector, A::AbstractVecOrMat)
    m, n = size(A, 1), size(A, 2)
    idx = 1
    @inbounds for j in 1:n, i in j:m
        v[idx] = A[i, j]
        idx += 1
    end
    v
end

# vech routines for cholesky factors
function vech!(v::Vector{Float64}, L::Cholesky)
    d = size(L, 1)
    idx = 1
    if L.uplo === 'L'
        Ldata = L.factors
        for j in 1:d, i in j:d
            v[idx] = Ldata[i, j]
            idx += 1
        end
    else
        error("L.uplo !== 'L'! Construct cholesky factors using cholesky(x, :L)")
    end
    return v
end

vech! (generic function with 2 methods)

In [24]:
# data
n = 1000
x = randn(n, n)
sigma = Symmetric(x'*x, :L)
L = cholesky(sigma)

# test cholesky
@show all(L.L*Transpose(L.L) .≈ sigma)

# check answers
v1 = zeros((n * (n+1)) >> 1)
vech!(v1, L)
@show all(v1 .== vech(L.L))

# timings
@benchmark vech!($v1, $L)

all(L.L * Transpose(L.L) .≈ sigma) = true
all(v1 .== vech(L.L)) = true


BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m93.084 μs[22m[39m … [35m196.542 μs[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m93.666 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m94.178 μs[22m[39m ± [32m  2.442 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m▂[39m▇[39m█[34m▇[39m[39m▄[32m▂[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m▂[39m▂[39m▃[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[39m█[34m█[39m

In [26]:
b = @allocated vech!(v1, L);
b

0

## Compute $tr(\Gamma)$ given $\Gamma = LL'$

In [17]:
x = randn(5, 5)
sigma = Symmetric(x'*x, :L)
L = cholesky(sigma)

tr(sigma)

18.204814183212857

In [21]:
LinearAlgebra.tr(L::Cholesky) = sum(abs2, vec(L.L))
tr(L)

18.204814183212857

In [25]:
function tr2(L::Cholesky)
    s = zero(eltype(L.factors))
    for Lij in LowerTriangular(L.factors)
        s += abs2(Lij)
    end
    return s
end
@btime tr2($L)

  32.737 ns (0 allocations: 0 bytes)


18.204814183212857

## Enzyme.jl on `Cholesky`s

In [2]:
function f(par::Vector, beta_storage::Vector, L_storage::Cholesky)
    p = length(beta_storage)
    beta_storage .= par[1:p]          # copy elements of `par` to `beta_storage`
    un_vech!(L_storage, par[p+1:end]) # copy elements of `par` to `L_storage`
    return f(beta_storage, L_storage)
end
function f(beta::Vector, L::Cholesky)
    vechL = vech(L.L)
    return 0.5 * (sum(abs2, beta) + sum(abs2, vechL))
end

f (generic function with 2 methods)

In [3]:
# evaluate obj
beta = randn(5)
x = randn(5, 5)
L = cholesky(Symmetric(x'*x, :L))
f(beta, L)

16.0039780498943

In [4]:
par = [beta; vech(L.L)]
beta .= 0       # destroy beta
L.factors .= 0  # destroy L
f(par, beta, L) # check objective is the same

16.0039780498943

In [5]:
# differentiate f(par, beta, L) with respect to par
grad_storage = zeros(length(par))
beta_storage = similar(beta)
L_storage = cholesky(Symmetric(x'*x, :L))  # Dummy initialization
Enzyme.autodiff(
    Reverse, f, 
    Duplicated(par, grad_storage),
    Duplicated(beta_storage, copy(beta_storage)), 
    Duplicated(L_storage, copy(L_storage))
)
grad_storage

20-element Vector{Float64}:
 -0.4328244198389204
  1.7568333325595742
 -0.6032695430036253
 -0.35899553893071334
 -1.2367436525087776
  4.437233082178529
  0.2784219130963045
 -3.5871067916762884
  0.23156177976363368
 -2.025996374320321
  3.8996960669664724
 -1.500879349830168
  2.113128000492451
 -0.8035957402607469
  2.2656624712795845
 -2.4021367716300173
  0.9931116778274178
  1.065913083291953
 -1.3442657287242177
  5.714082490206185

## Enzyme.jl on `struct`s

+ In practice, data and intermediate variables are often stored in `struct`s
    + Some fields are fixed, e.g. data
    + Some fields are not, e.g. temporary storages
+ Can we autodiff through these structs?

In [6]:
struct MyDataStruct
    X::Matrix{Float64}
    y::Vector{Float64}
end

struct MyStruct
    data::MyDataStruct
    beta::Vector{Float64}
    L::Cholesky{Float64, Matrix{Float64}}
    storage::Vector{Float64}
end

In [9]:
function ols(par, data::MyDataStruct)
    X = data.X
    y = data.y
    n, p = size(X)
    beta = @view(par[1:p])
    vechL = @view(par[p+1:end])
    
    # allocate all the temporary storages needed
    L = cholesky(randn(p, p), check=false)
    un_vech!(L, vechL)
    r = y - X*beta
    
    return 0.5 * sum(abs2, r) + logdet(L)
end

# simulate data
n = 10000
p = 50
X = randn(n, p)
y = randn(n)
beta = randn(p)
storage = zeros(n)
data = MyDataStruct(X, y)
L = cholesky(Symmetric(X'*X, :L))
s = MyStruct(data, beta, L, storage)
par = [beta; vech(L.L)]
ols(par, data)

243607.98092798426

In [13]:
# autodiff through struct
grad_storage = zeros(length(par))
@time Enzyme.autodiff(
    Reverse, ols, 
    Duplicated(par, grad_storage), 
    Const(data),
)

# analytical grad
@time true_grad = -X' * (y - X*beta);

# check answers
[true_grad[1:p] grad_storage[1:p]]

  0.000646 seconds (31 allocations: 372.531 KiB)
  0.012192 seconds (9 allocations: 3.968 MiB)


50×2 Matrix{Float64}:
  10875.8     10875.8
   7397.42     7397.42
  -3686.16    -3686.16
  -6935.19    -6935.19
   4511.6      4511.6
 -12729.9    -12729.9
   3680.21     3680.21
   -828.017    -828.017
   1867.43     1867.43
  14481.1     14481.1
   4089.1      4089.1
  11120.5     11120.5
   1886.01     1886.01
      ⋮      
 -11548.8    -11548.8
  -4045.51    -4045.51
   8216.68     8216.68
   9122.9      9122.9
 -11029.6    -11029.6
 -15141.6    -15141.6
   3569.41     3569.41
 -18680.5    -18680.5
   3727.33     3727.33
   7264.5      7264.5
  19416.0     19416.0
   9707.4      9707.4

In [14]:
grad_storage[p+1:end]

1275-element Vector{Float64}:
 0.020094456598901383
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮
 0.0
 0.0
 0.020166284363359866
 0.0
 0.0
 0.0
 0.019983043808502302
 0.0
 0.0
 0.020059291836159857
 0.0
 0.020048841239755876

## Enzyme.jl on MultivariateCopulaModel

In [26]:
using Distributions
using GLM
using Random
using LinearAlgebra
using Enzyme

struct MultivariateCopulaData{T, D, L}
    # data
    Y::Matrix{T}    # n × d matrix of phenotypes, each row is a sample phenotype
    X::Matrix{T}    # n × p matrix of non-genetic covariates, each row is a sample covariate
    vecdist::Vector{D} # length d vector of marginal distributions for each phenotype
    veclink::Vector{L} # length d vector of link functions for each phenotype's marginal distribution
    # data dimension
    n::Int # sample size
    d::Int # number of phenotypes per sample
    p::Int # number of (non-genetic) covariates per sample
    m::Int # number of parameters in cholesky matrix L
end

# computes trace of Γ = L.L*L.L'
function LinearAlgebra.tr(L::Cholesky)
    s = zero(eltype(L.factors))
    for Lij in LowerTriangular(L.factors)
        s += abs2(Lij)
    end
    return s
end

# overwrites lower triangular part of `A` by `v`
function un_vech!(A::AbstractMatrix, v::AbstractVector)
    m, n = size(A, 1), size(A, 2)
    idx = 1
    for j in 1:n, i in j:m
        A[i, j] = v[idx]
        idx += 1
    end
    A
end
function un_vech!(L::Cholesky, v::AbstractVector)
    un_vech!(L.factors, v)
end

function update_res!(data::MultivariateCopulaData, i::Int, std_res::Vector, η::Matrix, ϕ::Vector)
    yi = @view(data.Y[i, :])
    ηi = @view(η[i, :])
    nuisance_counter = 1
    for j in eachindex(yi)
        μ_j = GLM.linkinv(data.veclink[j], ηi[j])
        varμ_j = GLM.glmvar(data.vecdist[j], μ_j) # Note: for negative binomial, d.r is used
        res_j = yi[j] - μ_j
        std_res[j] = res_j / sqrt(varμ_j)
    end
    return nothing
end

loglik_obs(::Bernoulli, y, μ, wt, ϕ) = wt*GLM.logpdf(Bernoulli(μ), y)
loglik_obs(::Binomial, y, μ, wt, ϕ) = GLM.logpdf(Binomial(Int(wt), μ), Int(y*wt))
loglik_obs(::Gamma, y, μ, wt, ϕ) = wt*GLM.logpdf(Gamma(inv(ϕ), μ*ϕ), y)
loglik_obs(::InverseGaussian, y, μ, wt, ϕ) = wt*GLM.logpdf(InverseGaussian(μ, inv(ϕ)), y)
loglik_obs(::Normal, y, μ, wt, ϕ) = wt*GLM.logpdf(Normal(μ, sqrt(abs(ϕ))), y)
loglik_obs(::Poisson, y, μ, wt, ϕ) = logpdf(Poisson(μ), y)

function component_loglikelihood(
    data::MultivariateCopulaData, i::Int, η::Matrix, ϕ::Vector
    )
    yi = data.Y[i, :]
    ηi = η[i, :]
    logl = 0.0
    for j in eachindex(yi)
        dist = data.vecdist[j]
        link = data.veclink[j]
        μ_ij = GLM.linkinv(link, ηi[j])
        logl += loglik_obs(dist, yi[j], μ_ij, 1.0, 1.0)
    end
    return logl::Float64
end

function loglikelihood!(
    par::Vector, # first p*d are β, next m are for vech(L), next s are for nuisance
    data::MultivariateCopulaData,
    )
    # allocate storages based on `par`
    n, p, m, d = data.n, data.p, data.m, data.d
    B = zeros(p, d)
    copyto!(B, 1, par, 1, p * d)
    L = cholesky(zeros(d, d), check=false)
    un_vech!(L, @view(par[p * d + 1:p * d + m]))
    ϕ = par[p * d + m + 1:end]
    std_res = zeros(d)
    η = zeros(n, d)
    storage_d = zeros(d)    

    # loglikelihood for each sample
    mul!(η, data.X, B)
    logl = zero(eltype(data.X))
    for i in 1:data.n
        # update res and std_res
        update_res!(data, i, std_res, η, ϕ)
        # loglikelihood term 2, i.e. sum sum ln(f_ij | β)
        logl = component_loglikelihood(data, i, η, ϕ)
        # loglikelihood term 1, i.e. -sum ln(1 + 0.5tr(Γ))
        logl -= log(1 + 0.5tr(L))
        # loglikelihood term 3 i.e. sum ln(1 + 0.5 r*Γ*r)
        mul!(storage_d, Transpose(L.L), std_res)
        logl += log(1 + 0.5sum(abs2, storage_d))
    end
    return logl
end

# create instance of MultivariateCopulaData
n = 10000
d = 5
p = 10
m = (d*(d+1)) >> 1
X = randn(n, p)
possible_distributions = [Bernoulli, Poisson, Normal]
vecdist = rand(possible_distributions, d)
veclink = [canonicallink(vecdist[j]()) for j in 1:d]
if typeof(vecdist) <: Vector{UnionAll}
    vecdist = [vecdist[j]() for j in 1:d]
end

# simulate Y
Y = zeros(n, d)
for j in 1:d
    dist = vecdist[j]
    for i in 1:n
        Y[i, j] = rand(dist)
    end
end

# vecdist = [Normal() for _ in 1:d]
# veclink = [IdentityLink() for _ in 1:d]
data = MultivariateCopulaData(Y, X, vecdist, veclink, n, d, p, m);

In [27]:
# obj
par = randn(p*d+m)
@show loglikelihood!(par, data)

# compute grad with Enzyme.jl
grad_storage = zeros(length(par))
Enzyme.autodiff(
    Reverse, loglikelihood!,
    Duplicated(par, grad_storage),
    Const(data)
)
@show grad_storage[1:p*d]
@show grad_storage[p*d+1:end];

loglikelihood!(par, data) = -22.787101329064896


LoadError: Enzyme execution failed.
Mismatched activity for:   %value_phi69 = phi {} addrspace(10)* [ %71, %idxend102 ], [ addrspacecast ({}* inttoptr (i64 4851506176 to {}*) to {} addrspace(10)*), %L198.preheader ] const val: {} addrspace(10)* addrspacecast ({}* inttoptr (i64 4851506176 to {}*) to {} addrspace(10)*)
 value=0.0 of type Float64
You may be using a constant variable as temporary storage for active memory (https://enzyme.mit.edu/julia/stable/faq/#Activity-of-temporary-storage). If not, please open an issue, and either rewrite this variable to not be conditionally active or use Enzyme.API.runtimeActivity!(true) as a workaround for now

Stacktrace:
 [1] getproperty
   @ ./Base.jl:37
 [2] component_loglikelihood
   @ ./In[26]:70


In [28]:
using Enzyme
using Distributions
using GLM
using Random
import GLM.loglik_obs

struct MultiResponse{T, D, L}
    y::Vector{T} # d by 1 vector
    vecdist::Vector{D} # length d vector of marginal distributions, one for each y[i]
    veclink::Vector{L} # length d vector of link functions, one for each y[i]
end

function component_loglikelihood(data::MultiResponse, η::Vector)
    logl = 0.0
    for j in eachindex(data.y)
        dist = data.vecdist[j]
        link = data.veclink[j]
        μ_j = GLM.linkinv(link, η[j])
        logl += loglik_obs(dist, y[j], μ_j, 1.0, 1.0)::Float64
    end
    return logl # type annotation prevents "Duplicate return not supported" error
end

# simulate data
d = 10
possible_distributions = [Bernoulli(), Poisson(), Normal()]
vecdist = rand(possible_distributions, d)
veclink = [canonicallink(vecdist[j]) for j in 1:d]
y = zeros(d)
for j in 1:d
    dist = vecdist[j]
    y[j] = rand(dist)
end
data = MultiResponse(y, vecdist, veclink)

# eval obj
η = randn(d)
component_loglikelihood(data, η)

-9.725361506249175

In [29]:
# compute grad with Enzyme.jl
grad_storage = zeros(length(η))
Enzyme.autodiff(
    Reverse, component_loglikelihood,
    Const(data),
    Duplicated(η, grad_storage)
)
grad_storage

10-element Vector{Float64}:
  0.29876007665027543
  0.5877054458897542
 -0.9098816118368459
  0.6951580633937648
  1.1104443083587645
  0.2900115294652468
 -0.49009064441604466
 -0.14379185622547597
 -0.8386788794861753
 -2.2824990291522864

In [37]:
using Enzyme
using Distributions
using GLM
using Random
import GLM.loglik_obs

function f(x::Vector, y, vecdist, veclink)
    logl = 0.0
    for j in eachindex(y)
        dist = vecdist[j]
        link = veclink[j]
        μ_j = GLM.linkinv(link, x[j])
        logl += loglik_obs(dist, y[j], μ_j, 1.0, 1.0)
    end
#     return logl::Float64 # type annotation prevents "Duplicate return not supported" error
    return logl
end

# simulate data
d = 10
possible_distributions = [Bernoulli(), Poisson(), Normal()]
vecdist = rand(possible_distributions, d)
veclink = [canonicallink(vecdist[j]) for j in 1:d]
y = [rand(dist) for dist in vecdist] |> Vector{Float64}

# eval obj
x = randn(d)
f(x, y, vecdist, veclink)

-20.18930291405734

In [38]:
y

10-element Vector{Float64}:
 -0.3309293141867698
  1.0
 -0.32500156692931625
  1.0
  1.0
  0.0
 -0.6129602928216916
  4.0
  1.0
  2.0

In [39]:
@code_warntype f(x, y, vecdist, veclink)

MethodInstance for f(::Vector{Float64}, ::Vector{Float64}, ::Vector{UnivariateDistribution}, ::Vector{Link})
  from f([90mx[39m::[1mVector[22m, [90my[39m, [90mvecdist[39m, [90mveclink[39m)[90m @[39m [90mMain[39m [90m[4mIn[37]:7[24m[39m
Arguments
  #self#[36m::Core.Const(f)[39m
  x[36m::Vector{Float64}[39m
  y[36m::Vector{Float64}[39m
  vecdist[36m::Vector{UnivariateDistribution}[39m
  veclink[36m::Vector{Link}[39m
Locals
  @_6[33m[1m::Union{Nothing, Tuple{Int64, Int64}}[22m[39m
  logl[91m[1m::Any[22m[39m
  j[36m::Int64[39m
  μ_j[91m[1m::Any[22m[39m
  link[91m[1m::Link[22m[39m
  dist[91m[1m::UnivariateDistribution[22m[39m
Body[91m[1m::Any[22m[39m
[90m1 ─[39m       (logl = 0.0)
[90m│  [39m %2  = Main.eachindex(y)[36m::Base.OneTo{Int64}[39m
[90m│  [39m       (@_6 = Base.iterate(%2))
[90m│  [39m %4  = (@_6 === nothing)[36m::Bool[39m
[90m│  [39m %5  = Base.not_int(%4)[36m::Bool[39m
[90m└──[39m       goto #4 if not %5
[

In [40]:
@code_warntype loglik_obs(Bernoulli(), true, 0.5, 1.0, 1.0)

MethodInstance for GLM.loglik_obs(::Bernoulli{Float64}, ::Bool, ::Float64, ::Float64, ::Float64)
  from loglik_obs(::[1mBernoulli[22m, [90my[39m, [90mμ[39m, [90mwt[39m, [90mϕ[39m)[90m @[39m [90mGLM[39m [90m~/.julia/packages/GLM/vM20T/src/[39m[90m[4mglmtools.jl:527[24m[39m
Arguments
  #self#[36m::Core.Const(GLM.loglik_obs)[39m
  _[36m::Bernoulli{Float64}[39m
  y[36m::Bool[39m
  μ[36m::Float64[39m
  wt[36m::Float64[39m
  ϕ[36m::Float64[39m
Body[36m::Float64[39m
[90m1 ─[39m %1 = GLM.Bernoulli(μ)[36m::Bernoulli{Float64}[39m
[90m│  [39m %2 = GLM.logpdf(%1, y)[36m::Float64[39m
[90m│  [39m %3 = (wt * %2)[36m::Float64[39m
[90m└──[39m      return %3



In [27]:
# compute grad with Enzyme.jl
grad_storage = zeros(length(x))
Enzyme.autodiff(
    Reverse, f, Active, 
    Duplicated(x, grad_storage),
    Const(y),
    Const(vecdist),
    Const(veclink),
)
grad_storage

10-element Vector{Float64}:
 -0.7459511440030229
  1.5542812469558198
 -0.08221167111277394
  0.25824707444595857
 -1.6895010599606508
  0.6905515909519077
 -1.7728286543952865
 -0.8917142836857651
  0.7447009319673791
  0.15132394231963636