# Let's check whether $\nabla_\beta res$ is calculated correctly using ForwardDiff

The standardized residual for observation $j$ of sample $i$ is

$$res_{ij}(\beta) = \frac{y_{ij} - \mu_{ij}(\beta)}{\sigma_{ij}(\beta)}$$

In [62]:
using Revise
using DataFrames, Random, GLM, QuasiCopula
using ForwardDiff, Test, LinearAlgebra
using LinearAlgebra: BlasReal, copytri!
using ToeplitzMatrices
using BenchmarkTools
using SnpArrays
using ForwardDiff
# using MendelPlots

# restrict BLAS to a single thread
BLAS.set_num_threads(1)
# query the number of Julia threads available in this session
Threads.nthreads()

# Simulate an n × p genotype matrix with entries in {0, 1, 2} and return it as a SnpArray.
#
# For every SNP j a minor allele frequency is drawn as `0.5rand()`, then two independent
# Bernoulli(maf) alleles are drawn for each of the n samples. The whole column is redrawn
# (with a fresh maf) until it carries more than `min_ma` minor alleles.
#
# Arguments
# - `s`      : forwarded to `SnpArray(s, n, p)`; either a String or `undef`.
# - `n`, `p` : number of samples (rows) and SNPs (columns).
# - `mafs`   : vector of length at least p; entry j is overwritten with the maf of SNP j.
# - `min_ma` : a SNP is accepted only once its minor allele count exceeds this value.
#
# Throws
# - `DimensionMismatch` if `mafs` has fewer than p entries.
# - `ArgumentError` if 2n alleles can never exceed `min_ma` (the rejection loop would
#   otherwise never terminate).
function simulate_random_snparray(s::Union{String, UndefInitializer}, n::Int64,
    p::Int64; mafs::Vector{Float64}=zeros(Float64, p), min_ma::Int = 5)

    length(mafs) >= p || throw(DimensionMismatch(
        "mafs has length $(length(mafs)) but must hold at least p = $p entries"))
    (p == 0 || 2n > min_ma) || throw(ArgumentError(
        "min_ma = $min_ma can never be exceeded with n = $n samples (2n alleles)"))

    # two allele indicator matrices; their sum is the {0, 1, 2} genotype
    A1 = BitArray(undef, n, p)
    A2 = BitArray(undef, n, p)
    for j in 1:p
        # Always sample at least once, so the undef bits of column j are never returned
        # even when min_ma is negative.
        while true
            maf = 0.5rand()
            for i in 1:n
                A1[i, j] = rand(Bernoulli(maf))
                A2[i, j] = rand(Bernoulli(maf))
            end
            minor_alleles = sum(view(A1, :, j)) + sum(view(A2, :, j))
            if minor_alleles > min_ma
                mafs[j] = maf
                break
            end
        end
    end

    # encode the two allele matrices as a SnpArray
    return _make_snparray(s, A1, A2)
end

# Build a SnpArray from two allele indicator matrices of equal shape.
#
# The allele count A1[i] + A2[i] is translated into the byte written to the SnpArray:
# 0 -> 0x00, 1 -> 0x02, 2 -> 0x03. `s` is forwarded to `SnpArray(s, n, p)`.
function _make_snparray(s::Union{String, UndefInitializer}, A1::BitArray, A2::BitArray)
    nrows, ncols = size(A1)
    snps = SnpArray(s, nrows, ncols)
    # byte stored for an allele count of 0, 1 and 2 respectively
    codes = (0x00, 0x02, 0x03)
    for idx in 1:(nrows * ncols)
        allele_count = A1[idx] + A2[idx]
        if !(0 <= allele_count <= 2)
            throw(MissingException("matrix shouldn't have missing values!"))
        end
        snps[idx] = codes[allele_count + 1]
    end
    return snps
end


# simulate some data
# Simulate clustered (longitudinal) responses with optional SNP effects and wrap them in a
# variance-component quasi-copula model.
#
# Returns the tuple (qc_model, Γ, G, βtrue, θtrue, γtrue): the model object, the d × d
# covariance built from the variance components, the simulated SnpArray, and the true
# fixed effects, variance components and SNP effects.
#
# When `y_distribution == Normal` a GaussianCopulaVCModel is built (with precision 10);
# every other distribution goes through the GLMCopulaVCModel branch, which stores the
# marginals as discrete univariate distributions.
function simulate_VC_longitudinal(;
    n = 1000, # sample size
    d = 5, # number of observations per sample
    p = 3, # number of nongenetic covariates, including intercept
    m = 2, # number of variance components
    q = 1000, # number of SNPs
    k = 10, # number of causal SNPs
    seed = 2022,
    y_distribution = Bernoulli,
    T = Float64,
    )
    if !(m == 1 || m == 2)
        error("m (number of VC) must be 1 or 2")
    end

    # non-genetic effect sizes, plus the distribution/link pair used for the marginals
    Random.seed!(seed)
    βtrue = rand(Uniform(-0.2, 0.2), p)
    dist = y_distribution()
    link = canonicallink(dist)
    Dist, Link = typeof(dist), typeof(link)

    # variance components: Γ = θ₁ * ones(d, d), plus θ₂ * I when m == 2
    θtrue = fill(0.1, m)
    V1 = ones(d, d)
    V2 = Matrix(I, d, d)
    Γ = θtrue[1] * V1
    if m == 2
        Γ = Γ + θtrue[2] * V2
    end

    # one d × p design matrix per sample: intercept column followed by Gaussian covariates
    Random.seed!(seed)
    X_full = [hcat(ones(d), randn(d, p - 1)) for _ in 1:n]

    # simulate a random SnpArray with q SNPs and randomly choose k of them to be causal
    # (the genotype seed is fixed at 2022 and does not follow the `seed` keyword)
    Random.seed!(2022)
    G = simulate_random_snparray(undef, n, q)
    Gfloat = convert(Matrix{T}, G, center=true, scale=false)
    γtrue = zeros(q)
    γtrue[1:k] .= rand([-0.2, 0.2], k)
    shuffle!(γtrue)
    η_G = Gfloat * γtrue

    # simulate phenotypes, one cluster of d observations per sample
    if y_distribution == Normal
        τtrue = 10.0
        σ = sqrt(inv(τtrue))
        obs = Vector{GaussianCopulaVCObs{T}}(undef, n)
        for i in 1:n
            X = X_full[i]
            # linear predictor = fixed effects + the sample's genetic effect
            μ = GLM.linkinv.(link, X * βtrue .+ η_G[i])
            marginals = ContinuousUnivariateDistribution[
                y_distribution(μ[t], σ) for t in 1:d
            ]
            joint = NonMixedMultivariateDistribution(marginals, Γ)
            # simulate a single response vector y (res is a work buffer for the sampler)
            y = Vector{T}(undef, d)
            res = Vector{T}(undef, d)
            rand(joint, y, res)
            V = m == 1 ? [V1] : [V1, V2]
            obs[i] = GaussianCopulaVCObs(y, X, V)
        end
        qc_model = GaussianCopulaVCModel(obs)
    else
        obs = Vector{GLMCopulaVCObs{T, Dist, Link}}(undef, n)
        for i in 1:n
            X = X_full[i]
            # linear predictor = fixed effects + the sample's genetic effect
            μ = GLM.linkinv.(link, X * βtrue .+ η_G[i])
            marginals = DiscreteUnivariateDistribution[
                y_distribution(μ[t]) for t in 1:d
            ]
            joint = NonMixedMultivariateDistribution(marginals, Γ)
            # simulate a single response vector y (res is a work buffer for the sampler)
            y = Vector{T}(undef, d)
            res = Vector{T}(undef, d)
            rand(joint, y, res)
            V = m == 1 ? [V1] : [V1, V2]
            obs[i] = GLMCopulaVCObs(y, X, V, dist, link)
        end
        qc_model = GLMCopulaVCModel(obs)
    end
    return qc_model, Γ, G, βtrue, θtrue, γtrue
end

k = 0 # number of causal SNPs (with 0, every entry of γtrue stays zero)

# Simulate one data set and build the model. The call returns the model, the covariance Γ,
# the simulated SnpArray G, and the true β, θ and γ used to generate the data.
qc_model, Γ, G, βtrue, θtrue, γtrue = simulate_VC_longitudinal(
    n = 5000, # sample size
    d = 4, # number of observations per sample
    p = 3, # number of fixed effects, including intercept
    m = 1, # number of variance components
    q = 1000, # number of SNPs
    k = k, # number of causal SNPs
    seed = 1000,
    y_distribution = Normal,
    T = Float64,
)

# print the model summary
@show qc_model;

qc_model = Quasi-Copula Variance Component Model
  * base distribution: Normal
  * link function: IdentityLink
  * number of clusters: 5000
  * cluster size min, max: 4, 4
  * number of variance components: 1
  * number of fixed effects: 3



## Normal

In [64]:
# sample data: design matrix and response of the first cluster in qc_model.
# Both are globals that resβ and ∇resβ below read directly.
X = qc_model.data[1].X # d by p
y = qc_model.data[1].y # d by 1

# objective: residuals (y - μ) / sqrt(var(μ)) of one sample, Normal with identity link.
# Reads the globals X (d by p) and y (d by 1); returns a d-vector.
function resβ(β)
    linpred = X * β # d by 1
    mean_vec = GLM.linkinv.(IdentityLink(), linpred)
    sd_vec = sqrt.(GLM.glmvar.(Normal(), mean_vec))
    return (y - mean_vec) ./ sd_vec # d by 1
end

# mathematical gradient: closed-form Jacobian of resβ for the Normal / identity-link case.
# Entry (j, i) is -X[j, i]; β is accepted only so the signature matches the autodiff call.
function ∇resβ(β)
    jac = zeros(size(X))
    jac .= .-X
    return jac # d × p
end

# autodiff gradient: Jacobian of resβ computed by ForwardDiff
∇resβ_autodiff = x -> ForwardDiff.jacobian(resβ, x)

# random beta vector
β = rand(size(qc_model.data[1].X, 2))

# check objective
@show resβ(β)

# compare mathematical and autodiff gradient: evaluate each Jacobian once, assert that
# they agree numerically, then show them side by side (column 1 = formula, 2 = autodiff)
jac_math = ∇resβ(β)
jac_auto = ∇resβ_autodiff(β)
@test jac_math ≈ jac_auto
[vec(jac_math) vec(jac_auto)]

resβ(β) = [0.4625446462894864, -3.8311602032989285, -1.500388409286962, -0.21493061160209337]


12×2 Matrix{Float64}:
 -1.0        -1.0
 -1.0        -1.0
 -1.0        -1.0
 -1.0        -1.0
  2.07458     2.07458
 -1.94686    -1.94686
  0.0808759   0.0808759
  0.154606    0.154606
 -0.931964   -0.931964
 -2.26098    -2.26098
 -1.19819    -1.19819
  0.0763038   0.0763038

## Bernoulli

Assume `y` and `X` are given. We calculate the residuals for a single sample.

In [51]:
# sample data: design matrix and response of the first cluster in qc_model.
# Both are globals that resβ and ∇resβ below read directly.
# NOTE(review): the simulate_VC_longitudinal call visible earlier in this notebook uses
# y_distribution = Normal — confirm whether qc_model should be re-simulated with Bernoulli
# before running this cell.
X = qc_model.data[1].X # d by p
y = qc_model.data[1].y # d by 1

# objective: residuals (y - μ) / sqrt(var(μ)) of one sample, Bernoulli with logit link.
# Reads the globals X (d by p) and y (d by 1); returns a d-vector.
function resβ(β)
    linpred = X * β # d by 1
    mean_vec = GLM.linkinv.(LogitLink(), linpred)
    sd_vec = sqrt.(GLM.glmvar.(Bernoulli(), mean_vec))
    return (y - mean_vec) ./ sd_vec # d by 1
end

# mathematical gradient: closed-form Jacobian of resβ for the Bernoulli / logit-link case.
# Entry (j, i) is -sqrt(var_j) * X[j, i] - 0.5 * res_j * (1 - 2μ_j) * X[j, i].
function ∇resβ(β)
    η = X * β # d by 1
    μ = GLM.linkinv.(LogitLink(), η) # d by 1
    varμ = GLM.glmvar.(Bernoulli(), μ) # d by 1
    res = (y - μ) ./ sqrt.(varμ) # d by 1
    jac = zeros(size(X))
    # the d-vectors broadcast along the columns of X, so row j uses the j-th entries
    @. jac = -sqrt(varμ) * X - (0.5 * res * (1 - 2μ) * X)
    return jac # d × p
end

# autodiff gradient: Jacobian of resβ computed by ForwardDiff
∇resβ_autodiff = x -> ForwardDiff.jacobian(resβ, x)

# random beta vector
β = rand(size(qc_model.data[1].X, 2))

# check objective
@show resβ(β)

# compare mathematical and autodiff gradient: evaluate each Jacobian once, assert that
# they agree numerically, then show them side by side (column 1 = formula, 2 = autodiff)
jac_math = ∇resβ(β)
jac_auto = ∇resβ_autodiff(β)
@test jac_math ≈ jac_auto
[vec(jac_math) vec(jac_auto)]

#30 (generic function with 1 method)

## Poisson

In [61]:
# sample data: design matrix and response of the first cluster in qc_model.
# Both are globals that resβ and ∇resβ below read directly.
# NOTE(review): the simulate_VC_longitudinal call visible earlier in this notebook uses
# y_distribution = Normal — confirm whether qc_model should be re-simulated with Poisson
# before running this cell.
X = qc_model.data[1].X # d by p
y = qc_model.data[1].y # d by 1

# objective: residuals (y - μ) / sqrt(var(μ)) of one sample, Poisson with log link.
# Reads the globals X (d by p) and y (d by 1); returns a d-vector.
function resβ(β)
    linpred = X * β # d by 1
    mean_vec = GLM.linkinv.(LogLink(), linpred)
    sd_vec = sqrt.(GLM.glmvar.(Poisson(), mean_vec))
    return (y - mean_vec) ./ sd_vec # d by 1
end

# mathematical gradient: closed-form Jacobian of resβ for the Poisson / log-link case.
# Entry (j, i) is X[j, i] * (-(1 / sqrt(var_j) + 0.5 * res_j / var_j) * dμ_j), where dμ is
# the derivative of the mean with respect to the linear predictor.
function ∇resβ(β)
    η = X * β # d by 1
    μ = GLM.linkinv.(LogLink(), η) # d by 1
    varμ = GLM.glmvar.(Poisson(), μ) # d by 1
    res = (y - μ) ./ sqrt.(varμ) # d by 1
    dμ = GLM.mueta.(LogLink(), η) # d by 1
    jac = zeros(size(X))
    # the d-vectors broadcast along the columns of X, so row j uses the j-th entries
    @. jac = X * (-(inv(sqrt(varμ)) + (0.5 * inv(varμ)) * res) * dμ)
    return jac # d × p
end

# autodiff gradient: Jacobian of resβ computed by ForwardDiff
∇resβ_autodiff = x -> ForwardDiff.jacobian(resβ, x)

# random beta vector
β = rand(size(qc_model.data[1].X, 2))

# check objective
@show resβ(β)

# compare mathematical and autodiff gradient: evaluate each Jacobian once, assert that
# they agree numerically, then show them side by side (column 1 = formula, 2 = autodiff)
jac_math = ∇resβ(β)
jac_auto = ∇resβ_autodiff(β)
@test jac_math ≈ jac_auto
[vec(jac_math) vec(jac_auto)]

resβ(β) = [0.8012638765796852, -6.734952547066679, -1.3994071698413866, -0.48023808695797]


12×2 Matrix{Float64}:
 -1.07727    -1.07727
 -3.65238    -3.65238
 -1.22049    -1.22049
 -1.02842    -1.02842
  2.23488     2.23488
 -7.11068    -7.11068
  0.0987079   0.0987079
  0.159001    0.159001
 -1.00397    -1.00397
 -8.25796    -8.25796
 -1.46237    -1.46237
  0.0784727   0.0784727