# Check if derivative of loglikelihood is correct numerically

In [1]:
using Revise
using ForwardDiff
using DataFrames, Random, GLM, GLMCopula
using ForwardDiff, Test, LinearAlgebra
using LinearAlgebra: BlasReal, copytri!
using SpecialFunctions

┌ Info: Precompiling ForwardDiff [f6369f11-7733-5829-9624-2563aa707210]
└ @ Base loading.jl:1317
┌ Info: Precompiling GLMCopula [c47b6ae2-b804-4668-9957-eb588c99ffbc]
└ @ Base loading.jl:1317


## First simulate data

In [4]:
Random.seed!(1234)

# sample size
N = 10000
# observations per subject
n = 5

variance_component_1 = 0.9
variance_component_2 = 0.1

r = 1
p = 0.7
μ = r * (1-p) * inv(p)

#var = r * (1-p) * inv(p^2)

# true beta
β_true = log(μ)

dist = NegativeBinomial

Γ = variance_component_1 * ones(n, n) + variance_component_2 * Matrix(I, n, n)
vecd = [dist(r, p) for i in 1:n]
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)

Y = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, N)

10000-element Vector{Vector{Float64}}:
 [0.0, 1.0, 0.0, 0.0, 1.0]
 [2.0, 0.0, 0.0, 0.0, 1.0]
 [1.0, 0.0, 0.0, 4.0, 1.0]
 [0.0, 0.0, 0.0, 0.0, 1.0]
 [3.0, 3.0, 3.0, 1.0, 1.0]
 [0.0, 0.0, 0.0, 0.0, 0.0]
 [3.0, 0.0, 0.0, 0.0, 0.0]
 [0.0, 1.0, 0.0, 0.0, 2.0]
 [0.0, 1.0, 0.0, 0.0, 0.0]
 [0.0, 0.0, 0.0, 0.0, 0.0]
 [1.0, 0.0, 0.0, 0.0, 0.0]
 [0.0, 0.0, 0.0, 0.0, 0.0]
 [1.0, 0.0, 0.0, 0.0, 0.0]
 ⋮
 [0.0, 0.0, 1.0, 0.0, 0.0]
 [1.0, 0.0, 0.0, 0.0, 0.0]
 [0.0, 1.0, 0.0, 0.0, 1.0]
 [0.0, 0.0, 1.0, 2.0, 0.0]
 [1.0, 0.0, 0.0, 0.0, 3.0]
 [0.0, 0.0, 0.0, 0.0, 0.0]
 [0.0, 0.0, 1.0, 2.0, 1.0]
 [2.0, 2.0, 0.0, 0.0, 0.0]
 [0.0, 0.0, 0.0, 0.0, 0.0]
 [0.0, 0.0, 0.0, 1.0, 0.0]
 [2.0, 1.0, 2.0, 0.0, 0.0]
 [0.0, 0.0, 1.0, 0.0, 0.0]

## Loglikelihood and gradient as function of nuisance parameter

The loglikelihood has 3 terms, but only the 2nd and 3rd depend on $r$. The gradient of the 3rd term involves complicated chain rules, so let us check the 3rd term numerically.

In [119]:
# need y, μ, η to be predefined
function loglikelihood_r(r)
    D = Diagonal([sqrt(exp(η[j])*(exp(η[j])+r) / r) for j in 1:length(η)])
    resid = inv(D) * (y - μ)
    return log(1 + 0.5resid'*Γ*resid)
end

function grad_r(r)
    D = Diagonal([sqrt(exp(η[j])*(exp(η[j])+r) / r) for j in 1:length(η)])
    dD = Diagonal([-exp(2η[i]) / (2r^1.5 * sqrt(exp(η[i])*(exp(η[i])+r))) for i in 1:length(η)])
    resid = inv(D) * (y - μ)
    dresid = -inv(D)*dD*inv(D)*(y - μ)
    return resid'*Γ*dresid / (1 + 0.5resid'*Γ*resid)
end

function hess_r(r)
    D = Diagonal([sqrt(exp(η[j])*(exp(η[j])+r) / r) for j in 1:length(η)])
    dD = Diagonal([-exp(2η[i]) / (2r^1.5 * sqrt(exp(η[i])*(exp(η[i])+r))) for i in 1:length(η)])
    d2D = Diagonal([(exp(3η[i]) / (4r^1.5 * (exp(η[i])*(exp(η[i])+r))^(1.5))) + 
        (3exp(2η[i]) / (r^(2.5)*sqrt(exp(η[i])*(exp(η[i])+r)))) for i in 1:length(η)])
    resid = inv(D) * (y - μ)
    dresid = -inv(D)*dD*inv(D)*(y - μ)
    d2resid = (2inv(D)*dD*inv(D)*dD*inv(D) - inv(D)*d2D*inv(D))*(y - μ)
    denom = 1 + 0.5resid'*Γ*resid
    term1 = (resid'*Γ*dresid / denom)^2
    term2 = dresid'*Γ*dresid / denom
    term3 = resid'*Γ*d2resid / denom
    return -term1 + term2 + term3
end


hess_r (generic function with 1 method)

## Check math against numerical derivatives

In [130]:
# need y, η, μ, and r
y = [1.0, 0.0, 0.0, 4.0, 1.0]
η = randn(5)
μ = exp.(η)
r = 5.0

# first derivatives
g = x -> ForwardDiff.derivative(loglikelihood_r, x)
@show grad_r(r) # actual
@show g(r)      # numerical

# second derivatives
h = x -> ForwardDiff.derivative(g, x)
@show hess_r(r) # actual
@show h(r);     # numerical


grad_r(r) = 0.008551061091788182
g(r) = 0.008551061091788164
hess_r(r) = -0.011515402238817186
h(r) = -0.0038194472562078185
