# Check if gradient is correct numerically

We will check the analytic gradients against forward-mode automatic differentiation as implemented in `ForwardDiff.jl`.

In [1]:
using MendelIHT
using ForwardDiff
using Random
using LinearAlgebra

## First simulate data

In [2]:
"""
    simulate_multivariate_sparse(n, p, k, r; seed=2021)

Simulate a genotype matrix together with a multivariate response.

The global RNG is seeded with `seed`; a SNP array is drawn with
`simulate_random_snparray(undef, n, p)` and converted to an imputed, centered
and scaled `Matrix{Float64}`. The response is then generated by
`simulate_random_response`, with a column of ones (intercept) as the only
non-genetic covariate and one random intercept per trait (`r` of them).
`k` is forwarded unchanged to `simulate_random_response` (presumably the
number of non-zero effects — confirm against MendelIHT).

# Returns
A tuple `(xla, Zt, true_b, true_Σ, Yt)`: the converted genotype matrix, the
transposed intercept column, the true effects and `true_Σ` as returned by
`simulate_random_response`, and the transposed response.
"""
function simulate_multivariate_sparse(
    n::Int, p::Int, k::Int, r::Int; seed::Int=2021)
    Random.seed!(seed)

    # genotypes: simulated SNP array (no missing data), then a dense Float64 copy
    snps = simulate_random_snparray(undef, n, p)
    xla = convert(Matrix{Float64}, snps, impute=true, center=true, scale=true)

    # the intercept is the only non-genetic covariate; each trait has its own intercept
    ones_col = ones(size(snps, 1), 1)
    trait_intercepts = randn(r)'

    # response, covariance and true effects; the positions of the non-zero
    # effects are also returned by the simulator but are not needed here
    Y, true_Σ, true_b, _ = simulate_random_response(
        xla, k, r, Zu=ones_col * trait_intercepts, overlap=0)

    return xla, Matrix(ones_col'), true_b, true_Σ, Matrix(Y')
end

# problem dimensions passed to the simulator (see `simulate_multivariate_sparse`)
n, p, k, r = 100, 1000, 10, 2
seed = 2021

# simulate the data, then keep the genotypes transposed so that `B * X` is defined
xla, Z, true_b, Σ, Y = simulate_multivariate_sparse(n, p, k, r; seed=seed)
X = transpose(xla);

## Loglikelihood and gradient functions according to math

Evaluate the gradient and loglikelihood at $B_{ij} = 0$ for all $i, j$, with $\Gamma$ set to the identity matrix.

In [3]:
"""
    loglikelihood_B(B)

Loglikelihood as a function of `B`, evaluated as
`n/2 * logdet(Γ) - 1/2 * tr(Γ * R * R')` with residuals `R = Y - B * X` and
`n` the number of columns of `R`. `Y`, `X` and `Γ` are read from global scope.
"""
function loglikelihood_B(B)
    R = Y - B * X
    ncols = size(R, 2)
    logdet_term = ncols/2 * logdet(Γ)
    trace_term = 0.5 * tr(Γ*R*R')
    return logdet_term - trace_term
end

"""
    grad_B(B)

Analytic gradient of the loglikelihood with respect to `B`, computed as
`Γ * (Y - B * X) * X'`. `Y`, `X` and `Γ` are read from global scope.
"""
function grad_B(B)
    R = Y - B * X
    return Γ * R * X'
end

grad_B (generic function with 1 method)

In [4]:
"""
    loglikelihood_Γ(Γ)

Loglikelihood as a function of `Γ`, evaluated as
`n/2 * logdet(Γ) - 1/2 * tr(Γ * R * R')` with residuals `R = Y - B * X` and
`n` the number of columns of `R`. `Y`, `B` and `X` are read from global scope.
"""
function loglikelihood_Γ(Γ)
    R = Y - B * X
    ncols = size(R, 2)
    logdet_term = ncols/2 * logdet(Γ)
    trace_term = 0.5 * tr(Γ*R*R')
    return logdet_term - trace_term
end

"""
    grad_Γ(Γ)

Analytic gradient of the loglikelihood with respect to `Γ`, computed as
`n/2 * inv(Γ) - 1/2 * R * R'` with residuals `R = Y - B * X`.
`Y`, `B` and `X` are read from global scope.

The sample size `n` is taken from the number of columns of `R`, exactly as in
`loglikelihood_Γ`, rather than from a global `n`, so the gradient always
matches the function being differentiated.

Note: `inv(Γ)` is the derivative of `logdet(Γ)` only for symmetric `Γ`
(in general it is `inv(Γ)'`).
"""
function grad_Γ(Γ)
    resid = Y - B * X
    n = size(resid, 2) # sample size from the data, not from a global
    return 0.5n * inv(Γ) - 0.5 * resid * resid'
end

grad_Γ (generic function with 1 method)

### Actual gradient with respect to B

In [5]:
# evaluation point: B is all zeros and Γ is the r×r identity
B = zeros(Float64, r, p)
Γ = Matrix(1.0I, r, r)

# print the loglikelihood, then display the analytic gradient with respect to B
@show loglikelihood_B(B)
∇B = grad_B(B)

loglikelihood_B(B) = -625.8199404112053


2×1000 Matrix{Float64}:
 -7.56848  17.3549  -10.1027   -5.75132  …  -0.300151   0.275582  -1.04698
  1.99913  22.5213    8.01207  36.7295      36.759     -6.99386   54.6926

### Numerical gradient with respect to B

In [6]:
# gradient of `loglikelihood_B` obtained from ForwardDiff, evaluated at the same B
g = M -> ForwardDiff.gradient(loglikelihood_B, M)
g(B)

2×1000 Matrix{Float64}:
 -7.56848  17.3549  -10.1027   -5.75132  …  -0.300151   0.275582  -1.04698
  1.99913  22.5213    8.01207  36.7295      36.759     -6.99386   54.6926

### Actual gradient with respect to $\Gamma$

In [7]:
# evaluation point: B is all zeros and Γ is the r×r identity
B = zeros(Float64, r, p)
Γ = Matrix(1.0I, r, r)

# print the loglikelihood, then display the analytic gradient with respect to Γ
@show loglikelihood_Γ(Γ)
∇Γ = grad_Γ(Γ)

loglikelihood_Γ(Γ) = -625.8199404112053


2×2 Matrix{Float64}:
  -70.0254  -124.976
 -124.976   -455.794

### Numerical gradient with respect to $\Gamma$

In [8]:
# gradient of `loglikelihood_Γ` obtained from ForwardDiff, evaluated at the same Γ
g = M -> ForwardDiff.gradient(loglikelihood_Γ, M)
g(Γ)

2×2 Matrix{Float64}:
  -70.0254  -124.976
 -124.976   -455.794