# Compare Autodiff speed with explicit formulas 

In [3]:
using Revise
using DataFrames, Random, GLM, QuasiCopula
using ForwardDiff, Test, LinearAlgebra
using LinearAlgebra: BlasReal, copytri!
using ToeplitzMatrices
using BenchmarkTools
using SnpArrays
using MendelPlots
ENV["COLUMNS"] = 240

BLAS.set_num_threads(1)
Threads.nthreads()

function A_mul_b!(c::AbstractVector{T}, A::AbstractMatrix, b::AbstractVector) where T
    n, p = size(A)
    fill!(c, zero(T))
    for j in 1:p, i in 1:n
        c[i] += A[i, j] * b[j]
    end
    return c
end

A_mul_b! (generic function with 1 method)

## $f(\beta) = \frac{1}{2}(y - X\beta)^t(y - X\beta)$

The closed form gradient is clearly 
\begin{align*}
\nabla f(\beta) &= -X^t(y - X\beta)\\
\nabla^2 f(\beta) &= X^tX
\end{align*}
Lets check how fast autodiff can compute them, compared to evaluating them directly.

In [16]:
# closed for obj, grad, and hessians
function f(β::AbstractVector{T}, X::AbstractMatrix, y::AbstractVector) where T
    r = zeros(T, size(X, 1))
    μ = zeros(T, size(X, 1))
    A_mul_b!(μ, X, β)
    r .= y .- μ
    return 0.5 * dot(r, r)
end
function ∇f(β, X, y, r = zeros(size(X, 1)), μ=zeros(size(X, 1)))
    A_mul_b!(μ, X, β)
    r .= y .- μ
    return -X'*r
end
function ∇²f(β, X, y, r = zeros(size(X, 1)), μ=zeros(size(X, 1)))
    return X'*X
end

# autodiff objective, grad, and hessian
eval_f(β) = f(β, X, y)
∇f_auto = x -> ForwardDiff.gradient(eval_f, x)
∇²f_auto = x -> ForwardDiff.hessian(eval_f, x)

#15 (generic function with 1 method)

### First check answer

In [29]:
n = 100
p = 100
X = randn(n, p)
β = randn(p)
y = X * β

# check obj
@show f(β, X, y)
@show eval_f(β);

f(β, X, y) = 9.944023118618397e-28
eval_f(β) = 9.944023118618397e-28


In [30]:
# check grad
[∇f(β, X, y) ∇f_auto(β)]

100×2 Matrix{Float64}:
 -5.57678e-15  -5.57678e-15
 -4.21966e-16  -4.21966e-16
 -7.64662e-15  -7.64662e-15
  1.41683e-14   1.41683e-14
 -2.55724e-14  -2.55724e-14
  2.42931e-14   2.42931e-14
 -7.69064e-14  -7.69064e-14
  4.65141e-15   4.65141e-15
 -4.19809e-14  -4.19809e-14
 -1.1506e-13   -1.1506e-13
 -2.85472e-15  -2.85472e-15
  5.84794e-14   5.84794e-14
  1.7187e-14    1.7187e-14
  ⋮            
  5.682e-14     5.682e-14
 -4.10495e-16  -4.10495e-16
 -1.3364e-14   -1.3364e-14
  6.61287e-14   6.61287e-14
 -2.73213e-14  -2.73213e-14
  7.21895e-14   7.21895e-14
 -4.25375e-14  -4.25375e-14
 -3.66414e-14  -3.66414e-14
  9.92626e-15   9.92626e-15
  5.08299e-14   5.08299e-14
  2.62754e-15   2.62754e-15
  2.87784e-14   2.87784e-14

In [31]:
# check Hessian 
H = ∇²f(β, X, y)
autoH = ∇²f_auto(β)
[vec(H) vec(autoH)]

10000×2 Matrix{Float64}:
 119.745    119.745
  -5.80716   -5.80716
  -8.9997    -8.9997
  -1.18796   -1.18796
   8.67201    8.67201
  -6.17836   -6.17836
   8.38473    8.38473
  -4.179     -4.179
  -1.93365   -1.93365
 -13.9399   -13.9399
  -4.32774   -4.32774
  -7.95671   -7.95671
  -1.08977   -1.08977
   ⋮        
   5.32354    5.32354
  -7.45453   -7.45453
  12.4775    12.4775
   9.09656    9.09656
   5.32017    5.32017
   4.39292    4.39292
 -11.9773   -11.9773
   4.00896    4.00896
  -4.62155   -4.62155
  11.8331    11.8331
   9.78392    9.78392
 114.875    114.875

### Check timings

In [28]:
# gradient time
@btime ∇f(β, X, y)
@btime ∇f_auto(β)

  878.542 μs (5 allocations: 7.65 MiB)
  220.715 ms (467 allocations: 16.79 MiB)


1000-element Vector{Float64}:
  1.6321954038222625e-12
  9.900029849635103e-14
  6.60467264998852e-13
  5.639302364233724e-13
 -2.3336143099206565e-13
 -1.0876015923364764e-12
  2.4145508108254525e-12
  8.560196882186117e-13
  5.085072934970569e-13
 -4.5327238892708e-13
 -4.640575089703949e-13
  1.3870106707262891e-12
 -8.35149769034771e-13
  ⋮
  7.034051691074424e-13
  1.1818353356712639e-12
  1.3742213659241252e-12
 -6.055607261938495e-14
  2.1541920639901834e-12
 -7.47081478866019e-13
 -9.880517612907145e-13
 -9.45421841784405e-13
 -6.952139919132923e-13
  2.8041203485439603e-12
 -9.797590957048214e-13
 -6.39786389009473e-13

In [32]:
# hessian time
@btime ∇²f(β, X, y)
@btime ∇²f_auto(β)

  64.791 μs (4 allocations: 79.92 KiB)
  122.520 ms (424 allocations: 21.36 MiB)


100×100 Matrix{Float64}:
 119.745      -5.80716    -8.9997     -1.18796     8.67201    -6.17836     8.38473    -4.179      -1.93365   …  -13.4517     -1.73344   -14.5646      5.89816  -10.4461     -6.01242    9.94028    19.9976     -7.95113    0.273596
  -5.80716    91.3398    -20.0309      5.92322    22.855      -8.80886    -1.78752     2.38875    -1.43369       -2.7677     -0.795893    8.46903     4.31334   -8.12187    -3.21821   -6.97315   -11.0662    -10.6633   -10.9916
  -8.9997    -20.0309    118.199      -3.04363     0.709349   -4.69749    12.6706      0.170084   -3.44246        7.72426     0.502916    1.27746    11.8372    -5.32281    -8.02232   -2.39394    15.6116      4.06819    5.18208
  -1.18796     5.92322    -3.04363    77.3565     -4.62705    -4.12856     6.03224    11.3983     10.1077         7.62058    12.4171    -10.0888     -6.02778  -12.0567     11.3324    12.9987      2.0455     -3.26302    1.36018
   8.67201    22.855       0.709349   -4.62705    95.369     -15.76