# Benchmarks for the negative binomial variance component model

In [1]:
using Revise
using DataFrames, Random, GLM, GLMCopula
using ForwardDiff, Test, LinearAlgebra
using LinearAlgebra: BlasReal, copytri!
using ToeplitzMatrices
using BenchmarkTools

┌ Info: Precompiling GLMCopula [c47b6ae2-b804-4668-9957-eb588c99ffbc]
└ @ Base loading.jl:1423


## 8 BLAS threads, single thread loglikelihood

In [44]:
# Let BLAS use 8 threads; the value displayed below is Julia's own thread count.
LinearAlgebra.BLAS.set_num_threads(8)
Base.Threads.nthreads()

1

In [38]:
# Simulate `samplesize` clusters of `ni` negative binomial responses each and
# assemble them into the NBCopulaVCModel `gcm` that the cells below fit.
p_fixed = 3    # number of fixed effects, including intercept
m = 1    # number of variance components
# true parameter values
Random.seed!(12345)
βtrue = rand(Uniform(-2, 2), p_fixed)
θtrue = [0.5]
rtrue = 10.0

d = NegativeBinomial()
link = LogLink()
D = typeof(d)
Link = typeof(link)
T = Float64

samplesize = 10000
ni = 25

gcs = Vector{NBCopulaVCObs{T, D, Link}}(undef, samplesize)

# θtrue[1] times an all-ones matrix, handed to NonMixedMultivariateDistribution below
Γ = θtrue[1] * ones(ni, ni)

# for reproducibility, simulate all the design matrices up front
Random.seed!(12345)
X_samplesize = [randn(ni, p_fixed - 1) for _ in 1:samplesize]

for i in 1:samplesize
    X = [ones(ni) X_samplesize[i]]    # prepend the intercept column
    η = X * βtrue
    μ = exp.(η)    # inverse of the log link
    p = rtrue ./ (μ .+ rtrue)    # success probability that gives mean μ at size rtrue
    # one marginal per observation; index `j` keeps the cluster index `i` unshadowed
    vecd = [NegativeBinomial(rtrue, p[j]) for j in 1:ni]
    nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)
    # simulate a single response vector y
    y = Vector{Float64}(undef, ni)
    res = Vector{Float64}(undef, ni)
    rand(nonmixed_multivariate_dist, y, res)
    V = [ones(ni, ni)]    # fresh matrix per cluster so observations do not share storage
    gcs[i] = NBCopulaVCObs(y, X, V, d, link)
end

# form the NBCopulaVCModel
gcm = NBCopulaVCModel(gcs);

In [39]:
# Time one complete fit of `gcm` with convergence tolerance 1e-8.
@time GLMCopula.fit!(gcm; tol = 1.0e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
gcm.β = [1.792818935905615, 1.4631001984732919, -0.6408675831900047]
initializing variance components using MM-Algorithm
gcm.θ = [1.0801333444594738]
initializing r using Newton update
initializing variance parameters in VC model using MM-algorithm
Converging when tol ≤ 1.0e-8 (max block iter = 10)
Block iter 1 r = 9.97, logl = -654879.1, tol = 654879.1026062825
Block iter 2 r = 9.97, logl = -654869.28, tol = 1.5005134518784893e-5
Block iter 3 r = 9.97, logl = -654869.27, tol = 1.5704143686869547e-8
 13.474499 seconds (5.44 M allocations: 166.920 MiB, 0.82% gc time)


In [17]:
# Time the same fit again on the same model object (tolerance 1e-8).
@time GLMCopula.fit!(gcm; tol = 1.0e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
gcm.β = [1.792818935905615, 1.4631001984732919, -0.6408675831900047]
initializing variance components using MM-Algorithm
gcm.θ = [1.0801333444594738]
initializing r using Newton update
initializing variance parameters in VC model using MM-algorithm
Converging when tol ≤ 1.0e-8 (max block iter = 10)
Block iter 1 r = 9.97, logl = -654879.1, tol = 654879.1026062825
Block iter 2 r = 9.97, logl = -654869.28, tol = 1.5005134518784893e-5
Block iter 3 r = 9.97, logl = -654869.27, tol = 1.5704143686869547e-8
 13.688452 seconds (6.04 M allocations: 196.415 MiB, 2.34% compilation time)


In [40]:
# Evaluate the log-likelihood of `gcm` at its current parameter values.
# NOTE(review): the two `true` flags presumably request the gradient and Hessian — confirm against GLMCopula.
loglikelihood!(gcm, true, true)

-654869.2656070774

In [41]:
# Run vcov! on the model, then print its confidence intervals (one row per parameter).
# NOTE(review): confint presumably relies on what vcov! stores in `gcm` — confirm against GLMCopula.
vcov!(gcm)
@show GLMCopula.confint(gcm)

GLMCopula.confint(gcm) = [1.778347717870949 1.78155339818036; 1.4652506389241986 1.4687708899007843; -0.6436471286710479 -0.6398741595416326; 9.856969332439316 10.086480519177215; 0.4809021803669017 0.4809021803669017]


5×2 Matrix{Float64}:
  1.77835    1.78155
  1.46525    1.46877
 -0.643647  -0.639874
  9.85697   10.0865
  0.480902   0.480902

After precompilation, NBVC takes ~13 seconds with multithreaded BLAS and single-threaded loglikelihood evaluations.

## 1 BLAS thread, 8 thread loglikelihood

In [2]:
# Restrict BLAS to one thread; the value displayed below is Julia's own thread count.
LinearAlgebra.BLAS.set_num_threads(1)
Base.Threads.nthreads()

8

In [5]:
# Simulate `samplesize` clusters of `ni` negative binomial responses each and
# assemble them into the NBCopulaVCModel `gcm` that the cells below fit.
p_fixed = 3    # number of fixed effects, including intercept
m = 1    # number of variance components
# true parameter values
Random.seed!(12345)
βtrue = rand(Uniform(-2, 2), p_fixed)
θtrue = [0.5]
rtrue = 10.0

d = NegativeBinomial()
link = LogLink()
D = typeof(d)
Link = typeof(link)
T = Float64

samplesize = 10000
ni = 25

gcs = Vector{NBCopulaVCObs{T, D, Link}}(undef, samplesize)

# θtrue[1] times an all-ones matrix, handed to NonMixedMultivariateDistribution below
Γ = θtrue[1] * ones(ni, ni)

# for reproducibility, simulate all the design matrices up front
Random.seed!(12345)
X_samplesize = [randn(ni, p_fixed - 1) for _ in 1:samplesize]

for i in 1:samplesize
    X = [ones(ni) X_samplesize[i]]    # prepend the intercept column
    η = X * βtrue
    μ = exp.(η)    # inverse of the log link
    p = rtrue ./ (μ .+ rtrue)    # success probability that gives mean μ at size rtrue
    # one marginal per observation; index `j` keeps the cluster index `i` unshadowed
    vecd = [NegativeBinomial(rtrue, p[j]) for j in 1:ni]
    nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)
    # simulate a single response vector y
    y = Vector{Float64}(undef, ni)
    res = Vector{Float64}(undef, ni)
    rand(nonmixed_multivariate_dist, y, res)
    V = [ones(ni, ni)]    # fresh matrix per cluster so observations do not share storage
    gcs[i] = NBCopulaVCObs(y, X, V, d, link)
end

# form the NBCopulaVCModel
gcm = NBCopulaVCModel(gcs);

In [4]:
# Time one complete fit of `gcm` with convergence tolerance 1e-8.
@time GLMCopula.fit!(gcm; tol = 1.0e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
gcm.β = [1.792818935905615, 1.4631001984732919, -0.6408675831900047]
initializing variance components using MM-Algorithm
gcm.θ = [1.0801333444594738]
initializing r using Newton update
initializing variance parameters in VC model using MM-algorithm
Converging when tol ≤ 1.0e-8 (max block iter = 10)

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

Block iter 1 r = 9.97, logl = -654879.1, tol = 654879.1026062839
Block iter 2 r = 9.97, logl = -654869.28, tol = 1.5005134519140392e-5
Block iter 3 r = 9.97, logl = -654869.27, tol = 1.5704154

In [6]:
# Time the same fit again on the same model object (tolerance 1e-8).
@time GLMCopula.fit!(gcm; tol = 1.0e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
gcm.β = [1.792818935905615, 1.4631001984732919, -0.6408675831900047]
initializing variance components using MM-Algorithm
gcm.θ = [1.0801333444594738]
initializing r using Newton update
initializing variance parameters in VC model using MM-algorithm
Converging when tol ≤ 1.0e-8 (max block iter = 10)
Block iter 1 r = 9.97, logl = -654879.1, tol = 654879.1026062839
Block iter 2 r = 9.97, logl = -654869.28, tol = 1.5005134519140392e-5
Block iter 3 r = 9.97, logl = -654869.27, tol = 1.570415435298091e-8
  5.245830 seconds (5.45 M allocations: 167.462 MiB)


In [7]:
# Evaluate the log-likelihood of `gcm` at its current parameter values.
# NOTE(review): the two `true` flags presumably request the gradient and Hessian — confirm against GLMCopula.
loglikelihood!(gcm, true, true)

-654869.2656070814

In [8]:
# Run vcov! on the model, then print its confidence intervals (one row per parameter).
# NOTE(review): confint presumably relies on what vcov! stores in `gcm` — confirm against GLMCopula.
vcov!(gcm)
@show GLMCopula.confint(gcm)

GLMCopula.confint(gcm) = [1.778347717870949 1.78155339818036; 1.4652506389241986 1.4687708899007843; -0.6436471286710479 -0.6398741595416326; 9.856969332439316 10.086480519177215; 0.4809021803669017 0.4809021803669017]


5×2 Matrix{Float64}:
  1.77835    1.78155
  1.46525    1.46877
 -0.643647  -0.639874
  9.85697   10.0865
  0.480902   0.480902

**Conclusion:** After precompilation, NBVC takes ~5 seconds with single-threaded BLAS and 8-thread loglikelihood evaluations.

# Profile code

In [None]:
using ProfileView
LinearAlgebra.BLAS.set_num_threads(1)

# Profile the fit twice in a row, capped at 20 block iterations.
# NOTE(review): the first run presumably absorbs compilation so the second profile is clean — confirm.
@profview GLMCopula.fit!(gcm; maxBlockIter = 20);
@profview GLMCopula.fit!(gcm; maxBlockIter = 20);