# Benchmarks for the negative binomial autoregressive (NBAR) model

In [1]:
using Revise
using DataFrames, Random, GLM, GLMCopula
using ForwardDiff, Test, LinearAlgebra
using LinearAlgebra: BlasReal, copytri!
using ToeplitzMatrices
using BenchmarkTools

┌ Info: Precompiling GLMCopula [c47b6ae2-b804-4668-9957-eb588c99ffbc]
└ @ Base loading.jl:1423


## 8 BLAS threads, single thread loglikelihood

In [2]:
# Let BLAS use 8 threads, then display the number of Julia threads
# (the recorded cell output for this run is 1).
BLAS.set_num_threads(8)
Threads.nthreads()

1

In [3]:
# number of fixed effects (the intercept counts as one of them)
p_fixed = 3

# ground-truth parameters used to simulate the data
rtrue = 10.0
σ2true = [0.5]
ρtrue = [0.5]

# seed immediately before the draw so that βtrue is reproducible
Random.seed!(12345)
βtrue = rand(Uniform(-2, 2), p_fixed)

"""
    get_V(ρ, n)

Build the `n × n` symmetric Toeplitz matrix whose defining vector is
`1, ρ, ρ², …` (each entry is the previous one multiplied by `ρ`).
"""
function get_V(ρ, n)
    firstcol = zeros(n)
    firstcol[1] = 1.0
    # fill the remaining entries by repeated multiplication with ρ
    for k in 1:(n - 1)
        firstcol[k + 1] = ρ * firstcol[k]
    end
    return ToeplitzMatrices.SymmetricToeplitz(firstcol)
end

# simulation parameters
samplesize = 10000

st = time()
currentind = 1
d = NegativeBinomial()
link = LogLink()
D = typeof(d)
Link = typeof(link)
T = Float64

# one observation object per individual, filled in by the loop below
gcs = Vector{NBCopulaARObs{T, D, Link}}(undef, samplesize)

ni = 25 # number of observations per individual
V = get_V(ρtrue[1], ni)

# true Gamma
Γ = σ2true[1] * V

# for reproducibility, simulate all the design matrices up front
Random.seed!(12345)
X_samplesize = [randn(ni, p_fixed - 1) for _ in 1:samplesize]

for i in 1:samplesize
    # design matrix: intercept column followed by the pre-drawn covariates
    X = [ones(ni) X_samplesize[i]]
    η = X * βtrue
    μ = exp.(η)
    # success probabilities paired with rtrue in the NegativeBinomial marginals
    p = rtrue ./ (μ .+ rtrue)
    vecd = [NegativeBinomial(rtrue, pj) for pj in p]
    nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)
    # simulate a single response vector y
    # NOTE(review): presumably `rand` fills `y` in place and uses `res` as a
    # work buffer; confirm against GLMCopula
    y = Vector{Float64}(undef, ni)
    res = Vector{Float64}(undef, ni)
    rand(nonmixed_multivariate_dist, y, res)
    gcs[i] = NBCopulaARObs(y, X, d, link)
end

# form model
gcm = NBCopulaARModel(gcs);

In [4]:
# Time a fit with at most 20 block iterations and tolerance 1e-8.
# NOTE(review): first fit in this session, so the reported time presumably
# includes compilation.
@time GLMCopula.fit!(gcm, maxBlockIter = 20, tol=1e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-8 (max block iter = 20)

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

Block iter 1 r = 9.63, logl = -659876.38, tol = 659876.3844181958
Block iter 2 r = 9.87, logl = -659849.9, tol = 4.013178849649874e-5
Block iter 3 r = 9.89, logl = -659840.08, tol = 1.4888447019632147e-5
Block iter 4 r = 9.91, logl = -659840.95, tol = 1.3245809250537448e-6
Block iter 5 r = 9.96, logl = -

In [20]:
# Evaluate the model log-likelihood at the current parameter values.
# NOTE(review): the two `true` flags are presumably needgrad and needhess — confirm.
loglikelihood!(gcm, true, true)

-659839.6133726223

In [19]:
# Minimal reproduction of the `plan_fft!` MethodError raised by `mul!` with a
# SymmetricToeplitz left operand (see the error output of this cell).
# NOTE(review): presumably no FFT backend (e.g. FFTW) is loaded — confirm.
A = SymmetricToeplitz(ones(10))
B = randn(10, 10)
C = zeros(10, 10)
LinearAlgebra.mul!(C, A, B)

LoadError: MethodError: no method matching plan_fft!(::Vector{ComplexF64}, ::UnitRange{Int64})
Closest candidates are:
  plan_fft!(::AbstractArray; kws...) at ~/.julia/packages/AbstractFFTs/Ih3rT/src/definitions.jl:52

In [6]:
# Repeat the timed fit now that the code has already been compiled; this is the
# run whose timing is quoted in the note below.
@time GLMCopula.fit!(gcm, maxBlockIter = 20, tol=1e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-8 (max block iter = 20)
Block iter 1 r = 9.63, logl = -659876.38, tol = 659876.3844181958
Block iter 2 r = 9.87, logl = -659849.9, tol = 4.013178849649874e-5
Block iter 3 r = 9.89, logl = -659840.08, tol = 1.4888447019632147e-5
Block iter 4 r = 9.91, logl = -659840.95, tol = 1.3245809250537448e-6
Block iter 5 r = 9.96, logl = -659840.73, tol = 3.3603836102156463e-7
Block iter 6 r = 9.95, logl = -659839.72, tol = 1.5373802639043203e-6
Block iter 7 r = 9.94, logl = -659839.91, tol = 2.9393202737438934e-7
Block iter 8 r = 9.92, logl = -659840.0, tol = 1.312235713428575e-7
Block iter 9 r = 9.97, logl = -659840.8, tol = 1.218456117301108e-6
Block iter 10 r = 9.96, logl = -659839.64, tol = 1.7524068759646176e-6
B

**Conclusion:** After precompilation, NBAR takes ~85 seconds with 8-thread BLAS and single-threaded loglikelihood evaluations.

## 1 BLAS thread, 8 thread loglikelihood

In [2]:
# Restrict BLAS to a single thread, then display the number of Julia threads
# (the recorded cell output for this run is 8).
BLAS.set_num_threads(1)
Threads.nthreads()

8

In [3]:
# number of fixed effects (the intercept counts as one of them)
p_fixed = 3

# ground-truth parameters used to simulate the data
rtrue = 10.0
σ2true = [0.5]
ρtrue = [0.5]

# seed immediately before the draw so that βtrue is reproducible
Random.seed!(12345)
βtrue = rand(Uniform(-2, 2), p_fixed)

"""
    get_V(ρ, n)

Build the `n × n` symmetric Toeplitz matrix whose defining vector is
`1, ρ, ρ², …` (each entry is the previous one multiplied by `ρ`).
"""
function get_V(ρ, n)
    firstcol = zeros(n)
    firstcol[1] = 1.0
    # fill the remaining entries by repeated multiplication with ρ
    for k in 1:(n - 1)
        firstcol[k + 1] = ρ * firstcol[k]
    end
    return ToeplitzMatrices.SymmetricToeplitz(firstcol)
end

# simulation parameters
samplesize = 10000

st = time()
currentind = 1
d = NegativeBinomial()
link = LogLink()
D = typeof(d)
Link = typeof(link)
T = Float64

# one observation object per individual, filled in by the loop below
gcs = Vector{NBCopulaARObs{T, D, Link}}(undef, samplesize)

ni = 25 # number of observations per individual
V = get_V(ρtrue[1], ni)

# true Gamma
Γ = σ2true[1] * V

# for reproducibility, simulate all the design matrices up front
Random.seed!(12345)
X_samplesize = [randn(ni, p_fixed - 1) for _ in 1:samplesize]

for i in 1:samplesize
    # design matrix: intercept column followed by the pre-drawn covariates
    X = [ones(ni) X_samplesize[i]]
    η = X * βtrue
    μ = exp.(η)
    # success probabilities paired with rtrue in the NegativeBinomial marginals
    p = rtrue ./ (μ .+ rtrue)
    vecd = [NegativeBinomial(rtrue, pj) for pj in p]
    nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)
    # simulate a single response vector y
    # NOTE(review): presumably `rand` fills `y` in place and uses `res` as a
    # work buffer; confirm against GLMCopula
    y = Vector{Float64}(undef, ni)
    res = Vector{Float64}(undef, ni)
    rand(nonmixed_multivariate_dist, y, res)
    gcs[i] = NBCopulaARObs(y, X, d, link)
end

# form model
gcm = NBCopulaARModel(gcs);

In [4]:
# Time a fit with at most 20 block iterations and tolerance 1e-8.
# NOTE(review): first fit in this session, so the reported time presumably
# includes compilation.
@time GLMCopula.fit!(gcm, maxBlockIter = 20, tol=1e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-8 (max block iter = 20)

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

Block iter 1 r = 9.63, logl = -659876.38, tol = 659876.3844181958
Block iter 2 r = 9.87, logl = -659849.9, tol = 4.0131788494028864e-5
Block iter 3 r = 9.89, logl = -659840.08, tol = 1.4888447022102083e-5
Block iter 4 r = 9.91, logl = -659840.95, tol = 1.3245809299937661e-6
Block iter 5 r = 9.96, logl = 

In [5]:
# Repeat the timed fit on the same model object now that the code has been compiled.
@time GLMCopula.fit!(gcm, maxBlockIter = 20, tol=1e-8);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-8 (max block iter = 20)
Block iter 1 r = 10.01, logl = -659841.66, tol = 659841.6606931937
Block iter 2 r = 9.97, logl = -659840.52, tol = 1.7272941257266223e-6
Block iter 3 r = 9.96, logl = -659839.64, tol = 1.3405054942060574e-6
Block iter 4 r = 9.94, logl = -659839.83, tol = 2.966269382129259e-7
Block iter 5 r = 9.93, logl = -659839.95, tol = 1.818060399997583e-7
Block iter 6 r = 9.9, logl = -659840.22, tol = 4.1219480759678596e-7
Block iter 7 r = 9.92, logl = -659840.19, tol = 4.4224329130175466e-8
Block iter 8 r = 9.97, logl = -659840.8, tol = 9.204649132585677e-7
Block iter 9 r = 9.96, logl = -659839.65, tol = 1.7413839249173488e-6
Block iter 10 r = 9.94, logl = -659839.86, tol = 3.183655154674333e-7


In [33]:
# Evaluate the model log-likelihood at the current parameter values.
# NOTE(review): the two `true` flags are presumably needgrad and needhess — confirm.
loglikelihood!(gcm, true, true)

(25, 3)
(25, 25)
(25, 3)


LoadError: TaskFailedException

    nested task error: UndefVarError: fff not defined
    Stacktrace:
     [1] loglikelihood!(gc::NBCopulaARObs{Float64, NegativeBinomial{Float64}, LogLink}, β::Vector{Float64}, ρ::Float64, σ2::Float64, r::Float64, needgrad::Bool, needhess::Bool; penalized::Bool)
       @ GLMCopula ~/.julia/dev/GLMCopula/src/parameter_estimation/NBCopulaAR.jl:304
     [2] loglikelihood!
       @ ~/.julia/dev/GLMCopula/src/parameter_estimation/NBCopulaAR.jl:209 [inlined]
     [3] macro expansion
       @ ~/.julia/dev/GLMCopula/src/parameter_estimation/NBCopulaAR.jl:330 [inlined]
     [4] (::GLMCopula.var"#175#threadsfor_fun#15"{NBCopulaARModel{Float64, NegativeBinomial{Float64}, LogLink}, Bool, Bool, Vector{Float64}, Base.OneTo{Int64}})(onethread::Bool)
       @ GLMCopula ./threadingconstructs.jl:85
     [5] (::GLMCopula.var"#175#threadsfor_fun#15"{NBCopulaARModel{Float64, NegativeBinomial{Float64}, LogLink}, Bool, Bool, Vector{Float64}, Base.OneTo{Int64}})()
       @ GLMCopula ./threadingconstructs.jl:52

    caused by: MethodError: no method matching plan_fft!(::Vector{ComplexF64}, ::UnitRange{Int64})
    Closest candidates are:
      plan_fft!(::AbstractArray; kws...) at ~/.julia/packages/AbstractFFTs/Ih3rT/src/definitions.jl:52
    Stacktrace:
     [1] plan_fft!(x::Vector{ComplexF64}; kws::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
       @ AbstractFFTs ~/.julia/packages/AbstractFFTs/Ih3rT/src/definitions.jl:52
     [2] plan_fft!(x::Vector{ComplexF64})
       @ AbstractFFTs ~/.julia/packages/AbstractFFTs/Ih3rT/src/definitions.jl:52
     [3] factorize(A::SymmetricToeplitz{Float64})
       @ ToeplitzMatrices ~/.julia/packages/ToeplitzMatrices/bWAGV/src/ToeplitzMatrices.jl:325
     [4] mul!(C::Matrix{Float64}, A::SymmetricToeplitz{Float64}, B::Matrix{Float64}, α::Float64, β::Float64)
       @ ToeplitzMatrices ~/.julia/packages/ToeplitzMatrices/bWAGV/src/ToeplitzMatrices.jl:144
     [5] loglikelihood!(gc::NBCopulaARObs{Float64, NegativeBinomial{Float64}, LogLink}, β::Vector{Float64}, ρ::Float64, σ2::Float64, r::Float64, needgrad::Bool, needhess::Bool; penalized::Bool)
       @ GLMCopula ~/.julia/dev/GLMCopula/src/parameter_estimation/NBCopulaAR.jl:294
     [6] loglikelihood!
       @ ~/.julia/dev/GLMCopula/src/parameter_estimation/NBCopulaAR.jl:209 [inlined]
     [7] macro expansion
       @ ~/.julia/dev/GLMCopula/src/parameter_estimation/NBCopulaAR.jl:330 [inlined]
     [8] (::GLMCopula.var"#175#threadsfor_fun#15"{NBCopulaARModel{Float64, NegativeBinomial{Float64}, LogLink}, Bool, Bool, Vector{Float64}, Base.OneTo{Int64}})(onethread::Bool)
       @ GLMCopula ./threadingconstructs.jl:85
     [9] (::GLMCopula.var"#175#threadsfor_fun#15"{NBCopulaARModel{Float64, NegativeBinomial{Float64}, LogLink}, Bool, Bool, Vector{Float64}, Base.OneTo{Int64}})()
       @ GLMCopula ./threadingconstructs.jl:52

**Conclusion:** After precompilation, NBAR takes ~25 seconds with single-threaded BLAS and 8-thread loglikelihood evaluations.

# Profile code

In [None]:
# Profile the fit with BLAS restricted to a single thread.
using ProfileView
BLAS.set_num_threads(1)

# NOTE(review): the call is issued twice, presumably so that the second profile
# is not dominated by compilation — confirm.
@profview GLMCopula.fit!(gcm, maxBlockIter = 20);
@profview GLMCopula.fit!(gcm, maxBlockIter = 20);