# Benchmarks for the Negative Binomial Compound Symmetry (NBCS) model

In [1]:
using Revise
using DataFrames, Random, GLM, GLMCopula
using ForwardDiff, Test, LinearAlgebra
using LinearAlgebra: BlasReal, copytri!
using ToeplitzMatrices
using BenchmarkTools

┌ Info: Precompiling GLMCopula [c47b6ae2-b804-4668-9957-eb588c99ffbc]
└ @ Base loading.jl:1423


## 8 BLAS threads, single-threaded loglikelihood

In [8]:
# Let BLAS use 8 threads for its linear algebra routines.
BLAS.set_num_threads(8)
# Number of Julia threads in this session (displayed as the cell's output).
Threads.nthreads()



1

In [2]:
# Ground-truth values used to simulate the data.
p_fixed = 3                     # fixed-effect count, intercept included
rtrue = 10.0                    # r parameter of the negative binomial marginals
σ2true, ρtrue = [0.5], [0.5]    # each kept as a one-element vector

# Seed before drawing so the regression coefficients are reproducible.
Random.seed!(12345)
βtrue = rand(Uniform(-2, 2), p_fixed)

"""
    get_V(ρ, n)

Return the `n × n` compound-symmetry correlation matrix — ones on the diagonal and `ρ`
in every off-diagonal entry — as a `ToeplitzMatrices.SymmetricToeplitz`.

The element type is `promote_type(Float64, typeof(ρ))`: `Float64` or `Int` input gives
`Float64` storage, while wider number types (e.g. `BigFloat`) are preserved instead of
being forced into `Float64`.
"""
function get_V(ρ, n)
    T = promote_type(Float64, typeof(ρ))
    # A symmetric Toeplitz matrix is determined by its first column: [1, ρ, ρ, …, ρ].
    firstcol = fill(convert(T, ρ), n)
    firstcol[1] = one(T)
    return ToeplitzMatrices.SymmetricToeplitz(firstcol)
end

# Simulation dimensions.
samplesize = 10000   # length of the observation vector `gcs`
ni = 5               # number of observations per individual

# Base distribution and link, plus the type parameters of the observation container.
d, link = NegativeBinomial(), LogLink()
T, D, Link = Float64, typeof(d), typeof(link)
gcs = Vector{NBCopulaCSObs{T, D, Link}}(undef, samplesize)

# True Gamma: the compound-symmetry matrix V scaled by σ2true.
V = get_V(first(ρtrue), ni)
Γ = first(σ2true) * V

# Draw every design matrix up front from a fixed seed so the covariates are reproducible.
Random.seed!(12345)
X_samplesize = [randn(ni, p_fixed - 1) for _ in 1:samplesize]

# Simulate one response vector per individual and store it as an observation object.
for i in 1:samplesize
    X = [ones(ni) X_samplesize[i]]    # prepend the intercept column
    η = X * βtrue
    μ = exp.(η)                       # inverse of the log link
    p = rtrue ./ (μ .+ rtrue)         # success probability for which NegativeBinomial(rtrue, p) has mean μ
    # Separate index name so the comprehension does not shadow the loop's `i`.
    vecd = [NegativeBinomial(rtrue, p[j]) for j in 1:ni]
    nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)
    # simulate a single vector y; `rand` fills `y` in place
    # NOTE(review): `res` is presumably a scratch buffer required by this `rand` method — confirm.
    y = Vector{Float64}(undef, ni)
    res = Vector{Float64}(undef, ni)
    rand(nonmixed_multivariate_dist, y, res)
    gcs[i] = NBCopulaCSObs(y, X, d, link)
end

# form model
gcm = NBCopulaCSModel(gcs);

In [3]:
# Fit the model; `@time` reports the elapsed time and allocations of this call.
# NOTE(review): earliest fit cell shown, so its timing presumably includes compilation — confirm.
@time GLMCopula.fit!(gcm)

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-7 (max block iter = 20)

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

Block iter 1 r = 9.36, logl = -135079.84, tol = 135079.8380199566
Block iter 2 r = 9.64, logl = -134953.96, tol = 0.0009318868886073533
Block iter 3 r = 9.78, logl = -134949.18, tol = 3.537778806646794e-5
Block iter 4 r = 9.85, logl = -134947.95, tol = 9.110528345101428e-6
Block iter 5 r = 9.89, logl = -

-134947.55180931065

In [6]:
# Fit the same model again; `@time` reports the elapsed time and allocations of this call.
@time GLMCopula.fit!(gcm)

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-7 (max block iter = 20)
Block iter 1 r = 9.36, logl = -135079.84, tol = 135079.8380199566
Block iter 2 r = 9.64, logl = -134953.96, tol = 0.0009318868886073533
Block iter 3 r = 9.78, logl = -134949.18, tol = 3.537778806646794e-5
Block iter 4 r = 9.85, logl = -134947.95, tol = 9.110528345101428e-6
Block iter 5 r = 9.89, logl = -134947.65, tol = 2.2649776131051243e-6
Block iter 6 r = 9.91, logl = -134947.58, tol = 5.415343318597491e-7
Block iter 7 r = 9.92, logl = -134947.56, tol = 1.3470829950901994e-7
Block iter 8 r = 9.92, logl = -134947.55, tol = 3.368865965956274e-8
 10.156176 seconds (1.46 M allocations: 318.173 MiB, 0.59% gc time)


-134947.55180931065

In [16]:
# Repeat the timed fit to check that the measurement is stable across runs.
@time GLMCopula.fit!(gcm)

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-7 (max block iter = 20)
Block iter 1 r = 9.36, logl = -135079.84, tol = 135079.8380199566
Block iter 2 r = 9.64, logl = -134953.96, tol = 0.0009318868886073533
Block iter 3 r = 9.78, logl = -134949.18, tol = 3.537778806646794e-5
Block iter 4 r = 9.85, logl = -134947.95, tol = 9.110528345101428e-6
Block iter 5 r = 9.89, logl = -134947.65, tol = 2.2649776131051243e-6
Block iter 6 r = 9.91, logl = -134947.58, tol = 5.415343318597491e-7
Block iter 7 r = 9.92, logl = -134947.56, tol = 1.3470829950901994e-7
Block iter 8 r = 9.92, logl = -134947.55, tol = 3.368865965956274e-8
 10.572977 seconds (1.46 M allocations: 318.173 MiB, 0.72% gc time)


-134947.55180931065

In [6]:
# Evaluate the loglikelihood of the fitted model.
# NOTE(review): the two `true` flags presumably request the gradient and Hessian — confirm against GLMCopula.
loglikelihood!(gcm, true, true)
# Compute the variance-covariance estimate; presumably stored inside `gcm` (mutating `!` name) — TODO confirm.
vcov!(gcm)
# Print the confidence intervals; the output is a 6×2 matrix, presumably one (lower, upper) row per parameter.
@show GLMCopula.confint(gcm)

GLMCopula.confint(gcm) = [1.7767887435509857 1.7827801006505772; 1.4666396985495391 1.4724269853136651; -0.6422870042015203 -0.6359903278288017; 0.48386151485999535 0.5542264146640823; 0.4510377837567855 0.5515062782314301; 9.652162733595418 10.188855302885983]


6×2 Matrix{Float64}:
  1.77679    1.78278
  1.46664    1.47243
 -0.642287  -0.63599
  0.483862   0.554226
  0.451038   0.551506
  9.65216   10.1889

**Conclusion:** After precompilation, fitting the NBCS model takes ~10 seconds with multithreaded BLAS (8 threads) and single-threaded loglikelihood evaluations.

## 1 BLAS thread, 8-thread loglikelihood

In [2]:
# Restrict BLAS to a single thread; in this section the parallelism comes from Julia threads instead.
BLAS.set_num_threads(1)
# Number of Julia threads in this session (displayed as the cell's output).
Threads.nthreads()

8

In [5]:
# Ground-truth values used to simulate the data.
p_fixed = 3                     # fixed-effect count, intercept included
rtrue = 10.0                    # r parameter of the negative binomial marginals
σ2true, ρtrue = [0.5], [0.5]    # each kept as a one-element vector

# Seed before drawing so the regression coefficients are reproducible.
Random.seed!(12345)
βtrue = rand(Uniform(-2, 2), p_fixed)

"""
    get_V(ρ, n)

Return the `n × n` compound-symmetry correlation matrix — ones on the diagonal and `ρ`
in every off-diagonal entry — as a `ToeplitzMatrices.SymmetricToeplitz`.

The element type is `promote_type(Float64, typeof(ρ))`: `Float64` or `Int` input gives
`Float64` storage, while wider number types (e.g. `BigFloat`) are preserved instead of
being forced into `Float64`.
"""
function get_V(ρ, n)
    T = promote_type(Float64, typeof(ρ))
    # A symmetric Toeplitz matrix is determined by its first column: [1, ρ, ρ, …, ρ].
    firstcol = fill(convert(T, ρ), n)
    firstcol[1] = one(T)
    return ToeplitzMatrices.SymmetricToeplitz(firstcol)
end

# Simulation dimensions.
samplesize = 10000   # length of the observation vector `gcs`
ni = 5               # number of observations per individual

# Base distribution and link, plus the type parameters of the observation container.
d, link = NegativeBinomial(), LogLink()
T, D, Link = Float64, typeof(d), typeof(link)
gcs = Vector{NBCopulaCSObs{T, D, Link}}(undef, samplesize)

# True Gamma: the compound-symmetry matrix V scaled by σ2true.
V = get_V(first(ρtrue), ni)
Γ = first(σ2true) * V

# Draw every design matrix up front from a fixed seed so the covariates are reproducible.
Random.seed!(12345)
X_samplesize = [randn(ni, p_fixed - 1) for _ in 1:samplesize]

# Simulate one response vector per individual and store it as an observation object.
for i in 1:samplesize
    X = [ones(ni) X_samplesize[i]]    # prepend the intercept column
    η = X * βtrue
    μ = exp.(η)                       # inverse of the log link
    p = rtrue ./ (μ .+ rtrue)         # success probability for which NegativeBinomial(rtrue, p) has mean μ
    # Separate index name so the comprehension does not shadow the loop's `i`.
    vecd = [NegativeBinomial(rtrue, p[j]) for j in 1:ni]
    nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)
    # simulate a single vector y; `rand` fills `y` in place
    # NOTE(review): `res` is presumably a scratch buffer required by this `rand` method — confirm.
    y = Vector{Float64}(undef, ni)
    res = Vector{Float64}(undef, ni)
    rand(nonmixed_multivariate_dist, y, res)
    gcs[i] = NBCopulaCSObs(y, X, d, link)
end

# form model
gcm = NBCopulaCSModel(gcs);

In [6]:
# Fit the model under the 1-BLAS-thread configuration; `@time` reports elapsed time and allocations.
@time GLMCopula.fit!(gcm)

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-7 (max block iter = 20)
Block iter 1 r = 9.36, logl = -135079.84, tol = 135079.83801995628
Block iter 2 r = 9.64, logl = -134953.96, tol = 0.0009318868886049855
Block iter 3 r = 9.78, logl = -134949.18, tol = 3.53777880686245e-5
Block iter 4 r = 9.85, logl = -134947.95, tol = 9.110528346395428e-6
Block iter 5 r = 9.89, logl = -134947.65, tol = 2.2649776072821646e-6
Block iter 6 r = 9.91, logl = -134947.58, tol = 5.415343398393886e-7
Block iter 7 r = 9.92, logl = -134947.56, tol = 1.3470829411736864e-7
Block iter 8 r = 9.92, logl = -134947.55, tol = 3.368866030656108e-8
  3.730381 seconds (1.48 M allocations: 320.162 MiB, 1.91% gc time)


-134947.55180931097

In [6]:
# Repeat the timed fit to check that the measurement is stable across runs.
@time GLMCopula.fit!(gcm)

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-7 (max block iter = 20)
Block iter 1 r = 9.36, logl = -135079.84, tol = 135079.83801995628
Block iter 2 r = 9.64, logl = -134953.96, tol = 0.0009318868886049855
Block iter 3 r = 9.78, logl = -134949.18, tol = 3.53777880686245e-5
Block iter 4 r = 9.85, logl = -134947.95, tol = 9.110528346395428e-6
Block iter 5 r = 9.89, logl = -134947.65, tol = 2.2649776074978297e-6
Block iter 6 r = 9.91, logl = -134947.58, tol = 5.415343396237226e-7
Block iter 7 r = 9.92, logl = -134947.56, tol = 1.3470829433303472e-7
Block iter 8 r = 9.92, logl = -134947.55, tol = 3.3688660090894975e-8
  3.715496 seconds (1.48 M allocations: 319.939 MiB, 2.43% gc time)


-134947.55180931094

**Conclusion:** After precompilation, fitting the NBCS model takes 3-4 seconds with single-threaded BLAS and 8-thread loglikelihood evaluations.

# Profile code

In [7]:
# ProfileView supplies the `@profview` macro used below.
using ProfileView
# Keep BLAS single-threaded, matching the "1 BLAS thread" benchmark configuration.
BLAS.set_num_threads(1)
# Profile one full fit; the trailing `;` suppresses the cell's displayed value.
@profview GLMCopula.fit!(gcm);

Initializing NegBin r to Poisson regression values
initializing β using Newton's Algorithm under Independence Assumption
initializing σ2 and ρ using method of moments
initializing r using Newton update
initializing variance parameters in CS model using mom
Converging when tol ≤ 1.0e-7 (max block iter = 20)
Block iter 1 r = 10.34, logl = -134961.85, tol = 134961.8474570041
Block iter 2 r = 10.12, logl = -134950.75, tol = 8.219646759139508e-5
Block iter 3 r = 10.02, logl = -134948.3, tol = 1.8180216728862144e-5
Block iter 4 r = 9.97, logl = -134947.73, tol = 4.2255156950002544e-6
Block iter 5 r = 9.95, logl = -134947.59, tol = 1.0081195550213791e-6
Block iter 6 r = 9.94, logl = -134947.56, tol = 2.416148674806441e-7
Block iter 7 r = 9.93, logl = -134947.55, tol = 5.699377167472242e-8
