# Fit Neg Binomial Auto-regressive model 

Taking inspiration from [GLM.jl](https://github.com/JuliaStats/GLM.jl/blob/master/src/negbinfit.jl#L68), we will:
+ Initialize `r` with Poisson regression fit
+ Perform block updates:
    - Fix $r$, fit negative binomial copula for 10 iterations
    - Fix $\beta$ and AR parameters, fit $r$ using Newton for 10 iterations
    - Repeat until convergence

# Block updates

In [23]:
using Revise
using DataFrames, Random, GLM, GLMCopula
using ForwardDiff, Test, LinearAlgebra
using LinearAlgebra: BlasReal, copytri!
using ToeplitzMatrices

function get_V(ρ, n)
    vec = zeros(n)
    vec[1] = 1.0
    for i in 2:n
        vec[i] = vec[i - 1] * ρ
    end
    V = ToeplitzMatrices.SymmetricToeplitz(vec)
    V
end

# simulation parameters
Random.seed!(12345)
N = 10000    # sample size
n = 5        # observations per subject
p = 3        # number of fixed effects, including intercept
σ2true = 0.1 # true σ2
ρtrue = 0.9  # true ρ
r = 10      # true dispersion parameter in NB model

# simulation variables
β_true = ones(p)     # true beta
V = get_V(ρtrue, n)  # true variance
Γ = σ2true * V       # true Gamma
X = [ones(n) randn(n, p - 1)]
η = X * β_true
μ = exp.(η)
prob = r ./ (μ .+ r)

# now simulate phenotypes
vecd = [NegativeBinomial(r, prob[i]) for i in 1:n]
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)
Y_Nsample = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, N)

10000-element Vector{Vector{Float64}}:
 [7.0, 5.0, 2.0, 19.0, 2.0]
 [7.0, 1.0, 2.0, 35.0, 3.0]
 [20.0, 0.0, 7.0, 19.0, 5.0]
 [11.0, 2.0, 5.0, 46.0, 2.0]
 [6.0, 0.0, 3.0, 12.0, 2.0]
 [7.0, 1.0, 5.0, 15.0, 8.0]
 [7.0, 0.0, 8.0, 7.0, 6.0]
 [11.0, 1.0, 1.0, 17.0, 6.0]
 [12.0, 2.0, 2.0, 19.0, 8.0]
 [8.0, 2.0, 5.0, 13.0, 6.0]
 [10.0, 1.0, 4.0, 20.0, 2.0]
 [8.0, 3.0, 8.0, 12.0, 3.0]
 [10.0, 2.0, 14.0, 14.0, 7.0]
 ⋮
 [12.0, 0.0, 3.0, 7.0, 1.0]
 [15.0, 0.0, 5.0, 20.0, 6.0]
 [10.0, 0.0, 8.0, 26.0, 4.0]
 [11.0, 4.0, 7.0, 27.0, 4.0]
 [11.0, 0.0, 3.0, 14.0, 6.0]
 [11.0, 0.0, 3.0, 14.0, 1.0]
 [8.0, 0.0, 9.0, 11.0, 3.0]
 [16.0, 1.0, 3.0, 28.0, 1.0]
 [10.0, 1.0, 1.0, 23.0, 4.0]
 [12.0, 2.0, 3.0, 14.0, 11.0]
 [7.0, 3.0, 9.0, 23.0, 6.0]
 [7.0, 0.0, 6.0, 10.0, 4.0]

In [29]:
Random.seed!(12345)
d = NegativeBinomial()
link = LogLink()
gcs = Vector{NBCopulaARObs{Float64, typeof(d), typeof(link)}}(undef, N)
y = Vector{Float64}(undef, n)
res = Vector{Float64}(undef, n)
for i in 1:N
    Random.seed!(12345 * i)
    X = [ones(n) randn(n, p - 1)]
    η = X * β_true
    μ = exp.(η)
    prob = r ./ (μ .+ r)
    vecd = [NegativeBinomial(r, prob[i]) for i in 1:n]
    for i in 1:n
        vecd[i] = NegativeBinomial(r, prob[i])
    end
    Random.seed!(12345 * i)
    rand(nonmixed_multivariate_dist, y, res)
    gcs[i] = NBCopulaARObs(y, X, d, link)
end
gcm = NBCopulaARModel(gcs);
# gcm.r[1] = r

In [30]:
initialize_model!(gcm)
@show gcm.β
@show gcm.r
@show gcm.σ2
@show gcm.ρ

Initializing NegBin r to Poisson regression values
gcm.θ = [0.07030764085865944, 0.07240870074013135, 0.06642004483834663, 1.0, 1.0]
gcm.θ = [0.07030764085865944, 0.07240870074013135, 0.06642004483834663, 0.9900000098999999, 1.0]
gcm.θ = [3.1953248601549364, 0.0799536737719938, 0.042816740810371734, 0.9757131019116224, 1.0152383340055944]
gcm.θ = [2.4646437617604153, 0.002394423339082477, 0.012060290947655941, 0.9800932016903732, 1.0156191491692848]
gcm.θ = [2.405240927620133, 0.0007311026146579374, 0.00665196667637115, 0.8714057880927467, 1.017705260979438]
gcm.θ = [2.4489468258900877, -0.01185214719346259, -0.0021066741245765836, 0.7805434246495431, 1.0224400395550592]
gcm.θ = [2.6279438196687748, 0.018111326352394815, 0.023771280188045404, 0.3941744244980192, 1.0466477613736371]
gcm.θ = [2.6131734259308983, -0.027363102358052182, -0.027865200082101124, 0.3166161914707083, 1.0731695703717623]
gcm.θ = [2.594419745176933, -0.029118543558182133, -0.00952102300997134, 0.22608244271717037

1-element Vector{Float64}:
 18350.90907816705

In [31]:
gcm.β

3-element Vector{Float64}:
 1.0
 1.0
 1.0

In [32]:
GLMCopula.loglikelihood!(gcm, true, true)

-9.404260972961048e8

In [33]:
# default is Quasi-Newton
@time GLMCopula.fit!(gcm, maxBlockIter=3)

gcm.θ = [1.0, 1.0, 1.0, 1.0, 433.42758864068696]
gcm.θ = [1.0, 1.0, 1.0, 0.9900000098999999, 433.42758864068696]
gcm.θ = [3.699744581036897, -2.7110662116004, -2.704624020712, 0.4999500000494999, 390.2416218188902]
gcm.θ = [2.5042394725022525, -2.3168262189208684, -2.118742659146565, 0.0049994901004949965, 294.11240335723727]
gcm.θ = [5.511546980510526, 0.23504372915973715, -2.1507912672731915, 0.0050559491078358216, 275.9141983988895]
gcm.θ = [8.437173011286065, 0.072537176253643, 0.23812697853240117, 0.021947625350599364, 207.62543429268933]
gcm.θ = [6.958489051080894, -1.7960276498923766, 0.2762124513869084, 0.026503202066259494, 2.0762543330268954]
gcm.θ = [4.243159601081151, -0.42983335997453453, 0.8172888704127541, 0.9902650419206626, 2.1682688528261873]
gcm.θ = [4.16410809234154, -0.40795249056576444, 0.7537625064883684, 0.9889548071092624, 0.9623964321528027]
gcm.θ = [3.6735924009496532, -0.3223360876324633, 0.4296268975597185, 0.8891924842683124, 4.042608172723149]
gcm.θ = [3.

-1.2947570739773125e6

In [16]:
# default is Quasi-Newton
@time GLMCopula.fit!(gcm, maxBlockIter=100)

Converging when tol ≤ 0.0001 (max block iter = 100)
Block iter 1 r = 66.26, logl = -178754.28, tol = 178754.2783140062
Block iter 2 r = 67.57, logl = -178752.87, tol = 1.4126160731830169
Block iter 3 r = 69.01, logl = -178751.25, tol = 1.6173761388636194
Block iter 4 r = 70.56, logl = -178749.43, tol = 1.8178635479998775
Block iter 5 r = 72.23, logl = -178747.43, tol = 1.9995644706650637
Block iter 6 r = 73.99, logl = -178745.29, tol = 2.141556122689508
Block iter 7 r = 75.83, logl = -178743.06, tol = 2.225642575445818
Block iter 8 r = 77.77, logl = -178740.82, tol = 2.2392860002582893
Block iter 9 r = 79.71, logl = -178738.57, tol = 2.2531988029077183
Block iter 10 r = 81.61, logl = -178736.48, tol = 2.0906419749662746
Block iter 11 r = 83.59, logl = -178734.6, tol = 1.8810184785397723
Block iter 12 r = 85.32, logl = -178732.83, tol = 1.7728042982635088
Block iter 13 r = 87.05, logl = -178731.46, tol = 1.3699814415886067
Block iter 14 r = 88.98, logl = -178730.32, tol = 1.136312451650

-178726.92165966445

In [17]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.7577367229152956; true β = 3.757872325600888
estimated variance component 1 = 0.08454686818715344; true variance component 1 = 0.1
estimated variance component 2 = 0.3859156499415465; true variance component 2 = 0.5
estimated r = 96.0785202732631; true r = 100


In [7]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.7577367229152956; true β = 3.757872325600888
estimated variance component 1 = 0.08454686818715344; true variance component 1 = 0.1
estimated variance component 2 = 0.3859156499415465; true variance component 2 = 0.5
estimated r = 96.0785202732631; true r = 100


In [7]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 1.454827543795997; true β = 1.4552872326068422
estimated variance component 1 = 0.10456366108069844; true variance component 1 = 0.1
estimated variance component 2 = 0.44579361811367024; true variance component 2 = 0.5
estimated r = 9.969213381836406; true r = 10


# Benchmark/profile code

In [191]:
using BenchmarkTools, SpecialFunctions

function update_HΣ_test!(gcm)
    T = Float64
    fill!(gcm.HΣ, 0.0)
    @inbounds for j in 1:gcm.m
        @simd for i in 1:length(gcm.storage_n)
            gcm.hess1[j, i] = gcm.QF[i, j] * gcm.storage_n[i]
            gcm.hess2[j, i] = gcm.TR[i, j] * gcm.storage_n2[i]
        end
    end
    BLAS.gemm!('N', 'T', -T(1), gcm.hess1, gcm.hess1, T(0), gcm.HΣ)
    BLAS.gemm!('N', 'T', T(1), gcm.hess2, gcm.hess2, T(1), gcm.HΣ)
end

update_HΣ_test! (generic function with 1 method)

In [192]:
gc = gcm.data[1]
β = gcm.β
@btime update_HΣ_test!($gcm)

  65.226 μs (0 allocations: 0 bytes)


2×2 Matrix{Float64}:
 23863.7  23863.7
 23863.7  23863.7

# Let's see how to optimally block update 

## 1 iteration each

In [58]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.7640471229274333; true β = 3.757872325600888
estimated variance component 1 = 0.010091096783959048; true variance component 1 = 0.1
estimated variance component 2 = 0.010061528161806528; true variance component 2 = 0.5
estimated r = 59.22108970817275; true r = 100


## 5 iteration each

In [45]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.761120679347187; true β = 3.757872325600888
estimated variance component 1 = 0.04200544450800426; true variance component 1 = 0.1
estimated variance component 2 = 0.020655323963911138; true variance component 2 = 0.5
estimated r = 64.2226158070482; true r = 100


## 10 iteration each

In [51]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.7577367229152965; true β = 3.757872325600888
estimated variance component 1 = 0.08454686818713725; true variance component 1 = 0.1
estimated variance component 2 = 0.3859156499408988; true variance component 2 = 0.5
estimated r = 96.07852027325903; true r = 100


## 1 IPOPT iter + complete Newton fit

In [65]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.762274167394871; true β = 3.757872325600888
estimated variance component 1 = 0.044468809218469255; true variance component 1 = 0.1
estimated variance component 2 = 0.046354142614646404; true variance component 2 = 0.5
estimated r = 67.38079696512439; true r = 100


## 5 IPOPT iter + complete Newton fit

In [72]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.759537350779348; true β = 3.757872325600888
estimated variance component 1 = 0.05312813376763176; true variance component 1 = 0.1
estimated variance component 2 = 0.07266779547787298; true variance component 2 = 0.5
estimated r = 70.93622181411288; true r = 100


## 10 IPOPT iter + complete Newton fit (this reached max iter count)

In [79]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.757598085312234; true β = 3.757872325600888
estimated variance component 1 = 0.08847643797375318; true variance component 1 = 0.1
estimated variance component 2 = 0.418885780706873; true variance component 2 = 0.5
estimated r = 98.01186301721411; true r = 100


## Copying GLM.jl

In [7]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 1.456226847936331; true β = 1.4552872326068422
estimated variance component 1 = 0.10133551134062899; true variance component 1 = 0.1
estimated variance component 2 = 0.4371874111348784; true variance component 2 = 0.5
estimated r = 9.69061414063922; true r = 10


In [73]:
println("estimated β = $(gcm.β[1]); true β = $β_true")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2")
println("estimated r = $(gcm.r[1]); true r = $r");

estimated β = 3.758435534322703; true β = 3.757872325600888
estimated variance component 1 = 0.0934850410967605; true variance component 1 = 0.1
estimated variance component 2 = 0.3827355454688751; true variance component 2 = 0.5
estimated r = 94.45211633725886; true r = 100
