### Model Fit Comparison: LMM vs GLMCopula

# Dyestuff Data

This is a random-intercept model, `Y ~ 1 + (1|G)`, fitted to the Dyestuff data set from the R package lme4. Fitting this model with the MixedModels.jl package yields a log-likelihood of -163.66353. We will see that the copula model fits slightly better, with a log-likelihood of -163.35545.

In [None]:
using RDatasets, Test, GLM, LinearAlgebra, GLMCopula
using LinearAlgebra: BlasReal, copytri!
# Load the lme4 Dyestuff data: columns Batch (Categorical) and Yield (Int32).
dyestuff = dataset("lme4", "Dyestuff")
groups = unique(dyestuff[!, :Batch])
# n is the number of distinct batches. p and m are fixed at 1 and 2 (presumably the
# number of fixed effects and of variance components — confirm against later cells).
n = length(groups)
p = 1
m = 2
# Response distribution and link function, plus their types, which parameterize the
# per-batch observation container below.
d = Normal()
link = IdentityLink()
D, Link = typeof(d), typeof(link)
T = Float64
gcs = Vector{GLMCopulaVCObs{T, D, Link}}(undef, n)
# Build one observation object per batch.
for (i, grp) in enumerate(groups)
    inbatch = dyestuff[!, :Batch] .== grp      # row mask for this batch
    nobs = count(inbatch)
    resp = Float64.(dyestuff[inbatch, :Yield])
    design = ones(nobs, 1)                     # single column of ones (intercept only)
    # Two nobs × nobs matrices: all ones, and the identity.
    vcomps = [ones(nobs, nobs), Matrix{T}(I, nobs, nobs)]
    gcs[i] = GLMCopulaVCObs(resp, design, vcomps, d, link)
end
gcm = GLMCopulaVCModel(gcs);

In [None]:
# Starting values: initialize β and τ from the least squares solution.
@info "Initial point:"
initialize_model!(gcm);
# Manual override kept for reference:
# gcm.β .= [1527.4999999999998]
@show gcm.β
# Reset every entry of Σ to 1, then update Σ (and τ) from the current β
# using the MM algorithm.
gcm.Σ .= 1
# Alternative call with explicit arguments, kept for reference:
# update_Σ!(gcm, 500, 1e-6, GurobiSolver(OutputFlag=0), true)
update_Σ!(gcm)
# Print the resulting τ and Σ; the trailing semicolon suppresses the cell's value display.
@show gcm.τ gcm.Σ;

In [None]:
# Evaluate and print the log-likelihood at the initial point. The two `true` flags
# are forwarded to `loglikelihood!` (presumably requesting the gradient and the
# Hessian — confirm against the GLMCopula API).
@show loglikelihood!(gcm, true, true)
# Regression check: the value at the initial point must match this reference value.
@test loglikelihood!(gcm, true, true) ≈ -164.0008237940362

In [None]:
# Fit the model by maximum likelihood, using NLP on the profiled loglikelihood.
@info "MLE:"
# Earlier variant of the fitting call, kept for reference:
# @time GeneralizedCopula.fit!(gcm, IpoptSolver(print_level=5))
# `@time` reports the time and allocations of the fit. The keyword options are passed
# to the Ipopt solver; `derivative_test = "first-order"` presumably turns on Ipopt's
# check of the supplied first derivatives — confirm against the Ipopt options reference.
@time fit2!(gcm, IpoptSolver(print_level = 5, derivative_test = "first-order"))

In [None]:
# Report the fitted parameters.
@show gcm.β gcm.τ gcm.Σ
# Print the log-likelihood at the fitted values, then check it against the reference.
# NOTE(review): the printed call passes `true` as its last argument while the tested
# call passes `false` — confirm that this difference is intended.
@show loglikelihood!(gcm, true, true)
@test loglikelihood!(gcm, true, false) ≈ -163.35545423301846
# Print the ∇β and Hβ fields (presumably the gradient and Hessian with respect to β).
@show gcm.∇β gcm.Hβ