# Multivariate Logistic Regression: joint estimation with MM initialization followed by Newton's method
    
   ### Simulation Parameters (Truth):
   $$p = 0.300, vc_1 = 0.100, vc_2 = 0.100$$
    
    
       1. MM initialization + Newton's method + Mehrotra predictor-corrector (59 iterations, 35.8 seconds):
   $$\hat{p} = 0.301, \hat{vc}_1 = 0.082, \hat{vc}_2 = 0.080$$
    

In [1]:
using GLMCopula, Random, Statistics, Test, LinearAlgebra, StatsFuns, GLM, Revise

In [2]:
# Fix the RNG state so the simulated data are reproducible.
Random.seed!(1234)

# Cluster size and the two true variance components used to build Γ.
n = 50
variance_component_1 = 0.1
variance_component_2 = 0.1

# Γ = vc₁ · 11ᵀ + vc₂ · I: constant off-diagonal entries vc₁ and diagonal
# entries vc₁ + vc₂ (an n × n Float64 matrix).
Γ = fill(variance_component_1, n, n) + variance_component_2 * I

# True success probability shared by every Bernoulli margin.
# NOTE(review): this global shadows `Statistics.mean` for the rest of the
# session; it is kept because later cells interpolate `$mean`.
mean = 0.3
dist = Bernoulli
vecd = fill(dist(mean), n)

# Joint distribution with the margins in `vecd` and the matrix Γ.
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ)

NonMixedMultivariateDistribution{Discrete,Bernoulli{Float64},Array{Bernoulli{Float64},1}}(Bernoulli{Float64}[Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3)  …  Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3), Bernoulli{Float64}(p=0.3)], [0.2 0.1 … 0.1 0.1; 0.1 0.2 … 0.1 0.1; … ; 0.1 0.1 … 0.2 0.1; 0.1 0.1 … 0.1 0.2], 9.999999999999996, Union{ContinuousUnivariateCopula, DiscreteUnivariateCopula}[#undef, #undef, #undef, #undef, #undef, #undef, #undef, #undef, #undef, #undef  …  #undef, #undef, #undef, #undef, #undef, #undef, #undef, #undef, #undef, #undef])

In [3]:
# Reseed right before sampling so this cell is reproducible on its own.
Random.seed!(1234)

# Number of independent length-n response vectors to draw.
nsample = 10_000

# Draw the sample and report wall time / allocations of the simulation.
@time Y_nsample = simulate_nobs_independent_vectors(
    nonmixed_multivariate_dist, nsample
)

  3.285754 seconds (9.72 M allocations: 7.164 GiB, 23.00% gc time)


10000-element Array{Array{Float64,1},1}:
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0  …  1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0]
 [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0  …  0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0]
 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
 [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0  …  0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0]
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0  …  0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
 [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]
 [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0  …  1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0]
 [0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0  …  1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
 [0.0, 0.0, 1.

In [4]:
# Marginal distribution and link function of the fitted model; their concrete
# types parameterize the per-observation container below.
d = Bernoulli()
link = LogitLink()
D = typeof(d)
Link = typeof(link)
T = Float64

# One GLMCopulaVCObs per simulated vector, each built from:
#   * the response vector converted to Float64,
#   * an intercept-only design matrix (n × 1 column of ones),
#   * two covariance basis matrices: all-ones and identity.
# Fresh arrays are allocated for every observation, so no storage is shared
# between observations.
gcs = Vector{GLMCopulaVCObs{T, D, Link}}(undef, nsample)
for i in eachindex(gcs)
    gcs[i] = GLMCopulaVCObs(
        Float64.(Y_nsample[i]),
        ones(n, 1),
        [ones(n, n), Matrix(I, n, n)],
        d,
        link,
    )
end
gcm = GLMCopulaVCModel(gcs);

# Compute starting values for the model parameters, then display the initial β.
initialize_model!(gcm)
@show gcm.β

initializing β using Newton's Algorithm under Independence Assumption
1 0.0 -308675.8817541355 39999
2 -308675.8817541355 -308675.8817541355 39999
initializing variance components using MM-Algorithm
gcm.β = [-0.8102932942177389]


1-element Array{Float64,1}:
 -0.8102932942177389

In [5]:
# Evaluate the log-likelihood at the initialized parameters (before the joint fit).
# NOTE(review): the two `true` flags presumably request gradient and Hessian
# evaluation as well — confirm against the GLMCopula API.
loglikelihood!(gcm, true, true)

-307479.5042351658

## Newton's method with exact Hessian + Mehrotra predictor-corrector

In [6]:
# Joint fit with Ipopt, timed. Options passed to the solver:
#   print_level = 5                  -> detailed iteration log
#   max_iter = 100                   -> cap on the number of iterations
#   mehrotra_algorithm = "yes"       -> Mehrotra predictor-corrector variant
#   warm_start_init_point = "yes"    -> warm-start from the supplied initial point
#   hessian_approximation = "exact"  -> use the supplied Hessian, not a quasi-Newton one
@time GLMCopula.fit!(
    gcm,
    IpoptSolver(
        print_level = 5,
        max_iter = 100,
        mehrotra_algorithm = "yes",
        warm_start_init_point = "yes",
        hessian_approximation = "exact",
    ),
)


******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

This is Ipopt version 3.13.4, running with linear solver mumps.
NOTE: Other linear solvers might be more efficient (see Ipopt documentation).

Number of nonzeros in equality constraint Jacobian...:        0
Number of nonzeros in inequality constraint Jacobian.:        0
Number of nonzeros in Lagrangian Hessian.............:        4

Total number of variables............................:        3
                     variables with only lower bounds:        2
                variables with lower and upper bounds:        0
                     variables with only upper bounds:        0
Total number of equal

-307460.9619861399

In [7]:
# Re-evaluate the log-likelihood now that `fit!` has been run on `gcm`.
# NOTE(review): the `true, true` flags presumably also refresh gradient and
# Hessian — confirm against the GLMCopula API.
@show GLMCopula.loglikelihood!(gcm, true, true);

GLMCopula.loglikelihood!(gcm, true, true) = -307460.9619861399


In [8]:
# Print the parameter vector θ stored on the model.
# NOTE(review): θ appears to be β followed by the variance components — confirm.
@show gcm.θ;

gcm.θ = [-0.8403375636367664, 0.08190263795756803, 0.07951057908946718]


In [9]:
# Print the ∇θ field of the model.
# NOTE(review): presumably the gradient with respect to θ; entries close to
# zero would indicate a stationary point — confirm.
@show gcm.∇θ;

gcm.∇θ = [-1.1232847541009505e-7, 1.2900272849947214e-8, -6.235495675355196e-9]


In [10]:
# Compare the estimates with the simulation truth.
# The fitted intercept lives on the logit scale, so it is mapped back to a
# probability with the inverse logit. `StatsFuns.logistic` is used instead of
# the hand-written exp(x) / (1 + exp(x)), which evaluates `exp` twice and
# yields NaN (Inf / Inf) once `exp(x)` overflows.
println("estimated mean = $(StatsFuns.logistic(gcm.β[1])); true mean value= $mean")
println("estimated variance component 1 = $(gcm.Σ[1]); true variance component 1 = $variance_component_1")
println("estimated variance component 2 = $(gcm.Σ[2]); true variance component 2 = $variance_component_2");

estimated mean = 0.3014636939573119; true mean value= 0.3
estimated variance component 1 = 0.08190263795756803; true variance component 1 = 0.1
estimated variance component 2 = 0.07951057908946718; true variance component 2 = 0.1


In [11]:
using BenchmarkTools

# Benchmark one evaluation of the log-likelihood with both flags set to `true`.
# `$gcm` interpolates the global into the benchmark so the timing is not
# distorted by untyped-global access.
@benchmark loglikelihood!($gcm, true, true)

BenchmarkTools.Trial: 
  memory estimate:  137.33 MiB
  allocs estimate:  1500000
  --------------
  minimum time:     204.282 ms (0.00% GC)
  median time:      216.192 ms (0.00% GC)
  mean time:        225.177 ms (6.33% GC)
  maximum time:     246.732 ms (13.47% GC)
  --------------
  samples:          23
  evals/sample:     1

In [12]:
# Benchmark `update_∇Σ!` on the model.
# NOTE(review): presumably the in-place gradient update for the variance
# components Σ — confirm against GLMCopula.
@benchmark update_∇Σ!($gcm)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     28.108 ms (0.00% GC)
  median time:      29.664 ms (0.00% GC)
  mean time:        30.138 ms (0.00% GC)
  maximum time:     36.814 ms (0.00% GC)
  --------------
  samples:          166
  evals/sample:     1

In [13]:
# Benchmark `update_HΣ!` on the model.
# NOTE(review): presumably the in-place Hessian update for the variance
# components Σ — confirm against GLMCopula.
@benchmark update_HΣ!($gcm)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     68.047 μs (0.00% GC)
  median time:      69.235 μs (0.00% GC)
  mean time:        72.065 μs (0.00% GC)
  maximum time:     360.865 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1