## TOC:
* [AR(1)](#ex1)
* [CS positive rho](#ex2)
* [CS negative rho](#ex3)

In [1]:
using GLMCopula, DelimitedFiles, LinearAlgebra, Random, GLM, MixedModels, CategoricalArrays
using Random, Roots, SpecialFunctions, StatsBase
using DataFrames, DelimitedFiles, Statistics, ToeplitzMatrices
import StatsBase: sem

In [20]:
p = 1   # number of fixed effects, including intercept

# true parameter values
βtrue = [log(5.0)]
σ2true = [1.0]
ρtrue = [0.9]

ni = 5 # number of observations per cluster
samplesize = 100000 # number of sampling units

d = Poisson()
link = LogLink()
D = typeof(d)
Link = typeof(link)
T = Float64

Float64

## AR(1) <a class="anchor" id="ex1"></a>


$\rho = 0.9, \sigma^2 = 0.5$


In [13]:
function get_V_AR(ρ, n)
    vec = zeros(n)
    vec[1] = 1.0
    for i in 2:n
        vec[i] = vec[i - 1] * ρ
    end
    V = ToeplitzMatrices.SymmetricToeplitz(vec)
    V
end

V_AR = get_V_AR(ρtrue[1], ni)

# true Gamma
Γ_AR = σ2true[1] * V_AR

5×5 Matrix{Float64}:
 1.0     0.9    0.81  0.729  0.6561
 0.9     1.0    0.9   0.81   0.729
 0.81    0.9    1.0   0.9    0.81
 0.729   0.81   0.9   1.0    0.9
 0.6561  0.729  0.81  0.9    1.0

In [21]:
vecd = Vector{DiscreteUnivariateDistribution}(undef, ni)
for i in 1:ni
    vecd[i] = Poisson(5.0)
end
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ_AR)

Random.seed!(1234)
@time Y_nsample = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, samplesize)

gcs = Vector{GLMCopulaARObs{T, D, Link}}(undef, samplesize)
for i in 1:samplesize
    X = ones(ni, 1)
    y = Float64.(Y_nsample[i])
    V = [ones(ni, ni)]
    gcs[i] = GLMCopulaARObs(y, X, d, link)
end

# form model
gcm = GLMCopulaARModel(gcs);

N = length(gcm.data)
di = length(gcm.data[1].y)
Y_AR = zeros(N, di)
for j in 1:di
    Y_AR[:, j] = [gcm.data[i].y[j] for i in 1:N]
end
empirical_covariance_AR = scattermat(Y_AR) ./ N

  1.811635 seconds (9.00 M allocations: 1.299 GiB, 21.76% gc time)


5×5 Matrix{Float64}:
 6.5928    1.21602   1.16472  1.0455   0.938284
 1.21602   6.57575   1.24751  1.14854  0.979648
 1.16472   1.24751   6.5259   1.21906  1.14271
 1.0455    1.14854   1.21906  6.52164  1.21833
 0.938284  0.979648  1.14271  1.21833  6.57649

In [22]:
StatsBase.cor(Y_AR)

5×5 Matrix{Float64}:
 1.0       0.184686  0.177569  0.159445  0.142496
 0.184686  1.0       0.190436  0.175386  0.148971
 0.177569  0.190436  1.0       0.186865  0.174429
 0.159445  0.175386  0.186865  1.0       0.186033
 0.142496  0.148971  0.174429  0.186033  1.0

## CS positive rho <a class="anchor" id="ex2"></a>


$\rho = 0.9, \sigma^2 = 1.0$



In [36]:
ni = 10
σ2true = [0.1]
function get_V_CS(ρ, n)
    vec = zeros(n)
    vec[1] = 1.0
    for i in 2:n
        vec[i] = ρ
    end
    V = ToeplitzMatrices.SymmetricToeplitz(vec)
    V
end

V_CS = get_V_CS(σ2true[1], ni)

# true Gamma
Γ_CS = σ2true[1] * V_CS

10×10 Matrix{Float64}:
 0.1   0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.01
 0.01  0.1   0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.01
 0.01  0.01  0.1   0.01  0.01  0.01  0.01  0.01  0.01  0.01
 0.01  0.01  0.01  0.1   0.01  0.01  0.01  0.01  0.01  0.01
 0.01  0.01  0.01  0.01  0.1   0.01  0.01  0.01  0.01  0.01
 0.01  0.01  0.01  0.01  0.01  0.1   0.01  0.01  0.01  0.01
 0.01  0.01  0.01  0.01  0.01  0.01  0.1   0.01  0.01  0.01
 0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.1   0.01  0.01
 0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.1   0.01
 0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.01  0.1

In [37]:
vecd = Vector{DiscreteUnivariateDistribution}(undef, ni)
for i in 1:ni
    vecd[i] = Poisson(5.0)
end
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ_CS)

Random.seed!(1234)
@time Y_nsample = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, samplesize)

gcs = Vector{GLMCopulaCSObs{T, D, Link}}(undef, samplesize)
for i in 1:samplesize
    X = ones(ni, 1)
    y = Float64.(Y_nsample[i])
    V = [Float64.(Matrix(I, ni, ni))]
    gcs[i] = GLMCopulaCSObs(y, X, d, link)
end

# form model
gcm = GLMCopulaCSModel(gcs);

N = length(gcm.data)
di = length(gcm.data[1].y)
Y_CS = zeros(N, di)
for j in 1:di
    Y_CS[:, j] = [gcm.data[i].y[j] for i in 1:N]
end
empirical_covariance_CS = scattermat(Y_CS) ./ N

  3.619145 seconds (17.50 M allocations: 3.017 GiB, 22.11% gc time)


10×10 Matrix{Float64}:
 5.41747     0.0212904   0.0417304   …  0.00955482   0.0532355   0.0387852
 0.0212904   5.34812     0.00246353     0.0186979    0.0236595   0.0646903
 0.0417304   0.00246353  5.35091        0.0359896    0.0107176   0.0129346
 0.0607543   0.0319978   0.0245863      0.0271806    0.0502456   0.0389715
 0.0517526   0.00251236  0.0220527      0.0245259   -0.00664124  0.025755
 0.0242266   0.0235919   0.0467988   …  0.0221041    0.0264465   0.0523994
 0.0160578   0.00436615  0.0474033      0.0546825    0.00829739  0.0278084
 0.00955482  0.0186979   0.0359896      5.33645      0.0214395   0.0273709
 0.0532355   0.0236595   0.0107176      0.0214395    5.36208     0.0232895
 0.0387852   0.0646903   0.0129346      0.0273709    0.0232895   5.4024

In [38]:
StatsBase.cor(Y_CS)

10×10 Matrix{Float64}:
 1.0         0.00395536   0.0077507    …  0.00177704   0.00987727  0.00716926
 0.00395536  1.0          0.000460514     0.00349999   0.00441812  0.012035
 0.0077507   0.000460514  1.0             0.00673499   0.00200086  0.00240572
 0.0112594   0.00596835   0.00458473      0.00507537   0.00935981  0.00723251
 0.00960116  0.000469106  0.00411658      0.00458447  -0.00123843  0.00478473
 0.00448988  0.0044005    0.00872693   …  0.0041275    0.00492654  0.00972463
 0.00298498  0.000816867  0.00886642      0.0102418    0.00155034  0.00517649
 0.00177704  0.00349999   0.00673499      1.0          0.00400794  0.00509764
 0.00987727  0.00441812   0.00200086      0.00400794   1.0         0.00432714
 0.00716926  0.012035     0.00240572      0.00509764   0.00432714  1.0

## CS positive rho <a class="anchor" id="ex3"></a>

$\rho = 0.9, \sigma^2 = 10.0 $

In [26]:
V_CS_negative = get_V_CS(ρtrue[1], ni)

# true Gamma
Γ_CS_negative = 10.0 * V_CS_negative

5×5 Matrix{Float64}:
 10.0   9.0   9.0   9.0   9.0
  9.0  10.0   9.0   9.0   9.0
  9.0   9.0  10.0   9.0   9.0
  9.0   9.0   9.0  10.0   9.0
  9.0   9.0   9.0   9.0  10.0

In [27]:
vecd = Vector{DiscreteUnivariateDistribution}(undef, ni)
for i in 1:ni
    vecd[i] = Poisson(5.0)
end
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ_CS_negative)

Random.seed!(1234)
@time Y_nsample = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, samplesize)

gcs = Vector{GLMCopulaCSObs{T, D, Link}}(undef, samplesize)
for i in 1:samplesize
    X = ones(ni, 1)
    y = Float64.(Y_nsample[i])
    V = [Float64.(Matrix(I, ni, ni))]
    gcs[i] = GLMCopulaCSObs(y, X, d, link)
end

# form model
gcm = GLMCopulaCSModel(gcs);

N = length(gcm.data)
di = length(gcm.data[1].y)
Y_CS_2 = zeros(N, di)
for j in 1:di
    Y_CS_2[:, j] = [gcm.data[i].y[j] for i in 1:N]
end
empirical_covariance_CS_negative = scattermat(Y_CS_2) ./ N

  1.846527 seconds (9.00 M allocations: 1.299 GiB, 22.47% gc time)


5×5 Matrix{Float64}:
 7.12912  1.66062  1.69437  1.71988  1.69704
 1.66062  7.10645  1.72291  1.68898  1.71246
 1.69437  1.72291  7.0468   1.66485  1.69184
 1.71988  1.68898  1.66485  7.04307  1.67287
 1.69704  1.71246  1.69184  1.67287  7.10509

In [28]:
StatsBase.cor(Y_CS_2)

5×5 Matrix{Float64}:
 1.0       0.233306  0.239053  0.242717  0.238446
 0.233306  1.0       0.243468  0.238736  0.240995
 0.239053  0.243468  1.0       0.236318  0.2391
 0.242717  0.238736  0.236318  1.0       0.236481
 0.238446  0.240995  0.2391    0.236481  1.0

## Comparing <a class="anchor" id="ex3"></a>

$\rho = 0.6, \sigma^2 = 0.1$

In [15]:
empirical_covariance_AR

5×5 Matrix{Float64}:
 5.4753     0.239691  0.155916  0.103735  0.0684627
 0.239691   5.46275   0.237582  0.124543  0.071047
 0.155916   0.237582  5.40272   0.221336  0.14614
 0.103735   0.124543  0.221336  5.40579   0.241656
 0.0684627  0.071047  0.14614   0.241656  5.46087

In [16]:
empirical_covariance_CS

5×5 Matrix{Float64}:
 5.4753    0.239691  0.253737  0.256011  0.251832
 0.239691  5.46275   0.239646  0.228651  0.229315
 0.253737  0.239646  5.40309   0.220851  0.240448
 0.256011  0.228651  0.220851  5.40391   0.243061
 0.251832  0.229315  0.240448  0.243061  5.46092

$\rho = -0.6, \sigma^2 = 0.1$

In [17]:
empirical_covariance_CS_negative

5×5 Matrix{Float64}:
  5.4753    -0.247838  -0.231306  -0.225734  -0.224622
 -0.247838   5.46342   -0.275844  -0.265809  -0.259551
 -0.231306  -0.275844   5.4141    -0.234971  -0.266396
 -0.225734  -0.265809  -0.234971   5.40746   -0.229744
 -0.224622  -0.259551  -0.266396  -0.229744   5.46245