## TOC:
* [AR(1)](#ex1)
* [CS positive rho](#ex2)
* [CS negative rho](#ex3)

In [1]:
using GLMCopula, DelimitedFiles, LinearAlgebra, Random, GLM, MixedModels, CategoricalArrays
using Random, Roots, SpecialFunctions, StatsBase
using DataFrames, DelimitedFiles, Statistics, ToeplitzMatrices
import StatsBase: sem

In [2]:
# Global settings shared by every simulation cell below.
p = 1   # number of fixed effects, including intercept

# true parameter values
# βtrue holds the single coefficient on the log scale (log of 5.0).
βtrue = [log(5.0)]
# σ2true scales the structured matrix V to form Γ = σ2true[1] * V.
σ2true = [0.1]
# ρtrue is the correlation parameter passed to get_V_AR / get_V_CS.
ρtrue = [0.6]

ni = 5 # number of observations per cluster
samplesize = 100000 # number of sampling units

# Base distribution and link handed to the GLMCopula*Obs constructors.
d = Poisson()
link = LogLink()
# Type parameters used when pre-allocating the observation vectors.
D = typeof(d)
Link = typeof(link)
T = Float64

Float64

## AR(1) <a class="anchor" id="ex1"></a>


$\rho = 0.6, \sigma^2 = 0.1$


In [3]:
"""
    get_V_AR(ρ, n)

Return the `n × n` AR(1) correlation structure as a `SymmetricToeplitz` matrix whose
`(i, j)` entry is `ρ^|i - j|`.

# Arguments
- `ρ`: lag-one correlation.
- `n`: matrix dimension; must be at least 1.

# Throws
- `ArgumentError` if `n < 1`.
"""
function get_V_AR(ρ, n)
    n >= 1 || throw(ArgumentError("n must be at least 1, got $n"))
    # Float64 for ordinary real inputs; widens for wider number types (e.g. BigFloat)
    # instead of silently truncating them.
    elty = promote_type(Float64, typeof(ρ))
    first_col = ones(elty, n)
    for i in 2:n
        # Running product, so entry i holds ρ^(i - 1).
        first_col[i] = first_col[i - 1] * ρ
    end
    return ToeplitzMatrices.SymmetricToeplitz(first_col)
end

# AR(1) structure of dimension ni built from the true correlation parameter.
V_AR = get_V_AR(ρtrue[1], ni)

# true Gamma
# Scale the AR(1) structure by the true variance component.
Γ_AR = σ2true[1] * V_AR

5×5 Matrix{Float64}:
 0.1      0.06    0.036  0.0216  0.01296
 0.06     0.1     0.06   0.036   0.0216
 0.036    0.06    0.1    0.06    0.036
 0.0216   0.036   0.06   0.1     0.06
 0.01296  0.0216  0.036  0.06    0.1

In [4]:
# Marginal distributions: ni Poisson components with mean 5.0 (βtrue[1] is log(5.0)).
vecd = DiscreteUnivariateDistribution[Poisson(5.0) for _ in 1:ni]
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ_AR)

# Seed the global RNG before drawing the sample.
Random.seed!(1234)
@time Y_nsample = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, samplesize)

# Wrap every simulated vector in an AR(1) observation with an intercept-only design.
gcs = Vector{GLMCopulaARObs{T, D, Link}}(undef, samplesize)
for i in 1:samplesize
    X = ones(ni, 1)
    y = Float64.(Y_nsample[i])
    gcs[i] = GLMCopulaARObs(y, X, d, link)
end

# form model
gcm = GLMCopulaARModel(gcs);

# Stack the responses into an N × di matrix: one row per sampling unit,
# one column per within-cluster position.
N = length(gcm.data)
di = length(gcm.data[1].y)
Y_AR = zeros(N, di)
for j in 1:di
    Y_AR[:, j] = [gcm.data[i].y[j] for i in 1:N]
end
# Scatter matrix divided by N, i.e. the empirical covariance with a 1/N normalisation.
empirical_covariance_AR = scattermat(Y_AR) ./ N

  1.993135 seconds (10.74 M allocations: 1.408 GiB, 8.34% gc time, 27.27% compilation time)


5×5 Matrix{Float64}:
 5.4753     0.239691  0.155916  0.103735  0.0684627
 0.239691   5.46275   0.237582  0.124543  0.071047
 0.155916   0.237582  5.40272   0.221336  0.14614
 0.103735   0.124543  0.221336  5.40579   0.241656
 0.0684627  0.071047  0.14614   0.241656  5.46087

In [5]:
# Empirical correlation between the columns of Y_AR.
StatsBase.cor(Y_AR)

5×5 Matrix{Float64}:
 1.0        0.043827   0.0286668  0.0190674  0.0125204
 0.043827   1.0        0.0437322  0.0229184  0.013008
 0.0286668  0.0437322  1.0        0.0409559  0.0269049
 0.0190674  0.0229184  0.0409559  1.0        0.0444772
 0.0125204  0.013008   0.0269049  0.0444772  1.0

In [6]:
# Display the true AR(1) Γ next to the empirical matrices above.
Γ_AR

5×5 Matrix{Float64}:
 0.1      0.06    0.036  0.0216  0.01296
 0.06     0.1     0.06   0.036   0.0216
 0.036    0.06    0.1    0.06    0.036
 0.0216   0.036   0.06   0.1     0.06
 0.01296  0.0216  0.036  0.06    0.1

## CS positive rho <a class="anchor" id="ex2"></a>


$\rho = 0.6, \sigma^2 = 0.1$



In [7]:
"""
    get_V_CS(ρ, n)

Return the `n × n` compound-symmetric correlation structure as a `SymmetricToeplitz`
matrix: ones on the diagonal and `ρ` in every off-diagonal entry.

The result is positive semidefinite only when `-1 / (n - 1) ≤ ρ ≤ 1` (for `n > 1`);
this function does not enforce that range.

# Arguments
- `ρ`: common off-diagonal correlation.
- `n`: matrix dimension; must be at least 1.

# Throws
- `ArgumentError` if `n < 1`.
"""
function get_V_CS(ρ, n)
    n >= 1 || throw(ArgumentError("n must be at least 1, got $n"))
    # Float64 for ordinary real inputs; widens for wider number types (e.g. BigFloat)
    # instead of silently truncating them.
    elty = promote_type(Float64, typeof(ρ))
    first_col = fill(convert(elty, ρ), n)
    first_col[1] = one(elty)
    return ToeplitzMatrices.SymmetricToeplitz(first_col)
end

# Compound-symmetric structure of dimension ni built from the true correlation parameter.
V_CS = get_V_CS(ρtrue[1], ni)

# true Gamma
# Scale the compound-symmetric structure by the true variance component.
Γ_CS = σ2true[1] * V_CS

5×5 Matrix{Float64}:
 0.1   0.06  0.06  0.06  0.06
 0.06  0.1   0.06  0.06  0.06
 0.06  0.06  0.1   0.06  0.06
 0.06  0.06  0.06  0.1   0.06
 0.06  0.06  0.06  0.06  0.1

In [8]:
# Marginal distributions: ni Poisson components with mean 5.0 (βtrue[1] is log(5.0)).
vecd = DiscreteUnivariateDistribution[Poisson(5.0) for _ in 1:ni]
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ_CS)

# Seed the global RNG before drawing the sample.
Random.seed!(1234)
@time Y_nsample = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, samplesize)

# Wrap every simulated vector in a compound-symmetric observation with an
# intercept-only design.
gcs = Vector{GLMCopulaCSObs{T, D, Link}}(undef, samplesize)
for i in 1:samplesize
    X = ones(ni, 1)
    y = Float64.(Y_nsample[i])
    gcs[i] = GLMCopulaCSObs(y, X, d, link)
end

# form model
gcm = GLMCopulaCSModel(gcs);

# Stack the responses into an N × di matrix: one row per sampling unit,
# one column per within-cluster position.
N = length(gcm.data)
di = length(gcm.data[1].y)
Y_CS = zeros(N, di)
for j in 1:di
    Y_CS[:, j] = [gcm.data[i].y[j] for i in 1:N]
end
# Scatter matrix divided by N, i.e. the empirical covariance with a 1/N normalisation.
empirical_covariance_CS = scattermat(Y_CS) ./ N

  1.971711 seconds (9.00 M allocations: 1.299 GiB, 27.56% gc time)


5×5 Matrix{Float64}:
 5.4753    0.239691  0.253737  0.256011  0.251832
 0.239691  5.46275   0.239646  0.228651  0.229315
 0.253737  0.239646  5.40309   0.220851  0.240448
 0.256011  0.228651  0.220851  5.40391   0.243061
 0.251832  0.229315  0.240448  0.243061  5.46092

In [9]:
# Empirical correlation between the columns of Y_CS.
StatsBase.cor(Y_CS)

5×5 Matrix{Float64}:
 1.0        0.043827   0.0466507  0.0470653  0.0460547
 0.043827   1.0        0.0441106  0.0420836  0.041985
 0.0466507  0.0441106  1.0        0.0408719  0.0442656
 0.0470653  0.0420836  0.0408719  1.0        0.0447434
 0.0460547  0.041985   0.0442656  0.0447434  1.0

In [10]:
# Display the true compound-symmetric Γ next to the empirical matrices above.
Γ_CS

5×5 Matrix{Float64}:
 0.1   0.06  0.06  0.06  0.06
 0.06  0.1   0.06  0.06  0.06
 0.06  0.06  0.1   0.06  0.06
 0.06  0.06  0.06  0.1   0.06
 0.06  0.06  0.06  0.06  0.1

## CS negative rho <a class="anchor" id="ex3"></a>

$\rho = -0.6, \sigma^2 = 0.1$

In [11]:
# Same compound-symmetric structure with the sign of ρ flipped (ρ = -0.6).
# NOTE(review): a compound-symmetric matrix is positive semidefinite only for
# ρ ≥ -1/(ni - 1), which is -0.25 when ni = 5, so this Γ is indefinite.
# Confirm that simulating from it is intended.
V_CS_negative = get_V_CS(-ρtrue[1], ni)

# true Gamma
# Scale the negative-correlation structure by the true variance component.
Γ_CS_negative = σ2true[1] * V_CS_negative

5×5 Matrix{Float64}:
  0.1   -0.06  -0.06  -0.06  -0.06
 -0.06   0.1   -0.06  -0.06  -0.06
 -0.06  -0.06   0.1   -0.06  -0.06
 -0.06  -0.06  -0.06   0.1   -0.06
 -0.06  -0.06  -0.06  -0.06   0.1

In [12]:
# Marginal distributions: ni Poisson components with mean 5.0 (βtrue[1] is log(5.0)).
vecd = DiscreteUnivariateDistribution[Poisson(5.0) for _ in 1:ni]
nonmixed_multivariate_dist = NonMixedMultivariateDistribution(vecd, Γ_CS_negative)

# Seed the global RNG before drawing the sample.
Random.seed!(1234)
@time Y_nsample = simulate_nobs_independent_vectors(nonmixed_multivariate_dist, samplesize)

# Wrap every simulated vector in a compound-symmetric observation with an
# intercept-only design.
gcs = Vector{GLMCopulaCSObs{T, D, Link}}(undef, samplesize)
for i in 1:samplesize
    X = ones(ni, 1)
    y = Float64.(Y_nsample[i])
    gcs[i] = GLMCopulaCSObs(y, X, d, link)
end

# form model
gcm = GLMCopulaCSModel(gcs);

# Stack the responses into an N × di matrix: one row per sampling unit,
# one column per within-cluster position.
N = length(gcm.data)
di = length(gcm.data[1].y)
Y_CS_2 = zeros(N, di)
for j in 1:di
    Y_CS_2[:, j] = [gcm.data[i].y[j] for i in 1:N]
end
# Scatter matrix divided by N, i.e. the empirical covariance with a 1/N normalisation.
empirical_covariance_CS_negative = scattermat(Y_CS_2) ./ N

  2.052073 seconds (9.00 M allocations: 1.299 GiB, 32.72% gc time)


5×5 Matrix{Float64}:
  5.4753    -0.247838  -0.231306  -0.225734  -0.224622
 -0.247838   5.46342   -0.275844  -0.265809  -0.259551
 -0.231306  -0.275844   5.4141    -0.234971  -0.266396
 -0.225734  -0.265809  -0.234971   5.40746   -0.229744
 -0.224622  -0.259551  -0.266396  -0.229744   5.46245

In [13]:
# Empirical correlation between the columns of Y_CS_2.
StatsBase.cor(Y_CS_2)

5×5 Matrix{Float64}:
  1.0        -0.045314   -0.0424834  -0.0414856  -0.0410728
 -0.045314    1.0        -0.0507187  -0.0489036  -0.0475113
 -0.0424834  -0.0507187   1.0        -0.0434265  -0.0489859
 -0.0414856  -0.0489036  -0.0434265   1.0        -0.0422721
 -0.0410728  -0.0475113  -0.0489859  -0.0422721   1.0

In [14]:
# Display the true negative-ρ compound-symmetric Γ next to the empirical matrices above.
Γ_CS_negative

5×5 Matrix{Float64}:
  0.1   -0.06  -0.06  -0.06  -0.06
 -0.06   0.1   -0.06  -0.06  -0.06
 -0.06  -0.06   0.1   -0.06  -0.06
 -0.06  -0.06  -0.06   0.1   -0.06
 -0.06  -0.06  -0.06  -0.06   0.1

## Comparing empirical covariances <a class="anchor" id="ex4"></a>

$\rho = 0.6, \sigma^2 = 0.1$

In [15]:
# Empirical covariance of the sample simulated with the AR(1) Γ.
empirical_covariance_AR

5×5 Matrix{Float64}:
 5.4753     0.239691  0.155916  0.103735  0.0684627
 0.239691   5.46275   0.237582  0.124543  0.071047
 0.155916   0.237582  5.40272   0.221336  0.14614
 0.103735   0.124543  0.221336  5.40579   0.241656
 0.0684627  0.071047  0.14614   0.241656  5.46087

In [16]:
# Empirical covariance of the sample simulated with the positive-ρ compound-symmetric Γ.
empirical_covariance_CS

5×5 Matrix{Float64}:
 5.4753    0.239691  0.253737  0.256011  0.251832
 0.239691  5.46275   0.239646  0.228651  0.229315
 0.253737  0.239646  5.40309   0.220851  0.240448
 0.256011  0.228651  0.220851  5.40391   0.243061
 0.251832  0.229315  0.240448  0.243061  5.46092

$\rho = -0.6, \sigma^2 = 0.1$

In [17]:
# Empirical covariance of the sample simulated with the negative-ρ compound-symmetric Γ.
empirical_covariance_CS_negative

5×5 Matrix{Float64}:
  5.4753    -0.247838  -0.231306  -0.225734  -0.224622
 -0.247838   5.46342   -0.275844  -0.265809  -0.259551
 -0.231306  -0.275844   5.4141    -0.234971  -0.266396
 -0.225734  -0.265809  -0.234971   5.40746   -0.229744
 -0.224622  -0.259551  -0.266396  -0.229744   5.46245