In [1]:
using Revise
using MendelIHT
using SnpArrays
using Random
using GLM
using DelimitedFiles
using Test
using Distributions
using LinearAlgebra

┌ Info: Precompiling MendelIHT [921c7187-1484-5754-b919-5d3ed9ac03c4]
└ @ Base loading.jl:1278


# First simulate multivariate Gaussian traits

In [5]:
n = 1000  # number of samples
p = 10000 # number of SNPs
k = 10    # number of causal SNPs per trait
r = 2     # number of traits

# set random seed for reproducibility
Random.seed!(2021)

# simulate `.bed` file with no missing data
x = simulate_random_snparray(undef, n, p)
xla = SnpLinAlg{Float64}(x, model=ADDITIVE_MODEL, center=true, scale=true) 

# intercept is the only nongenetic covariate
z = ones(n) 

# simulate response y, true model b, and the correct non-0 positions of b
Y, Σ, true_b, correct_position = simulate_random_response(xla, k, r);

In [6]:
σ11, σ12, σ22 = sqrt(Σ[1, 1]), sqrt(Σ[1, 2]), sqrt(Σ[2, 2])
true_correlation = σ12 / (σ11 * σ22)
empirical_correlation = cor(Y[:, 1], Y[:, 2])

@show true_correlation
@show empirical_correlation;

true_correlation = 0.8220033755384759
empirical_correlation = 0.15570403809779032


# Run IHT

In [58]:
result = fit_iht(Matrix(Y'), Transpose(xla), Matrix(z'), k=10, max_step=10)
@show result
@show correct_position

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse unknown regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10
Prior weight scaling = off
Doubly sparse projection = off
Debias = off

Converging when tol < 0.0001:
η_step = 0
Iteration 1: loglikelihood = -190.2551692133452, tol = 0.5
η_step = 0
Iteration 2: loglikelihood = 38.057473388844755, tol = 0.1046898215611281
η_step = 0
Iteration 3: loglikelihood = 181.70817012221937, tol = 0.09073610430067332
η_step = 0
Iteration 4: loglikelihood = 325.20792963537474, tol = 0.07565585224884278
η_step = 0
Iteration 5: loglikelihood = 461.35914564801806, tol 

20-element Array{CartesianIndex{2},1}:
 CartesianIndex(782, 1)
 CartesianIndex(901, 1)
 CartesianIndex(1204, 1)
 CartesianIndex(1306, 1)
 CartesianIndex(1655, 1)
 CartesianIndex(3160, 1)
 CartesianIndex(4201, 1)
 CartesianIndex(4768, 1)
 CartesianIndex(6047, 1)
 CartesianIndex(6879, 1)
 CartesianIndex(46, 2)
 CartesianIndex(4797, 2)
 CartesianIndex(5532, 2)
 CartesianIndex(5616, 2)
 CartesianIndex(6072, 2)
 CartesianIndex(6573, 2)
 CartesianIndex(7236, 2)
 CartesianIndex(7407, 2)
 CartesianIndex(7667, 2)
 CartesianIndex(9763, 2)

In [78]:
# first beta
β1 = result.beta[1, :]
true_b1_idx = findall(!iszero, true_b[:, 1])
[β1[true_b1_idx] true_b[true_b1_idx, 1]]

10×2 Array{Float64,2}:
 -0.396779  -0.402269
  0.780772   0.758756
  0.723603   0.729135
 -1.34918   -1.47163
  0.0       -0.172668
 -0.838393  -0.847906
  0.314966   0.296183
  0.0       -0.0034339
  0.0        0.125965
 -1.22879   -1.24972

In [80]:
# second beta
β2 = result.beta[2, :]
true_b2_idx = findall(!iszero, true_b[:, 2])
[β2[true_b2_idx] true_b[true_b2_idx, 2]]

10×2 Array{Float64,2}:
  1.72293    1.73729
 -1.1949    -1.19911
  0.0        0.0121193
 -0.959623  -0.969569
  0.518765   0.540525
 -0.586501  -0.609556
  0.497433   0.481189
  0.0       -0.0524866
  0.0        0.31182
  1.27256    1.29813

In [84]:
# covariance matrix
@show Σ
@show result.Σ;

Σ = [1.2251234040264405 1.2367352467240182; 1.2367352467240182 1.493996912280251]
result.Σ = [1.313315377127968 1.3101794957089767; 1.3101794957089767 1.6229842334000664]
