In [1]:
using Revise
using MendelIHT
using SnpArrays
using Random
using GLM
using DelimitedFiles
using Test
using Distributions
using LinearAlgebra

┌ Info: Precompiling MendelIHT [921c7187-1484-5754-b919-5d3ed9ac03c4]
└ @ Base loading.jl:1278


# First simulate multivariate Gaussian traits

In [2]:
n = 1000  # number of samples
p = 10000 # number of SNPs
k = 10    # number of causal SNPs per trait
r = 2     # number of traits

# set random seed for reproducibility
Random.seed!(2021)

# simulate `.bed` file with no missing data
x = simulate_random_snparray(undef, n, p)
xla = SnpLinAlg{Float64}(x, model=ADDITIVE_MODEL, center=true, scale=true) 

# intercept is the only nongenetic covariate
z = ones(n) 

# simulate response y, true model b, and the correct non-0 positions of b
Y, true_Σ, true_b, correct_position = simulate_random_response(xla, k, r);

In [3]:
σ11, σ12, σ22 = sqrt(true_Σ[1, 1]), sqrt(true_Σ[1, 2]), sqrt(true_Σ[2, 2])
true_correlation = σ12 / (σ11 * σ22)
empirical_correlation = cor(Y[:, 1], Y[:, 2])

@show true_correlation
@show empirical_correlation;

true_correlation = 0.8220033755384759
empirical_correlation = 0.15570403809779032


# Run IHT

In [11]:
Yt = Matrix(Y')
Zt = Matrix(z')
@time fit_iht(Yt, Transpose(xla), Zt, k=10)

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse unknown regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10
Prior weight scaling = off
Doubly sparse projection = off
Debias = off

Converging when tol < 0.0001:
Iteration 1: loglikelihood = -190.25516921334543, tol = 0.7758118456211052
Iteration 2: loglikelihood = 1819.187992792398, tol = 0.1428898155012873
Iteration 3: loglikelihood = 2410.0073888161824, tol = 0.04651754346934129
Iteration 4: loglikelihood = 2567.74886336667, tol = 0.011846769005166678
Iteration 5: loglikelihood = 2601.091085582637, tol = 0.018901526422886575
Iteration 6: loglik


Compute time (sec):     1.2861590385437012
Final loglikelihood:    2635.989391876837
Iterations:             14

Trait 1: IHT estimated 10 nonzero SNP predictors
[1m10×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     [0m│[90m Int64    [0m[90m Float64     [0m
─────┼───────────────────────
   1 │      782   -0.387253
   2 │      901    0.776228
   3 │     1204    0.73234
   4 │     1306   -1.45682
   5 │     1427    0.0570532
   6 │     1655   -0.186513
   7 │     3160   -0.845163
   8 │     4201    0.309019
   9 │     6047    0.152959
  10 │     6879   -1.23302

Trait 1: IHT estimated 0 non-genetic predictors
[1m0×2 DataFrame[0m

Trait 2: IHT estimated 10 nonzero SNP predictors
[1m10×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     [0m│[90m Int64    [0m[90m Float64     [0m
─────┼───────────────────────
   1 │       46    1.73289
   2 │     4797   -1.20165
   3 │     5616   -0.962282
   4 │     6072    0.536906
  

## Check answer

In [5]:
# first beta
β1 = result.beta[1, :]
true_b1_idx = findall(!iszero, true_b[:, 1])
[β1[true_b1_idx] true_b[true_b1_idx, 1]]

10×2 Array{Float64,2}:
 -0.388623  -0.402269
  0.778375   0.758756
  0.735256   0.729135
 -1.45943   -1.47163
 -0.182059  -0.172668
 -0.84684   -0.847906
  0.31211    0.296183
  0.0       -0.0034339
  0.150628   0.125965
 -1.22745   -1.24972

In [6]:
# second beta
β2 = result.beta[2, :]
true_b2_idx = findall(!iszero, true_b[:, 2])
[β2[true_b2_idx] true_b[true_b2_idx, 2]]

10×2 Array{Float64,2}:
  1.73639    1.73729
 -1.19923   -1.19911
  0.0        0.0121193
 -0.97377   -0.969569
  0.537718   0.540525
 -0.592252  -0.609556
  0.506521   0.481189
  0.0       -0.0524866
  0.337317   0.31182
  1.27771    1.29813

In [7]:
# covariance matrix
[vec(result.Σ) vec(true_Σ)]

4×2 Array{Float64,2}:
 1.17665  1.22512
 1.19901  1.23674
 1.19901  1.23674
 1.46801  1.494

# Test Cross validation

In [None]:
fit_iht(Yt, Transpose(xla), Zt, k=1) # kernel dies