# Simulations using NFBC1966 results

Using chromosome 21 of the NFBC (Stampeed) data ($n=5340$ samples and $p = 5540$ SNPs), let us simulate:
+ Non-independent samples
+ Polygenic traits where every SNP contributes to the variance of the phenotypes, but only $k$ SNPs contribute to the mean of the phenotype.

## Simulation model

For $r$ traits, our simulation model is:

$$\mathbf{Y}_{r \times n} \sim \text{MatrixNormal}(\mathbf{B}_{r \times p}\mathbf{X}_{p \times n}, \ \ \Sigma_{r \times r} , \ \ \sigma_g^2\Phi_{n \times n} + \sigma_e^2 \mathbf{I}_{n \times n})$$

+ $\mathbf{X}_{p \times n}$ contains *all* predictors (genetic + non-genetic)
+ $\mathbf{B}_{r \times p}$ contains (true) regression coefficients. $k$ entries are non-zero
+ $\Phi$ is the GRM estimated from genotypes
+ $\Sigma_{r \times r} = \mathbf{A}^t\mathbf{A}$ is the row (trait) covariance matrix where $A_{ij} \sim N(0, 1)$
+ $\sigma_g^2\Phi_{n \times n} + \sigma_e^2 \mathbf{I}_{n \times n}$ is the column (sample) covariance matrix
+ $\sigma_g^2 = 0.6$ and $\sigma_e^2 = 0.4$ (thus heritability is 60%)
+ $k = \{10, 20, 50, 100\}$ (number of causal SNPs)
+ Pleiotropic overlap = 20% (e.g. if $k=10$, then 2 causal SNPs are shared by every trait, thus only 8 unique causal SNPs)
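+ $\beta_i \sim N(0, \sigma_\beta^2)$ with standard deviation $\sigma_\beta = 0.1$ (the simulation code draws from `Normal(0, βσ)` with `βσ = 0.1`)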
+ $r = \{2, 5\}$  (number of traits)
+ $n=5340$ samples
+ $p = 5540$ SNPs

In [1]:
# using Revise
# using MendelIHT
# using SnpArrays
# using Random
# using GLM
# using DelimitedFiles
# using Distributions
# using LinearAlgebra
# using CSV
# using DataFrames
# using StatsBase
# using Statistics
# using TraitSimulation

using Distributed
addprocs(8)
@everywhere begin
    using Revise
    using MendelIHT
    using SnpArrays
    using Random
    using GLM
    using DelimitedFiles
    using Distributions
    using LinearAlgebra
    using CSV
    using DataFrames
    using StatsBase
    using Statistics
    using TraitSimulation
end

┌ Info: Precompiling MendelIHT [921c7187-1484-5754-b919-5d3ed9ac03c4]
└ @ Base loading.jl:1317


### Helper functions for parsing results

In [5]:
"""
    get_top_mvPLINK_SNPs(sim::Int, nsnps::Int)

Return the row indices (into `NFBCsim/sim<sim>/plink.mqfam.total`) of the `nsnps`
SNPs with the smallest mvPLINK p-values, most significant first.

A p-value recorded as `"NA"` is treated as `1.0`.
"""
function get_top_mvPLINK_SNPs(sim::Int, nsnps::Int)
    result_file = "NFBCsim/sim$sim/plink.mqfam.total"
    table = CSV.read(result_file, DataFrame, delim=' ', ignorerepeated=true)

    # The P column is only numeric when mvPLINK wrote no "NA" entries.
    raw_p = table[!, :P]
    pvalues = if eltype(raw_p) == Float64
        raw_p
    else
        table[findall(x -> x == "NA", raw_p), :P] .= "1.0"
        parse.(Float64, table[!, :P])
    end

    ranking = sortperm(pvalues)
    return ranking[1:nsnps]
end

"""
    get_top_GEMMA_SNP_ids(sim::Int, nsnps::Int)

Return the row indices (into `NFBCsim/sim<sim>/gemma.sim<sim>.assoc.txt`) of the
`nsnps` SNPs with the smallest GEMMA Wald p-values, most significant first.
"""
function get_top_GEMMA_SNP_ids(sim::Int, nsnps::Int)
    assoc_file = "NFBCsim/sim$sim/gemma.sim$sim.assoc.txt"
    assoc = CSV.read(assoc_file, DataFrame)
    ranking = sortperm(assoc[!, :p_wald])
    return ranking[1:nsnps]
end

"""
    get_IHT_SNPs(sim::Int)

Return the indices of the SNPs selected by IHT in simulation `sim`, i.e. every
position with a nonzero estimate in `iht_beta1.txt` or `iht_beta2.txt`.
Indices selected for trait 1 come first; each index appears once.
"""
function get_IHT_SNPs(sim::Int)
    simdir = "NFBCsim/sim$sim/"
    selected = map(("iht_beta1.txt", "iht_beta2.txt")) do file
        findall(!iszero, vec(readdlm(simdir * file)))
    end
    # `union` already removes duplicates while keeping first-seen order
    return union(selected[1], selected[2])
end

"""
    get_true_SNPs(sim::Int)

Return the row indices of the truly causal SNPs in simulation `sim`: the rows of
`NFBCsim/sim<sim>/trueb.txt` that hold a nonzero effect for at least one trait.
Each row index appears once, in column-major order of first occurrence.
"""
function get_true_SNPs(sim::Int)
    effects = readdlm("NFBCsim/sim$sim/" * "trueb.txt")
    nonzero_entries = findall(!iszero, effects)
    # keep only the row (SNP) coordinate of each nonzero (SNP, trait) entry
    snp_rows = map(entry -> entry[1], nonzero_entries)
    return unique(snp_rows)
end

"""
    get_gemma_causal_snp_pos(sim::Int)

Locate the causal SNPs and the IHT-selected SNPs inside GEMMA's result table for
simulation `sim`.

GEMMA filters SNPs, so its table has fewer rows than the NFBC genotype file; SNPs
are therefore matched between the two by rsID.

# Returns
- `gemma_df`: GEMMA's association table with an extra `:pos` column holding each
  SNP's position taken from the NFBC SNP info.
- `causal_snp_idx`: row indices in `gemma_df` of the truly causal SNPs.
- `iht_snps_idx`: row indices in `gemma_df` of the SNPs selected by IHT.

The conversions to `Vector{Int}` fail if any causal or IHT-selected SNP is absent
from GEMMA's table, because `indexin` returns `nothing` for unmatched entries.
"""
function get_gemma_causal_snp_pos(sim::Int)
    simdir = "NFBCsim/sim$sim/"
    nfbc = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
    nfbc_rsids = nfbc.snp_info.snpid

    # causal SNPs = rows of the true effect matrix with any nonzero entry
    nonzero_entries = findall(!iszero, readdlm(simdir * "trueb.txt"))
    causal_rows = unique([entry[1] for entry in nonzero_entries])
    causal_rsids = nfbc_rsids[causal_rows]

    gemma_df = CSV.read("NFBCsim/sim$sim/gemma.sim$sim.assoc.txt", DataFrame)
    gemma_rsids = gemma_df[!, :rs]

    # translate rsIDs into row numbers of GEMMA's table
    row_in_gemma(rsids) = convert(Vector{Int}, indexin(rsids, gemma_rsids))
    causal_snp_idx = row_in_gemma(causal_rsids)
    iht_snps_idx = row_in_gemma(nfbc_rsids[get_IHT_SNPs(sim)])

    # attach the position of every GEMMA SNP
    nfbc_rows = convert(Vector{Int}, indexin(gemma_rsids, nfbc_rsids))
    positions = Vector{Int}(undef, size(gemma_df, 1))
    for (row, nfbc_row) in enumerate(nfbc_rows)
        positions[row] = nfbc.snp_info.position[nfbc_row]
    end
    insertcols!(gemma_df, size(gemma_df, 2) + 1, :pos => positions)

    return gemma_df, causal_snp_idx, iht_snps_idx
end

"""
    process_gemma_result(filename, correct_snps)

Score one GEMMA run against the known causal SNPs.

# Arguments
- `filename`: path to GEMMA's association output (`*.assoc.txt`).
- `correct_snps`: rsIDs of the truly causal SNPs. They are compared against the
  `:rs` column of GEMMA's output, so they must be rsIDs, not row indices.

# Returns
`(power, FP, FPR)` as computed by `power_and_fpr`, where a SNP is called
significant when its Wald p-value is at most `0.05 / (number of SNPs GEMMA tested)`
(Bonferroni correction).
"""
function process_gemma_result(filename, correct_snps)
    gemma_df = CSV.read(filename, DataFrame)
    snps = size(gemma_df, 1)

    # Only the Wald p-values and the rsIDs are needed; the per-trait effect and
    # covariance columns are deliberately not read so that their presence is not
    # a requirement for scoring.
    pval_wald = gemma_df[!, :p_wald]
    threshold = 0.05 / snps
    signif_snps = findall(x -> x ≤ threshold, pval_wald)
    signif_snps_rsID = gemma_df[signif_snps, :rs]

    return power_and_fpr(snps, correct_snps, signif_snps_rsID)
end

"""
    process_mvPLINK(filename, correct_snps)

Score one mvPLINK run against the known causal SNPs.

# Arguments
- `filename`: path to mvPLINK's output (`plink.mqfam.total`).
- `correct_snps`: row indices of the truly causal SNPs.

# Returns
`(power, FP, FPR)` as computed by `power_and_fpr`. A SNP is called significant
when its p-value is at most `0.05 / (number of SNPs in the file)`; p-values
recorded as `"NA"` are treated as `1.0`.
"""
function process_mvPLINK(filename, correct_snps)
    table = CSV.read(filename, DataFrame, delim=' ', ignorerepeated=true)
    nsnps = size(table, 1)

    # The P column is only numeric when mvPLINK wrote no "NA" entries.
    raw_p = table[!, :P]
    pvalues = if eltype(raw_p) == Float64
        raw_p
    else
        table[findall(x -> x == "NA", raw_p), :P] .= "1.0"
        parse.(Float64, table[!, :P])
    end

    # Bonferroni-significant rows
    hits = findall(x -> x ≤ 0.05 / nsnps, pvalues)

    power_and_fpr(nsnps, correct_snps, hits)
end

"""
    power_and_fpr(p::Int, correct_snps::Vector, detected_snps::Vector)

Compute power, the number of false positives, and the false positive rate.

# Arguments
- `p`: total number of SNPs tested.
- `correct_snps`: indices (or rsIDs) of the truly causal SNPs.
- `detected_snps`: indices (or rsIDs) of the SNPs declared significant.

# Returns
`(power, FP, FPR)` where
- `power = TP / (number of causal SNPs)`,
- `FP` is the number of detected SNPs that are not causal,
- `FPR = FP / (FP + TN) = FP / (p - number of causal SNPs)`.

Duplicated entries in either input are counted once. When `correct_snps` is empty
`power` is `NaN` (0/0).
"""
function power_and_fpr(p::Int, correct_snps::Vector, detected_snps::Vector)
    causal = unique(correct_snps)
    detected = unique(detected_snps)

    TP = length(detected ∩ causal)
    power = TP / length(causal)
    FP = length(detected) - TP

    # FP + TN is the number of non-causal SNPs. Causal SNPs that were missed are
    # false negatives and must not be counted among the true negatives.
    negatives = p - length(causal)
    FPR = FP / negatives
    return power, FP, FPR
end

"""
    summarize_repeats()

Summarize simulations 1 to 50 under `NFBCsim/` for IHT, mvPLINK and GEMMA.

For each simulation directory, the true effects (`trueb.txt`), the IHT estimates
(`iht_beta1.txt`, `iht_beta2.txt`), mvPLINK's output and GEMMA's output are read
and scored with `power_and_fpr`.

# Returns
Twelve vectors, in the order
`iht_time, iht_power, iht_FP, iht_FPR, mvPLINK_time, mvPLINK_power, mvPLINK_FP,
mvPLINK_FPR, gemma_time, gemma_power, gemma_FP, gemma_FPR`.

The three `*_time` vectors are always empty: timings are not stored in the files
read here. They are kept so the return shape matches `read_summary`.

A simulation whose files cannot be read or scored is skipped (no entry is pushed
for any method) and the reason is reported.
"""
function summarize_repeats()
    # SNP count used as the total when scoring IHT (hard-coded; presumably the
    # number of SNPs in the NFBC genotype file -- TODO confirm)
    p = 26906
    sims = 1:50 # k = 10, r = 2, βoverlap=2, polygenic model
    nfbc = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
    snp_rsID = nfbc.snp_info.snpid

    iht_time, iht_power, iht_FP, iht_FPR = Float64[], Float64[], Float64[], Float64[]
    mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPR = Float64[], Float64[], Float64[], Float64[]
    gemma_time, gemma_power, gemma_FP, gemma_FPR = Float64[], Float64[], Float64[], Float64[]

    for sim in sims
        dir = "NFBCsim/sim$sim/"
        try
            # truly causal SNPs, as row indices and as rsIDs
            trueB = readdlm(dir * "trueb.txt")
            causal_snps = unique([x[1] for x in findall(!iszero, trueB)])
            causal_snps_rsID = snp_rsID[causal_snps]

            # IHT: a SNP is detected if its estimate is nonzero for either trait
            iht_β1 = vec(readdlm(dir * "iht_beta1.txt"))
            iht_β2 = vec(readdlm(dir * "iht_beta2.txt"))
            detected_snps = findall(!iszero, iht_β1) ∪ findall(!iszero, iht_β2)
            ihtpower, ihtFP, ihtFPR = power_and_fpr(p, causal_snps, detected_snps)

            # mvPLINK is scored by row index
            plinkpower, plinkFP, plinkFPR = process_mvPLINK(dir * "plink.mqfam.total", causal_snps)

            # GEMMA filters SNPs, so it is scored by rsID
            gemmapower, gemmaFP, gemmaFPR = process_gemma_result(dir * "gemma.sim$sim.assoc.txt", causal_snps_rsID)

            # push only after all three methods succeeded so the vectors stay aligned
            push!(iht_power, ihtpower); push!(iht_FP, ihtFP); push!(iht_FPR, ihtFPR)
            push!(mvPLINK_power, plinkpower); push!(mvPLINK_FP, plinkFP); push!(mvPLINK_FPR, plinkFPR)
            push!(gemma_power, gemmapower); push!(gemma_FP, gemmaFP); push!(gemma_FPR, gemmaFPR)
        catch err
            # best effort: skip this replicate, but report why instead of hiding the error
            println("simulation $sim failed!")
            @warn "skipping simulation $sim" exception = (err, catch_backtrace())
        end
    end

    return iht_time, iht_power, iht_FP, iht_FPR,
        mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPR,
        gemma_time, gemma_power, gemma_FP, gemma_FPR
end

"""
    plot_gemma_manhattan(sim::Int)

Draw a Manhattan plot of GEMMA's Wald p-values for simulation `sim`, annotating
the truly causal SNPs and the SNPs selected by IHT, save it to
`NFBCsim/manhattan_gemma_sim<sim>.png`, and display the image.

P-values below `1e-50` are raised to `1e-50` before plotting.

NOTE(review): `manhattan` is expected to come from MendelPlots.jl, which is not
among the packages loaded in the visible setup cell -- confirm it is loaded.
"""
function plot_gemma_manhattan(sim::Int)
    table, causal_rows, iht_rows = get_gemma_causal_snp_pos(sim)
    rename!(table, [:p_wald => :pval, :rs => :snpid])

    # cap extremely small p-values (in place)
    pvalues = table[!, :pval]
    pvalues[pvalues .< 1e-50] .= 1e-50

    # annotations are drawn as markers only, hence an all-empty label column
    blank_labels = fill("", size(table, 1))
    insertcols!(table, size(table, 2) + 1, :empty_col => blank_labels)

    figure = "NFBCsim/manhattan_gemma_sim$sim.png"
    manhattan(table, outfile = figure,
        annotateinds = causal_rows, annotateinds2 = iht_rows,
        annotatevar=:empty_col, titles="GEMMA simulation $sim")
    display("image/png", read(figure))
end

"""
    plot_mvPLINK_manhattan(sim::Int)

Draw a Manhattan plot of mvPLINK's p-values for simulation `sim`, annotating the
truly causal SNPs and the SNPs selected by IHT, save it to
`NFBCsim/manhattan_mvPLINK_sim<sim>.png`, and display the image.

P-values recorded as `"NA"` are treated as `1.0`; p-values below `1e-50` are
raised to `1e-50` before plotting.

NOTE(review): `manhattan` is expected to come from MendelPlots.jl, which is not
among the packages loaded in the visible setup cell -- confirm it is loaded.
"""
function plot_mvPLINK_manhattan(sim::Int)
    table = CSV.read("NFBCsim/sim$sim/plink.mqfam.total", DataFrame, delim=' ', ignorerepeated=true)

    # The P column is only numeric when mvPLINK wrote no "NA" entries.
    raw_p = table[!, :P]
    pvalues = if eltype(raw_p) == Float64
        raw_p
    else
        table[findall(x -> x == "NA", raw_p), :P] .= "1.0"
        parse.(Float64, table[!, :P])
    end
    pvalues[pvalues .< 1e-50] .= 1e-50 # cap extremely small p-values

    causal_rows = get_true_SNPs(sim)
    iht_rows = get_IHT_SNPs(sim)

    # Reuse the NFBC SNP info table as the plotting data frame: give its columns
    # the names used for plotting and append the p-values plus a blank label column.
    snpdata = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
    info = snpdata.snp_info
    rename!(info, [:chr, :snpid, :genetic_distance, :pos, :allele1, :allele2])
    insertcols!(info, size(info, 2) + 1, :pval => pvalues)
    insertcols!(info, size(info, 2) + 1, :empty_col => fill("", size(info, 1)))

    figure = "NFBCsim/manhattan_mvPLINK_sim$sim.png"
    manhattan(info, outfile = figure,
        annotateinds = causal_rows, annotateinds2 = iht_rows,
        annotatevar=:empty_col, titles="mvPLINK simulation $sim")
    display("image/png", read(figure))
end

"""
    plot_mvPLINK_QQ(sim::Int)

Draw a QQ plot of mvPLINK's p-values for simulation `sim`, save it to
`NFBCsim/QQ_mvPLINK_sim<sim>.png`, and display the image.

P-values recorded as `"NA"` are treated as `1.0`; p-values below `1e-50` are
raised to `1e-50` before plotting.

NOTE(review): `qq` is expected to come from MendelPlots.jl, which is not among
the packages loaded in the visible setup cell -- confirm it is loaded.
"""
function plot_mvPLINK_QQ(sim::Int)
    table = CSV.read("NFBCsim/sim$sim/plink.mqfam.total", DataFrame, delim=' ', ignorerepeated=true)

    # The P column is only numeric when mvPLINK wrote no "NA" entries.
    raw_p = table[!, :P]
    pvalues = if eltype(raw_p) == Float64
        raw_p
    else
        table[findall(x -> x == "NA", raw_p), :P] .= "1.0"
        parse.(Float64, table[!, :P])
    end
    pvalues[pvalues .< 1e-50] .= 1e-50 # cap extremely small p-values

    figure = "NFBCsim/QQ_mvPLINK_sim$sim.png"
    qq(pvalues, outfile = figure,
        ylabel="mvPLINK observed -log10(p)", titles="mvPLINK simulation $sim")
    display("image/png", read(figure))
end

"""
    plot_gemma_QQ(sim::Int)

Draw a QQ plot of GEMMA's Wald p-values for simulation `sim`, save it to
`NFBCsim/QQ_gemma_sim<sim>.png`, and display the image.

P-values below `1e-50` are raised to `1e-50` before plotting.

NOTE(review): `qq` is expected to come from MendelPlots.jl, which is not among
the packages loaded in the visible setup cell -- confirm it is loaded.
"""
function plot_gemma_QQ(sim::Int)
    assoc = CSV.read("NFBCsim/sim$sim/gemma.sim$sim.assoc.txt", DataFrame)
    pvalues = assoc[!, :p_wald]
    pvalues[pvalues .< 1e-50] .= 1e-50 # cap extremely small p-values

    figure = "NFBCsim/QQ_gemma_sim$sim.png"
    qq(pvalues, outfile = figure,
        ylabel="GEMMA observed -log10(p)", titles="GEMMA simulation $sim")
    display("image/png", read(figure))
end

# Parse one result line of a summary file into `[time, power, FP, FPR]`.
# Returns `nothing` when the line does not match `regex` or a field is not numeric.
function _parse_summary_line(regex::Regex, line::AbstractString)
    m = match(regex, line)
    m === nothing && return nothing
    fields = Float64[]
    for capture in m.captures
        value = tryparse(Float64, capture)
        value === nothing && return nothing
        push!(fields, value)
    end
    return fields
end

# Read the IHT, mvPLINK and GEMMA result lines (lines 4-6) of one summary file.
# Returns a 3-element vector of `[time, power, FP, FPR]`, or `nothing` when the
# file is missing, unreadable, too short, or any of the three lines fails to parse.
function _read_summary_file(path::AbstractString, regex::Regex)
    isfile(path) || return nothing
    lines = try
        readlines(path)
    catch
        return nothing
    end
    length(lines) >= 6 || return nothing
    rows = [_parse_summary_line(regex, line) for line in lines[4:6]]
    any(isnothing, rows) && return nothing
    return rows
end

"""
    read_summary(simulations::UnitRange)

Collect the per-simulation `summary.txt` files written by `run_repeats` under
`NFBCsim/sim<i>/` for every `i` in `simulations`.

Lines 4, 5 and 6 of each summary file are expected to hold the IHT, mvPLINK and
GEMMA results, each containing
`= <time> seconds, power = <power>, FP = <count>, FPR = <rate>`.

A simulation is recorded only if all three lines parse; otherwise
`Simulation <i> failed!` is printed and the simulation is skipped for every
method, so all returned vectors always have the same length.

The collected values are also written as CSV to `NFBCsim_summary.txt` in the
current directory.

# Returns
Twelve `Vector{Float64}` in the order
`iht_time, iht_power, iht_FP, iht_FPT, mvPLINK_time, mvPLINK_power, mvPLINK_FP,
mvPLINK_FPT, gemma_time, gemma_power, gemma_FP, gemma_FPT`
(the `*_FPT` vectors hold the false positive rates).
"""
function read_summary(simulations::UnitRange)
    model = "NFBCsim"

    iht_time, iht_power, iht_FP, iht_FPT = Float64[], Float64[], Float64[], Float64[]
    mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPT = Float64[], Float64[], Float64[], Float64[]
    gemma_time, gemma_power, gemma_FP, gemma_FPT = Float64[], Float64[], Float64[], Float64[]

    regex = r"= (\d+\.\d+) seconds, power = (\d+\.\d+), FP = (\d+), FPR = (\d\.\d+e?-?\d*)"
    for sim in simulations
        rows = _read_summary_file("$model/sim$(sim)/summary.txt", regex)
        if rows === nothing
            println("Simulation $sim failed!")
            continue
        end
        iht, mvPLINK, gemma = rows

        # All three methods parsed: record them together so index i refers to the
        # same simulation in every vector.
        push!(iht_time, iht[1]); push!(iht_power, iht[2])
        push!(iht_FP, iht[3]); push!(iht_FPT, iht[4])

        push!(mvPLINK_time, mvPLINK[1]); push!(mvPLINK_power, mvPLINK[2])
        push!(mvPLINK_FP, mvPLINK[3]); push!(mvPLINK_FPT, mvPLINK[4])

        push!(gemma_time, gemma[1]); push!(gemma_power, gemma[2])
        push!(gemma_FP, gemma[3]); push!(gemma_FPT, gemma[4])
    end

    # save summary statistics
    open("$(model)_summary.txt", "w") do io
        println(io, "iht_time,iht_power,iht_FP,iht_FPT,mvPLINK_time,mvPLINK_power," *
            "mvPLINK_FP,mvPLINK_FPT,gemma_time,gemma_power,gemma_FP,gemma_FPT")
        for i in eachindex(iht_time)
            print(io, iht_time[i], ',', iht_power[i], ',', iht_FP[i], ',', iht_FPT[i], ',')
            print(io, mvPLINK_time[i], ',', mvPLINK_power[i], ',', mvPLINK_FP[i], ',', mvPLINK_FPT[i], ',')
            print(io, gemma_time[i], ',', gemma_power[i], ',', gemma_FP[i], ',', gemma_FPT[i], "\n")
        end
    end

    return iht_time, iht_power, iht_FP, iht_FPT,
        mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPT,
        gemma_time, gemma_power, gemma_FP, gemma_FPT
end

"""
    simulate_random_beta(k::Int, p::Int, traits::Int; overlap::Int=0, βσ=1.0)

Simulate a `p × traits` matrix of true effect sizes with exactly `k` nonzero
entries, each drawn from `Normal(0, βσ)` (`βσ` is a standard deviation).

# Arguments
- `k`: total number of nonzero entries over all traits.
- `p`: total number of SNPs.
- `traits`: number of traits (phenotypes).
- `overlap`: number of SNPs that are causal for every trait. These account for
  `traits * overlap` of the `k` nonzero entries; the remaining
  `k - traits * overlap` entries are placed uniformly at random on
  (SNP, trait) pairs whose SNP is not one of the shared SNPs.
- `βσ`: standard deviation of the nonzero effects.

With `overlap == 0` the `k` nonzero entries are placed uniformly at random over
the whole matrix.

# Throws
- `ArgumentError` if `overlap > 0` and either `overlap > p` or
  `k < traits * overlap`.
"""
function simulate_random_beta(k::Int, p::Int, traits::Int; overlap::Int=0, βσ=1.0)
    d = Normal(0, βσ)
    true_b = zeros(p, traits)
    if overlap == 0
        causal_snps = sample(1:(traits * p), k, replace=false)
        true_b[causal_snps] = rand(d, k)
    else
        overlap <= p ||
            throw(ArgumentError("overlap ($overlap) cannot exceed the number of SNPs ($p)"))
        n_nonshared = k - traits * overlap
        n_nonshared >= 0 ||
            throw(ArgumentError("k ($k) must be at least traits * overlap ($(traits * overlap))"))

        shared_snps = sample(1:p, overlap, replace=false)

        # Uniform sampling weights over the linear indices of the p × traits matrix,
        # with the shared SNPs excluded in every trait. Entry (snp, trait) has linear
        # index (trait - 1) * p + snp.
        weights = fill(1 / (traits * (p - overlap)), traits * p)
        for trait in 1:traits
            weights[(trait - 1) * p .+ shared_snps] .= 0.0
        end
        weight_vector = aweights(weights)
        @assert sum(weight_vector) ≈ 1.0

        # simulate β for shared predictors
        for i in 1:traits
            true_b[shared_snps, i] = rand(d, overlap)
        end

        # simulate β for non-shared predictors
        nonshared_snps = sample(1:(traits * p), weight_vector, n_nonshared, replace=false)
        true_b[nonshared_snps] = rand(d, n_nonshared)
    end

    return true_b
end

"""
    adhoc_mul!(out, st::SnpLinAlg, v)

Ad-hoc matrix-matrix multiplication `out = st * v` for a `SnpLinAlg`, performed
one column of `v` at a time with the matrix-vector `mul!`.

# Arguments
- `out`: output matrix of size `size(st, 1) × size(v, 2)`; overwritten.
- `st`: the `SnpLinAlg` operator.
- `v`: right-hand matrix of size `size(st, 2) × size(v, 2)`.

# Returns
`out`.

# Throws
- `DimensionMismatch` if the three sizes are not compatible.
"""
function adhoc_mul!(
    out::AbstractMatrix{T}, 
    st::SnpLinAlg{T},
    v::AbstractMatrix{T}) where T <: AbstractFloat
    if size(out, 1) != size(st, 1) || size(out, 2) != size(v, 2) || size(st, 2) != size(v, 1)
        throw(DimensionMismatch(
            "out is $(size(out)), st is $(size(st)), v is $(size(v))"))
    end
    for i in 1:size(v, 2)
        outi = @view(out[:, i])
        vi = @view(v[:, i])
        SnpArrays.mul!(outi, st, vi)
    end
    return out
end

# Simulate `r` correlated traits for the samples in the PLINK file `plinkname`
# under the polygenic model: the mean is Z*intercepts + X*B and the covariance is
# the variance-component structure Σ ⊗ (σg*Φ + σe*I).
#
# Arguments
# - plinkname: PLINK file prefix; `plinkname * ".bed"` is read.
# - k, r: number of nonzero effects and number of traits, both forwarded to
#   `simulate_random_beta`.
# - seed: RNG seed, set once at the top, so the order of the random draws below
#   matters for reproducibility.
# - σg, σe: coefficients of the GRM and of the identity in the sample covariance;
#   they are used as-is (not squared).
# - βoverlap, βσ: forwarded to `simulate_random_beta` as `overlap` and `βσ`.
#
# Returns `(xla, Z', B, Σ, Y)`: the standardized n×p genotype matrix, the 1×n
# intercept covariate, the p×r true effects, the trait covariance, and the
# transposed simulated response (presumably r×n -- TODO confirm the shape that
# `simulate` returns).
function simulate_NFBC1966_polygenic(
    plinkname::String, k::Int, r::Int;
    seed::Int=2021, σg=0.6, σe=0.4, βoverlap=2, βσ=1.0,
    )
    # set seed
    Random.seed!(seed)
    
    # read the `.bed` file and convert it to a dense, imputed, centered and scaled matrix
    x = SnpArray(plinkname * ".bed")
#     xla = SnpLinAlg{Float64}(x, model=ADDITIVE_MODEL, impute=true, center=true, scale=true)
    xla = convert(Matrix{Float64}, x, impute=true, center=true, scale=true)
    n, p = size(x)
    
    # intercept is the only nongenetic covariate
    Z = ones(n, 1)
    intercepts = randn(r)' # 1×r row; each trait gets its own random intercept

    # simulate β
    B = simulate_random_beta(k, p, r, overlap=βoverlap, βσ=βσ)

    # between trait covariance matrix
    # NOTE(review): `random_covariance_matrix` is not defined in this file; presumably
    # provided by one of the loaded packages -- confirm
    Σ = random_covariance_matrix(r)

    # between sample covariance is σg * GRM + σe * identity
    Φ = grm(x, method=:Robust)
    V = σg * Φ + σe * I

    # simulate y using TraitSimulations.jl (https://github.com/OpenMendel/TraitSimulation.jl/blob/master/src/modelframework.jl#L137)
    vc = @vc Σ ⊗ V
    μ = zeros(n, r)
    μ_null = zeros(n, r)
    # μ_null = Z * intercepts (non-genetic part of the mean)
    LinearAlgebra.mul!(μ_null, Z, intercepts)
#     adhoc_mul!(μ, xla, B)
    # μ = xla * B (genetic part of the mean)
    mul!(μ, xla, B)
    # μ = μ_null + μ
    BLAS.axpby!(1.0, μ_null, 1.0, μ)
    VCM_model = VCMTrait(Z, intercepts, xla, B, vc, μ)
    Y = Matrix(Transpose(simulate(VCM_model)))
    
    return xla, Matrix(Z'), B, Σ, Y
end

"""
    simulate_NFBC1966_sparse(plinkname, k, r; seed=2021, βoverlap=2, βσ=1.0)

Simulate `r` correlated traits for the samples in the PLINK file `plinkname` under
a sparse model with independent samples: each sample's trait vector is drawn from
a multivariate normal with mean `x_i' * B` (all intercepts are zero) and trait
covariance `Σ`.

# Arguments
- `plinkname`: PLINK file prefix; `plinkname * ".bed"` is read.
- `k`, `r`: number of nonzero effects and number of traits.
- `seed`: RNG seed, set once at the top.
- `βoverlap`, `βσ`: forwarded to `simulate_random_beta` as `overlap` and `βσ`.

# Returns
`(xla, Z', B, Σ, Y)`: the standardized `n × p` genotype matrix, the `1 × n`
intercept covariate, the `p × r` true effects, the `r × r` trait covariance, and
the `r × n` simulated response.
"""
function simulate_NFBC1966_sparse(
    plinkname::String, k::Int, r::Int;
    seed::Int=2021, βoverlap=2, βσ = 1.0
    )
    Random.seed!(seed)

    # read the genotypes and convert to a dense, imputed, centered and scaled matrix
    snparray = SnpArray(plinkname * ".bed")
    genotypes = convert(Matrix{Float64}, snparray, impute=true, center=true, scale=true)
    nsamples, nsnps = size(genotypes)

    # the intercept is the only non-genetic covariate, and it is zero for every trait
    covariates = ones(nsamples, 1)
    zero_intercepts = zeros(r)'

    # true effects, then the between-trait covariance (draw order matters for the seed)
    effects = simulate_random_beta(k, nsnps, r, overlap=βoverlap, βσ=βσ)
    trait_cov = random_covariance_matrix(r)

    # per-sample mean vectors, one row per sample
    means = genotypes * effects + covariates*zero_intercepts

    # draw each sample's trait vector and store it directly as a column (r × n layout)
    response = zeros(r, nsamples)
    for sample_id in 1:nsamples
        response[:, sample_id] = rand(MvNormal(@view(means[sample_id, :]), trait_cov))
    end

    return genotypes, Matrix(covariates'), effects, trait_cov, response
end

"""
    simulate_multivariate_sparse(n, p, k, r; seed=2021, βoverlap=2)

Simulate random genotypes for `n` samples and `p` SNPs together with `r` traits
generated by `simulate_random_response` with `k` nonzero effects and all
intercepts equal to zero.

# Returns
`(xla, z', true_b, true_Σ, Y')`: the standardized genotype matrix, the transposed
intercept covariate, the true effects, the true trait covariance, and the
transposed simulated response.
"""
function simulate_multivariate_sparse(
    n::Int, p::Int, k::Int, r::Int;
    seed::Int=2021, βoverlap=2, 
    )
    Random.seed!(seed)

    # random genotypes, converted to a dense, imputed, centered and scaled matrix
    snparray = simulate_random_snparray(undef, n, p)
    genotypes = convert(Matrix{Float64}, snparray, impute=true, center=true, scale=true)

    # the intercept is the only non-genetic covariate, and it is zero for every trait
    covariates = ones(size(snparray, 1), 1)
    zero_intercepts = zeros(r)'

    # the fourth return value (positions of the nonzero effects) is not needed here
    response, trait_cov, effects, _ = simulate_random_response(
        genotypes, k, r, Zu=covariates*zero_intercepts, overlap=βoverlap)

    return genotypes, Matrix(covariates'), effects, trait_cov, Matrix(response')
end

simulate_multivariate_sparse (generic function with 1 method)

# Try running IHT

In [26]:
# simulation parameters
k = 10          # number of nonzero effects passed to the simulator
r = 2           # number of traits
βσ = 0.1        # standard deviation of the nonzero effects
seed = 1111     # RNG seed for this single simulation
βoverlap = 2    # number of causal SNPs shared by every trait

# use chr21 of NFBC (PLINK file prefix; ".bed" is appended by the simulator)
chr21 = "/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/qc/NFBC.qc.chr.21"
xla, Z, true_b, Σ, Y = simulate_NFBC1966_sparse(chr21,k,r,seed=seed,βoverlap=βoverlap,βσ=βσ)
# rows of true_b with a nonzero effect for at least one trait
causal_snps = unique([x[1] for x in findall(!iszero, true_b)]);

# simulate random data
# n = 1000
# p = 10000
# xla, Z, true_b, Σ, Y = simulate_multivariate_sparse(n, p, k,r,seed=seed,βoverlap=βoverlap)
# causal_snps = unique([x[1] for x in findall(!iszero, true_b)]);

In [27]:
# show the true trait covariance, the causal SNP indices, and their true effects
# (one row per causal SNP, one column per trait)
println(Σ)
println(causal_snps)
true_b[causal_snps, :]

[0.056419962055242315 0.1802121298258207; 0.1802121298258207 2.723094788677102]
[2349, 3073, 3075, 4653, 5091, 5344, 5485, 2651]


8×2 Matrix{Float64}:
 -0.0523363   0.146389
 -0.105674    0.0
 -0.03193     0.0
  0.0654857   0.0
  0.0360225   0.0
 -0.0429316   0.0
  0.108861    0.0877691
  0.0        -0.141978

In [28]:
# Multivariate Gaussian loglikelihood (no intercept term), up to the additive
# constant that does not depend on the parameters:
#     n/2 * logdet(Γ) - 1/2 * tr(Γ * R * R'),   R = Y - B*X
# X: p × n predictors, Y: r × n responses, B: r × p coefficients,
# Γ: r × r precision matrix (inverse of the trait covariance).
function loglikelihood(X, Y, B, Γ)
    nsamples = size(Y, 2)
    residual = Y - B*X
    quadratic_term = tr(Γ*residual*residual')
    return nsamples/2 * logdet(Γ) - 0.5 * quadratic_term
end

# loglikelihood at the true parameters: a reference value that a good fit should approach
@show loglikelihood(Transpose(xla), Y, Transpose(true_b), inv(Σ))

# loglikelihood at the null start (all effects zero, identity precision), i.e. what
# the first IHT iteration should start from
@show loglikelihood(Transpose(xla), Y, zeros(size(Transpose(true_b))), [1.0 0.0; 0.0 1.0])

loglikelihood(Transpose(xla), Y, Transpose(true_b), inv(Σ)) = 373.8599630859753
loglikelihood(Transpose(xla), Y, zeros(size(Transpose(true_b))), [1.0 0.0; 0.0 1.0]) = -7480.311995286563


-7480.311995286563

In [37]:
# fit IHT with sparsity k = 12, with initialization of beta (init_beta=true) and debiasing
result = fit_iht(Y, Transpose(xla), k=12, init_beta=true, verbose=true, debias=true)

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prior weight scaling = off
Doubly sparse projection = off
Debias = on
Max IHT iterations = 200
Converging when tol < 0.0001:

Iteration 1: loglikelihood = -348.5823880579728, backtracks = 0, tol = 0.24348840335195368
Iteration 2: loglikelihood = -92.63716046636273, backtracks = 0, tol = 0.018033536901510928
Iteration 3: loglikelihood = -80.25529163986357, backtracks = 0, tol = 0.0
Iteration 4: loglikelihood = -78.93358707829248, backtracks = 0, tol = 0.0
Iteration 5: loglike


Compute time (sec):     0.6254458427429199
Final loglikelihood:    253.59208790828507
Iterations:             8
Trait 1's SNP PVE:      0.33214236400341335
Trait 2's SNP PVE:      0.015004585236065574

Estimated trait covariance:
[1m2×2 DataFrame[0m
[1m Row [0m│[1m trait1    [0m[1m trait2   [0m
[1m     [0m│[90m Float64   [0m[90m Float64  [0m
─────┼─────────────────────
   1 │ 0.0580659  0.179454
   2 │ 0.179454   2.67415

Trait 1: IHT estimated 5 nonzero SNP predictors
[1m5×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     [0m│[90m Int64    [0m[90m Float64     [0m
─────┼───────────────────────
   1 │     2349   -0.0502797
   2 │     3073   -0.0960889
   3 │     4653    0.0661157
   4 │     5091    0.0441988
   5 │     5485    0.107593

Trait 1: IHT estimated 0 non-genetic predictors
[1m0×2 DataFrame[0m

Trait 2: IHT estimated 7 nonzero SNP predictors
[1m7×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     

In [43]:
# same fit without passing init_beta, for comparison with the initialized run
result = fit_iht(Y, Transpose(xla), k=12, verbose=true, debias=true)

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prior weight scaling = off
Doubly sparse projection = off
Debias = on
Max IHT iterations = 200
Converging when tol < 0.0001:

Iteration 1: loglikelihood = -1086.7080446381206, backtracks = 0, tol = 0.0


[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m


Iteration 2: loglikelihood = -1083.8182233107045, backtracks = 0, tol = 7.068275112573435e-5
Iteration 3: loglikelihood = -176.068152614067, backtracks = 0, tol = 0.035389719786740916
Iteration 4: loglikelihood = -47.66422231391789, backtracks = 0, tol = 0.025845269446551987
Iteration 5: loglikelihood = 176.3059362969816, backtracks = 1, tol = 0.017070492275814027
Iteration 6: loglikelihood = 239.97371844612735, backtracks = 1, tol = 0.08418738084591948
Iteration 7: loglikelihood = 355.59977418903236, backtracks = 0, tol = 0.027136988474549074
Iteration 8: loglikelihood = 324.99253598381165, backtracks = 3, tol = 0.017526955061516632
Iteration 9: loglikelihood = 336.9928024581186, backtracks = 0, tol = 0.0



Compute time (sec):     0.6988649368286133
Final loglikelihood:    355.59977418903236
Iterations:             9
Trait 1's SNP PVE:      0.39179817428213126
Trait 2's SNP PVE:      0.008613478375068375

Estimated trait covariance:
[1m2×2 DataFrame[0m
[1m Row [0m│[1m trait1    [0m[1m trait2   [0m
[1m     [0m│[90m Float64   [0m[90m Float64  [0m
─────┼─────────────────────
   1 │ 0.0562832  0.179476
   2 │ 0.179476   2.69174

Trait 1: IHT estimated 7 nonzero SNP predictors
[1m7×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     [0m│[90m Int64    [0m[90m Float64     [0m
─────┼───────────────────────
   1 │     2349   -0.0421664
   2 │     3073   -0.103315
   3 │     3077    0.0335914
   4 │     4653    0.0661619
   5 │     5091    0.04317
   6 │     5344   -0.0359141
   7 │     5485    0.105314

Trait 1: IHT estimated 0 non-genetic predictors
[1m0×2 DataFrame[0m

Trait 2: IHT estimated 5 nonzero SNP predictors
[1m5×2 DataFrame[0m
[1m Row

In [44]:
# trait 1: IHT estimate (left column) vs. true effect (right column) at the causal SNPs
[result.beta[1, causal_snps] true_b[causal_snps, 1]]

8×2 Matrix{Float64}:
 -0.0421664  -0.0523363
 -0.103315   -0.105674
  0.0        -0.03193
  0.0661619   0.0654857
  0.04317     0.0360225
 -0.0359141  -0.0429316
  0.105314    0.108861
  0.0         0.0

In [45]:
# trait 2: IHT estimate (left column) vs. true effect (right column) at the causal SNPs
[result.beta[2, causal_snps] true_b[causal_snps, 2]]

8×2 Matrix{Float64}:
 0.0898681   0.146389
 0.0         0.0
 0.0         0.0
 0.0         0.0
 0.0         0.0
 0.0         0.0
 0.0265674   0.0877691
 0.0        -0.141978

In [46]:
# number of true positives: causal SNPs whose estimates, summed over traits, are nonzero
# (a SNP whose per-trait estimates cancel exactly would not be counted)
count(!iszero, sum(result.beta[:, causal_snps], dims=1))

6

In [81]:
# trait covariance entries: IHT estimate (left column) vs. truth (right column)
[vec(result.Σ) vec(Σ)]

4×2 Matrix{Float64}:
 0.0487976  0.0506762
 0.442851   0.467675
 0.442851   0.467675
 7.17377    7.60324

In [82]:
# display the indices of the causal SNPs
causal_snps

8-element Vector{Int64}:
  837
 2918
 6678
 7685
 9709
 9861
 6268
 8428

## Try 20 simulations (NFBC data)

We will compare

+ Number of true positives
+ Number of iteration until convergence
+ Best cross validated k

between two scenarios: initializing the beta values versus not initializing them.

In [3]:
"""
    bench(sim::Int)

Run `sim` replicate simulations on chromosome 21 of NFBC (replicate `i` uses seed
`i`) and, for each, fit IHT with and without `init_beta=true`. The sparsity level
is chosen by `cv_iht` over `k = 1:20` in both cases.

# Returns
Six length-`sim` vectors:
`no_init_TP, no_init_iter, no_init_best_k, yes_init_TP, yes_init_iter,
yes_init_best_k`: the number of causal SNPs recovered, the number of IHT
iterations, and the cross-validated `k`, first without and then with
initialization.
"""
function bench(sim::Int)
    # simulation parameters
    k, r = 10, 2
    βσ = 0.1
    βoverlap = 2
    chr21 = "/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/qc/NFBC.qc.chr.21"

    no_init_TP, no_init_iter, no_init_best_k = zeros(sim), zeros(sim), zeros(sim)
    yes_init_TP, yes_init_iter, yes_init_best_k = zeros(sim), zeros(sim), zeros(sim)

    for rep in 1:sim
        xla, Z, true_b, Σ, Y = simulate_NFBC1966_sparse(chr21,k,r,seed=rep,βoverlap=βoverlap,βσ=βσ)
        causal_snps = unique([x[1] for x in findall(!iszero, true_b)])
        design = Transpose(xla)

        # causal SNPs whose estimates, summed over traits, are nonzero
        true_positives(fit) = count(!iszero, sum(fit.beta[:, causal_snps], dims=1))

        # without initializing beta; the search path starts at 1, so the index of
        # the smallest MSE is the chosen k itself
        cv_errors = cv_iht(Y, design, path=1:20, verbose=true)
        chosen_k = argmin(cv_errors)
        fit = fit_iht(Y, design, k=chosen_k, verbose=true)
        no_init_TP[rep] = true_positives(fit)
        no_init_iter[rep] = fit.iter
        no_init_best_k[rep] = chosen_k

        # with initialized beta
        cv_errors = cv_iht(Y, design, path=1:20, init_beta=true)
        chosen_k = argmin(cv_errors)
        fit = fit_iht(Y, design, k=chosen_k, verbose=true, init_beta = true)
        yes_init_TP[rep] = true_positives(fit)
        yes_init_iter[rep] = fit.iter
        yes_init_best_k[rep] = chosen_k
    end

    return no_init_TP, no_init_iter, no_init_best_k, 
        yes_init_TP, yes_init_iter, yes_init_best_k
end
@time no_init_TP, no_init_iter, no_init_best_k, 
    yes_init_TP, yes_init_iter, yes_init_best_k = bench(20)

[32mCross validating...100%|████████████████████████████████| Time: 0:05:55[39m




Crossvalidation Results:
	k	MSE
	1	3345.6904133447288
	2	3266.064026264423
	3	3308.514904028456
	4	3214.707484836648
	5	3197.9836884029924
	6	3482.6959584219576
	7	3184.2409542747946
	8	3207.369832866423
	9	3182.14486949898
	10	3238.694606270842
	11	3195.5329941447835
	12	3176.4583089321204
	13	3182.3111285965006
	14	3170.329129314194
	15	3178.609247029076
	16	3179.04562682311
	17	3161.6023182510444
	18	3165.2876753910646
	19	3174.51164010528
	20	3168.4513783499056

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:05:26[39m




Crossvalidation Results:
	k	MSE
	1	3342.878914269393
	2	3263.737015594801
	3	6654.441546732709
	4	3210.553963108172
	5	3385.649355545925
	6	3472.985071199658
	7	3181.7746714981686
	8	3180.1537920046744
	9	3209.989773900449
	10	3180.2941991822913
	11	3210.2038331395843
	12	3163.883353699359
	13	3180.1074697076324
	14	3187.175164361818
	15	3215.1654189007727
	16	3165.931708210023
	17	3166.5662156449457
	18	3164.4594202871376
	19	3173.2040456111235
	20	3191.0968520382708

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:02:50[39m




Crossvalidation Results:
	k	MSE
	1	5193.743372863105
	2	4639.950683957081
	3	4625.427910244493
	4	4631.487482122058
	5	4606.8250397725715
	6	4615.526369370861
	7	4617.485243578018
	8	4616.462344590535
	9	4614.620383589085
	10	4609.74282380772
	11	4608.602947535937
	12	4600.129665045859
	13	4611.1154069760105
	14	4602.020644950068
	15	4582.758151406319
	16	4583.592755914271
	17	4595.755507322069
	18	4581.299781610968
	19	4577.835532442958
	20	4575.651657629679

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 20

[32mCross validating...100%|████████████████████████████████| Time: 0:03:36[39m




Crossvalidation Results:
	k	MSE
	1	5204.895774718422
	2	4640.614630795945
	3	4626.039613817687
	4	4611.830706719906
	5	4607.636603349818
	6	4613.80563519869
	7	4619.465608114781
	8	4620.834709459117
	9	4619.129166897842
	10	4628.678507876545
	11	4632.99454931679
	12	4626.863950518002
	13	4617.350199776749
	14	4627.696306300608
	15	4627.527707506204
	16	4623.61607048394
	17	4627.80909141189
	18	4627.2849973926
	19	4619.3379782987995
	20	4624.064407916953

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prior 

[32mCross validating...100%|████████████████████████████████| Time: 0:05:52[39m




Crossvalidation Results:
	k	MSE
	1	6939.251191004809
	2	5375.73961007013
	3	5635.576872871168
	4	27795.091054797613
	5	30651.105286318234
	6	5565.3834185386795
	7	5443.703462092195
	8	5378.253989349664
	9	5677.740329470676
	10	5413.171470908005
	11	5418.665026306932
	12	5356.965044933153
	13	5370.469864643328
	14	5374.484276914408
	15	5417.138955913369
	16	5431.484319850393
	17	5354.079451837841
	18	5402.939043369703
	19	5381.534868478628
	20	5425.168128496677

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1

[32mCross validating...100%|████████████████████████████████| Time: 0:06:04[39m




Crossvalidation Results:
	k	MSE
	1	6943.628851960557
	2	5383.353197651573
	3	6194.970691904824
	4	6074.578075519688
	5	5510.510992442437
	6	5582.480528456512
	7	5456.117062914986
	8	5345.515180697283
	9	6292.297653737644
	10	5606.206443978479
	11	5386.127468030762
	12	5696.6951668088095
	13	5736.751444227204
	14	5805.372532702404
	15	5404.729075666029
	16	5465.638310847473
	17	5474.073774933104
	18	5494.912708970302
	19	5487.9769391791415
	20	5427.629883722518

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12

[32mCross validating...100%|████████████████████████████████| Time: 0:04:51[39m




Crossvalidation Results:
	k	MSE
	1	9934.309948713728
	2	6765.780999867849
	3	74412.37706033328
	4	12567.916782028578
	5	11747.152519917016
	6	6686.023881358905
	7	7591.618264997205
	8	7413.417193132451
	9	6805.898144872852
	10	6655.383301964394
	11	7033.608394204472
	12	6654.7168994498425
	13	6668.529221202985
	14	6673.494476828733
	15	6679.098564883429
	16	6683.786894631236
	17	6705.257674907231
	18	6701.704659906065
	19	6702.386935348483
	20	6707.03438260433

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1

[32mCross validating...100%|████████████████████████████████| Time: 0:04:24[39m




Crossvalidation Results:
	k	MSE
	1	10098.226946761057
	2	6768.206383932517
	3	17643.700711053887
	4	27993.19765309823
	5	12241.783490114474
	6	6677.079103678738
	7	6864.583157889578
	8	6670.9571031633695
	9	6669.603707362191
	10	6672.613447453228
	11	6676.608106096841
	12	6689.418392518173
	13	6688.9911175087655
	14	6698.747164058988
	15	6714.022806828056
	16	6716.573069303678
	17	6717.732561441777
	18	6722.482957761041
	19	6729.705573887212
	20	6728.366658880353

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) =

[32mCross validating...100%|████████████████████████████████| Time: 0:03:25[39m




Crossvalidation Results:
	k	MSE
	1	3581.342048612435
	2	3513.215283303899
	3	4406.041357957129
	4	3596.9594029640766
	5	3557.1041690451048
	6	3536.6888326340663
	7	4150.64700411813
	8	3534.3340379874
	9	3445.2033897490865
	10	3429.275924150628
	11	3436.5008376643345
	12	3433.464437487205
	13	3439.0430317296778
	14	3435.0167500433513
	15	3434.3565564879905
	16	3439.669909496117
	17	3438.9413956978037
	18	3435.0614647645166
	19	3436.0578629939105
	20	3439.923861334214

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (

[32mCross validating...100%|████████████████████████████████| Time: 0:03:01[39m




Crossvalidation Results:
	k	MSE
	1	3585.1702543909296
	2	3519.872483445446
	3	12521.956302117287
	4	3585.035003648336
	5	3572.598916893062
	6	3494.606605276271
	7	3479.1047253530687
	8	3438.911904253481
	9	3437.3648287014785
	10	3438.5271897549737
	11	3438.7157774871857
	12	3444.9361113801197
	13	3443.8264631125335
	14	3444.8227107072075
	15	3444.7477370883093
	16	3451.510609276571
	17	3452.2222976493026
	18	3455.4385699143486
	19	3446.53744557995
	20	3446.569779090859

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:03:53[39m




Crossvalidation Results:
	k	MSE
	1	2599.087716489052
	2	2580.585891185769
	3	2885.8283354281384
	4	3299.421390416813
	5	20602.6274145902
	6	2667.5091221391685
	7	2523.652689140131
	8	2517.8975311211443
	9	2516.387788182443
	10	2512.2355478724717
	11	2503.5345433280772
	12	2492.67186139007
	13	2492.9081310662473
	14	2490.13890096223
	15	2492.3838508100016
	16	2492.7743924075567
	17	2495.6509759432133
	18	2491.2878818890927
	19	2486.0029592120363
	20	2484.6717801231616

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter 

[32mCross validating...100%|████████████████████████████████| Time: 0:04:03[39m




Crossvalidation Results:
	k	MSE
	1	2601.628052338199
	2	2555.22406070504
	3	2613.55302874496
	4	3125.7440979897574
	5	2657.571471475583
	6	4118.216113645324
	7	2538.8522678059617
	8	2530.1071019552687
	9	2549.6515854377885
	10	2545.6323594104238
	11	2591.2547666085925
	12	2607.882147714479
	13	2599.8473573303195
	14	2636.412728031504
	15	2681.4675946370417
	16	2693.4866243387087
	17	2699.9075021567132
	18	2745.2829174150265
	19	2737.6974764416236
	20	2778.465486537085

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter 

[32mCross validating...100%|████████████████████████████████| Time: 0:03:07[39m




Crossvalidation Results:
	k	MSE
	1	7472.199418907848
	2	7039.509573397878
	3	7016.166837259856
	4	7019.741424318413
	5	7029.228121622103
	6	7001.290116338355
	7	6995.0657220855
	8	6985.269130825478
	9	6973.840406016932
	10	6970.582958606619
	11	6980.454179679655
	12	6977.7510781370665
	13	6992.0151263146545
	14	6993.5589796205595
	15	7015.74604762154
	16	7006.451274508989
	17	7025.452824503191
	18	7056.192111769607
	19	7046.1946741418415
	20	7032.16496822047

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10


[32mCross validating...100%|████████████████████████████████| Time: 0:03:16[39m




Crossvalidation Results:
	k	MSE
	1	7474.809441519521
	2	7043.630161876122
	3	7023.234586444985
	4	7012.098494090202
	5	6988.041055022806
	6	6990.342938585834
	7	6967.513933705987
	8	6974.215320899204
	9	7008.038686661159
	10	6999.412516259441
	11	7000.849392895102
	12	7015.345109158631
	13	7036.333927041987
	14	7040.2400720664
	15	7035.098976091398
	16	7049.180701848348
	17	7051.319563666598
	18	7071.46301943247
	19	7083.794419366952
	20	7075.579189113416

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prio

[32mCross validating...100%|████████████████████████████████| Time: 0:02:45[39m




Crossvalidation Results:
	k	MSE
	1	9305.98737199372
	2	9260.050442640448
	3	9192.460524903707
	4	21506.98454075852
	5	9268.766000328329
	6	9133.165971161847
	7	9113.940642920894
	8	9142.389298051436
	9	9099.156595977185
	10	9107.777661367983
	11	9102.625833438402
	12	9103.573950543754
	13	9103.695807681528
	14	9100.987113372052
	15	9098.774004970317
	16	9109.646547209348
	17	9107.522490979789
	18	9114.748874326511
	19	9115.51973913973
	20	9114.571081578188

Best k = 15

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 15
Pr

[32mCross validating...100%|████████████████████████████████| Time: 0:02:54[39m




Crossvalidation Results:
	k	MSE
	1	9338.395796416324
	2	9289.449426369601
	3	9220.814372641282
	4	9213.527965030478
	5	9167.041612581954
	6	9159.606647278479
	7	9153.388931835883
	8	9193.41121355012
	9	9164.550451930883
	10	9141.51906239688
	11	9141.875877699647
	12	9137.189909254434
	13	9142.743019371992
	14	9148.512538564639
	15	9143.944930195874
	16	9146.681219106324
	17	9151.403936839277
	18	9148.387274997243
	19	9171.53539000762
	20	9168.185780272403

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:03:11[39m




Crossvalidation Results:
	k	MSE
	1	10051.911935813698
	2	9948.01560454145
	3	10027.029176380445
	4	10081.900362315146
	5	9891.845234875518
	6	9835.24327690305
	7	9839.098584851505
	8	9851.351345927591
	9	9854.156467839373
	10	9840.72868215894
	11	9839.91718278109
	12	9840.465921847059
	13	9841.156782821707
	14	9842.421079063062
	15	9844.171382086502
	16	9839.777227367482
	17	9841.786893017776
	18	9847.605777557947
	19	9841.172328484967
	20	9855.932765820557

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 6
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:03:41[39m




Crossvalidation Results:
	k	MSE
	1	10043.092397874194
	2	9938.268131026773
	3	9881.952388519278
	4	9905.636512283972
	5	9859.54318419168
	6	9825.927431888445
	7	9895.101117022896
	8	9830.88807295701
	9	9827.56574093743
	10	9848.64713825618
	11	9853.92517533917
	12	9866.756716129821
	13	9847.703952862503
	14	9881.924144251154
	15	9875.674790176556
	16	9881.31849969593
	17	9887.633546917627
	18	9868.371716302183
	19	9884.156984568368
	20	9884.62479378494

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prior w

[32mCross validating...100%|████████████████████████████████| Time: 0:03:14[39m




Crossvalidation Results:
	k	MSE
	1	4244.236224371355
	2	4197.835924827189
	3	4157.561405931374
	4	4141.98094797603
	5	4108.469786124792
	6	4085.8660652275885
	7	4086.202432987099
	8	4092.620947208784
	9	4099.246254311719
	10	4100.738901495755
	11	4115.395491565687
	12	4109.422869627086
	13	4106.977680634482
	14	4121.425237137209
	15	4119.210876251117
	16	4125.963780211497
	17	4147.683120591617
	18	4129.9406911482265
	19	4133.151749702714
	20	4147.561529815888

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 6
P

[32mCross validating...100%|████████████████████████████████| Time: 0:03:36[39m




Crossvalidation Results:
	k	MSE
	1	4241.383501806043
	2	4193.761208089117
	3	4152.36553978976
	4	4131.401729493474
	5	4100.4725630390585
	6	4081.434664714529
	7	4083.8070883905534
	8	4085.334490318558
	9	4092.027965269566
	10	4087.9140088978343
	11	4101.758786672064
	12	4098.8286364759015
	13	4108.54655515776
	14	4116.690870983628
	15	4118.464919745585
	16	4120.5722514573945
	17	4122.079822674036
	18	4133.745776016307
	19	4134.202861432667
	20	4137.382757004934

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1

[32mCross validating...100%|████████████████████████████████| Time: 0:03:07[39m




Crossvalidation Results:
	k	MSE
	1	2949.2342053287734
	2	2847.681047740488
	3	2810.3121939322627
	4	2791.113050470051
	5	2755.883606542063
	6	2781.349258091901
	7	2784.6411856629147
	8	2745.7219645001605
	9	2711.632303974525
	10	2710.5148455807284
	11	2714.3376646743186
	12	2715.167125886222
	13	2716.033748522994
	14	2721.938069307728
	15	2718.5571067824494
	16	2717.0529486140076
	17	2723.146807669379
	18	2726.4205253190407
	19	2727.3195779137623
	20	2731.1097564657075

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:04:00[39m




Crossvalidation Results:
	k	MSE
	1	2942.8664804144337
	2	2840.301074873524
	3	2803.3916015073664
	4	2784.005434663946
	5	2748.5229011251786
	6	2747.665507980394
	7	2722.00097598643
	8	2720.7893570669107
	9	2705.182499542002
	10	2705.0978527360357
	11	2707.2235809916147
	12	2717.3344422302425
	13	2735.3702617365216
	14	2723.5325577693084
	15	2715.6574669987563
	16	2713.8604947308686
	17	2718.3732734038294
	18	2720.036196913088
	19	2725.0084415420442
	20	2716.2096068611227

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:02:00[39m




Crossvalidation Results:
	k	MSE
	1	1399.1131188505474
	2	1349.0192055402651
	3	1301.6186113270971
	4	1264.5490354333522
	5	1257.7373056249755
	6	1251.2210250364578
	7	1249.5933735021063
	8	1246.7418666938793
	9	1247.5616784124127
	10	1250.2214198432303
	11	1250.7028389614206
	12	1250.226126024923
	13	1254.2054346585294
	14	1254.9027629978148
	15	1256.80893111674
	16	1258.871739653156
	17	1259.4212163535487
	18	1260.0916665833647
	19	1261.3037885657448
	20	1262.9035429777784

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:01:58[39m




Crossvalidation Results:
	k	MSE
	1	1399.7201247938792
	2	1353.670244118552
	3	1301.7863950475353
	4	1264.3988663565533
	5	1266.683185429594
	6	1264.5372972902005
	7	1248.046642007024
	8	1244.6106032597968
	9	1247.7012866275936
	10	1251.9470329040748
	11	1248.01821028499
	12	1251.8342447624782
	13	1251.3198938811074
	14	1251.2160034678816
	15	1252.2868144126153
	16	1253.80935683703
	17	1253.8800169378173
	18	1259.151929208102
	19	1258.7686741328582
	20	1260.7662070424594

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:02:42[39m




Crossvalidation Results:
	k	MSE
	1	2126.2359304218626
	2	2121.316946110368
	3	2100.220724549762
	4	2086.188333354534
	5	2067.101440187569
	6	2062.006606206577
	7	2054.2702362856357
	8	2038.3061906324883
	9	2039.9568919580508
	10	2039.572311209361
	11	2026.5067147523991
	12	2025.1613614953471
	13	2014.825643582848
	14	2014.7268600169036
	15	2014.5698070553753
	16	2007.8657183414548
	17	2010.8847162413315
	18	2011.5781437772475
	19	2019.7882477577994
	20	2019.095141061859

Best k = 16

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:02:55[39m




Crossvalidation Results:
	k	MSE
	1	2127.150180018503
	2	2122.2170889279764
	3	2100.8614141952244
	4	2086.776396315176
	5	2061.772569262645
	6	2048.1028657422507
	7	2040.9567441921577
	8	2041.599691836468
	9	2031.141745477015
	10	2020.9141660771159
	11	2055.9552320739435
	12	2009.2996700713088
	13	2007.8943882817919
	14	2014.3083723283391
	15	2014.9850567525395
	16	2013.968874507653
	17	2009.9680566289983
	18	2010.8650638237418
	19	2009.6637978821327
	20	2011.1373335571893

Best k = 13

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:02:52[39m




Crossvalidation Results:
	k	MSE
	1	1961.9782988320167
	2	1989.1336271512528
	3	1971.2852576104829
	4	1925.45287396399
	5	1895.4908951295383
	6	1893.9019970216398
	7	1897.9592593108368
	8	1891.1579979777791
	9	1889.3901376018869
	10	1892.4796930136072
	11	1891.8567888465032
	12	1884.874427860296
	13	1889.1562011966382
	14	1896.2865062357428
	15	1893.5425566660174
	16	1893.8082469188923
	17	1891.9432343203305
	18	1889.7365690114207
	19	1894.390850908459
	20	1894.532012302166

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:03:07[39m




Crossvalidation Results:
	k	MSE
	1	1960.640864579992
	2	1971.5759015999922
	3	1954.384530821911
	4	1942.730440497413
	5	1936.61892638005
	6	1899.820474812993
	7	1889.6993552105698
	8	1912.8710133408174
	9	1887.8647575067314
	10	1886.2143600624563
	11	1894.9925681509385
	12	1885.6993683606238
	13	1900.8247927367188
	14	1884.7026382570696
	15	1885.6156538057994
	16	1902.7328739789077
	17	1888.3416052788068
	18	1892.4209473908832
	19	1924.9795327337602
	20	1900.1740419406083

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:03:17[39m




Crossvalidation Results:
	k	MSE
	1	9947.296890843678
	2	9526.765152505597
	3	11779.68874578141
	4	9617.868575100227
	5	9517.801958479347
	6	9767.44886292671
	7	9673.811333577894
	8	9602.229261712953
	9	9605.555452974631
	10	9991.729880589264
	11	9544.746932675836
	12	9707.487903413572
	13	9613.216215681945
	14	10163.536482658812
	15	9671.452927934577
	16	10455.026831129051
	17	9646.803392439391
	18	10889.226067756252
	19	9764.762382733761
	20	9968.850322290964

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 5


[32mCross validating...100%|████████████████████████████████| Time: 0:03:23[39m




Crossvalidation Results:
	k	MSE
	1	9940.543187425032
	2	9520.63563553389
	3	9434.920876990382
	4	10344.196289186522
	5	9927.390807126161
	6	9921.738819373844
	7	10546.121116801394
	8	9559.958162448676
	9	9703.160389542309
	10	9567.986841488893
	11	9822.294870206768
	12	9626.237975067648
	13	9652.894281009121
	14	9791.248200203998
	15	9670.915442340362
	16	9808.685080678395
	17	9578.060154963277
	18	9645.937554867789
	19	9910.892285434282
	20	9884.79186745537

Best k = 3

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
P

[32mCross validating...100%|████████████████████████████████| Time: 0:03:26[39m




Crossvalidation Results:
	k	MSE
	1	13012.022750627566
	2	12948.237759622965
	3	13268.322383871335
	4	13320.869790163224
	5	13148.33571456615
	6	13222.820878661883
	7	13472.456723018173
	8	24676.962568190393
	9	13179.59339430623
	10	13174.831829793962
	11	13069.14652932878
	12	13184.75191096668
	13	13533.951013496859
	14	12995.035542738971
	15	13277.8613656346
	16	13214.96119228099
	17	13484.812137283003
	18	13044.130645779667
	19	13224.192437519054
	20	13460.458172324805

Best k = 2

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:03:36[39m




Crossvalidation Results:
	k	MSE
	1	13011.981864380694
	2	12948.656067687154
	3	13750.629064199828
	4	13758.446014336032
	5	13092.364541729858
	6	13553.116291138762
	7	13696.255987886309
	8	21311.464244303446
	9	13764.788318940231
	10	13180.385898184117
	11	13282.58675205334
	12	13179.214438948615
	13	13460.908923665605
	14	13490.510804563603
	15	14304.386442521758
	16	13901.676313344431
	17	14728.101696624632
	18	14874.203480210655
	19	18175.18449643191
	20	15705.281445179677

Best k = 2

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity pa

[32mCross validating...100%|████████████████████████████████| Time: 0:03:10[39m




Crossvalidation Results:
	k	MSE
	1	3642.79814787957
	2	3628.1241701146982
	3	76766.70579811453
	4	7307.586520938392
	5	3980.4038163078753
	6	3983.7740038050506
	7	3874.51272118987
	8	3582.58180876627
	9	3583.0618529383473
	10	3581.3534653435654
	11	3585.6531646000994
	12	3580.201196472958
	13	3584.9822141517125
	14	3590.471595474416
	15	3591.699445464864
	16	3577.9623838449306
	17	3581.026989380336
	18	3577.1664647399416
	19	3575.1798113278833
	20	3579.573635834123

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:04:59[39m




Crossvalidation Results:
	k	MSE
	1	3640.924905347976
	2	3802.264022958581
	3	27007.32698159179
	4	4135.80734297686
	5	3729.104316454241
	6	3618.1363063483755
	7	3621.460076903358
	8	3606.769102696499
	9	3610.9592933661856
	10	3607.7318176822187
	11	3606.8195862276802
	12	3597.242906246094
	13	3582.3796477187175
	14	3577.6071231331284
	15	3582.63064277691
	16	3579.1884075326752
	17	3575.420790718575
	18	3575.7476729408927
	19	3592.041237665953
	20	3597.2747801076407

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:01:39[39m




Crossvalidation Results:
	k	MSE
	1	1895.3116040419927
	2	708.8743026722386
	3	689.6819725393705
	4	678.2397905930593
	5	676.1480578414281
	6	658.9505215177418
	7	651.1028228262088
	8	648.4943854808139
	9	643.4251514339461
	10	643.9153072927209
	11	645.1709525994681
	12	645.7592277277437
	13	646.4630849101713
	14	647.5828860025097
	15	648.2303000150566
	16	648.0027569270633
	17	649.6913910178524
	18	651.3299852983351
	19	651.8013620613547
	20	652.2235545258917

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 9
P

[32mCross validating...100%|████████████████████████████████| Time: 0:02:23[39m




Crossvalidation Results:
	k	MSE
	1	1893.9996193992006
	2	708.7870327173207
	3	689.5177774311061
	4	674.4164656012547
	5	681.9214460517392
	6	661.3412728049889
	7	650.923956685953
	8	647.8206078126251
	9	642.6473066081019
	10	643.3671725264644
	11	645.7722770110979
	12	647.5140663728835
	13	647.5960953825613
	14	649.0020738628953
	15	649.4006529965052
	16	650.717730175335
	17	651.1934708349488
	18	652.8799225592621
	19	651.4905930324701
	20	652.603554944766

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:04:25[39m




Crossvalidation Results:
	k	MSE
	1	4286.957395885216
	2	4113.409132182256
	3	4091.022847196564
	4	4218.978799462007
	5	4113.413844383152
	6	4037.3499666981634
	7	4018.5791270083896
	8	4003.0095546736
	9	4012.233190711667
	10	3991.3156594061547
	11	4001.830517486414
	12	3992.626391632719
	13	4036.7624309855837
	14	3983.6396959819376
	15	3991.8420869235065
	16	4003.6784013511933
	17	3995.429171355616
	18	4009.3995926295192
	19	4002.84893887891
	20	4000.5375149163465

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:04:38[39m




Crossvalidation Results:
	k	MSE
	1	4287.676758596179
	2	4113.4401448021645
	3	4090.83312120229
	4	4072.933639575972
	5	4053.667991088471
	6	4033.6423827673098
	7	4018.420534856603
	8	4014.1201055700294
	9	3983.8029929363383
	10	3991.0037836464835
	11	3985.435131898018
	12	4003.0621444391427
	13	4002.9658860876216
	14	4007.11364587366
	15	4005.3678129936497
	16	3993.658253327273
	17	4017.8655376059787
	18	4051.5958623351275
	19	4046.837776549384
	20	4019.562628341484

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:04:38[39m




Crossvalidation Results:
	k	MSE
	1	6644.190189656684
	2	6323.768584039205
	3	7452.9545976593145
	4	6272.817304424122
	5	6233.153317099792
	6	6258.290721703608
	7	6248.967838467211
	8	6246.849622413312
	9	6284.385914605509
	10	6269.693526738198
	11	6238.486219804894
	12	6223.897496554079
	13	6240.122599148202
	14	6246.302730252291
	15	6242.16196448419
	16	6235.068049964559
	17	6238.37542686255
	18	6240.950815005985
	19	6291.791119394435
	20	6269.72581872072

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Pr

[32mCross validating...100%|████████████████████████████████| Time: 0:05:04[39m




Crossvalidation Results:
	k	MSE
	1	6626.493573383584
	2	6302.110982719853
	3	6261.2071583190955
	4	6204.32986733236
	5	6207.88945631343
	6	6193.3655131376745
	7	6200.027114569599
	8	6201.584678683672
	9	6233.961490828966
	10	6214.65216660369
	11	6247.132247754193
	12	6235.855513227682
	13	6223.046696783778
	14	6217.216137049305
	15	6248.165017746403
	16	6236.33358732366
	17	6238.680711779096
	18	6269.77893399597
	19	6248.889390166834
	20	6263.63578291394

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prior

([6.0, 8.0, 5.0, 5.0, 1.0, 7.0, 5.0, 5.0, 4.0, 4.0, 3.0, 4.0, 7.0, 5.0, 1.0, 0.0, 6.0, 5.0, 6.0, 4.0], [200.0, 67.0, 200.0, 200.0, 200.0, 200.0, 200.0, 43.0, 10.0, 7.0, 52.0, 60.0, 200.0, 200.0, 200.0, 5.0, 78.0, 14.0, 200.0, 200.0], [17.0, 20.0, 17.0, 12.0, 10.0, 20.0, 10.0, 15.0, 6.0, 6.0, 10.0, 8.0, 16.0, 12.0, 5.0, 2.0, 19.0, 9.0, 14.0, 12.0], [5.0, 8.0, 0.0, 5.0, 6.0, 4.0, 5.0, 5.0, 8.0, 6.0, 4.0, 4.0, 6.0, 5.0, 3.0, 4.0, 6.0, 6.0, 6.0, 4.0], [200.0, 81.0, 200.0, 200.0, 134.0, 75.0, 200.0, 96.0, 13.0, 200.0, 30.0, 44.0, 200.0, 200.0, 200.0, 200.0, 79.0, 17.0, 200.0, 200.0], [12.0, 5.0, 8.0, 9.0, 9.0, 8.0, 7.0, 12.0, 6.0, 6.0, 10.0, 8.0, 13.0, 14.0, 3.0, 2.0, 17.0, 9.0, 9.0, 6.0])

In [7]:
# Average number of true positives per strategy, followed by the per-simulation
# counts side by side (column 1: no initialization, column 2: initialized beta).
@show mean(no_init_TP) mean(yes_init_TP)
hcat(no_init_TP, yes_init_TP)

mean(no_init_TP) = 4.55
mean(yes_init_TP) = 5.0


20×2 Matrix{Float64}:
 6.0  5.0
 8.0  8.0
 5.0  0.0
 5.0  5.0
 1.0  6.0
 7.0  4.0
 5.0  5.0
 5.0  5.0
 4.0  8.0
 4.0  6.0
 3.0  4.0
 4.0  4.0
 7.0  6.0
 5.0  5.0
 1.0  3.0
 0.0  4.0
 6.0  6.0
 5.0  6.0
 6.0  6.0
 4.0  4.0

In [8]:
# Average iteration count per strategy, followed by the per-simulation values
# side by side (column 1: no initialization, column 2: initialized beta).
@show mean(no_init_iter) mean(yes_init_iter)
hcat(no_init_iter, yes_init_iter)


mean(no_init_iter) = 126.8
mean(yes_init_iter) = 138.45


20×2 Matrix{Float64}:
 200.0  200.0
  67.0   81.0
 200.0  200.0
 200.0  200.0
 200.0  134.0
 200.0   75.0
 200.0  200.0
  43.0   96.0
  10.0   13.0
   7.0  200.0
  52.0   30.0
  60.0   44.0
 200.0  200.0
 200.0  200.0
 200.0  200.0
   5.0  200.0
  78.0   79.0
  14.0   17.0
 200.0  200.0
 200.0  200.0

In [9]:
# Average cross-validated sparsity level per strategy, followed by the
# per-simulation values side by side (column 1: no initialization, column 2: initialized beta).
@show mean(no_init_best_k) mean(yes_init_best_k)
hcat(no_init_best_k, yes_init_best_k)

mean(no_init_best_k) = 12.0
mean(yes_init_best_k) = 8.65


20×2 Matrix{Float64}:
 17.0  12.0
 20.0   5.0
 17.0   8.0
 12.0   9.0
 10.0   9.0
 20.0   8.0
 10.0   7.0
 15.0  12.0
  6.0   6.0
  6.0   6.0
 10.0  10.0
  8.0   8.0
 16.0  13.0
 12.0  14.0
  5.0   3.0
  2.0   2.0
 19.0  17.0
  9.0   9.0
 14.0   9.0
 12.0   6.0

## Try 20 simulations (simulated genotypes)

We will compare

+ Number of true positives
+ Number of iterations until convergence
+ Best cross validated k

in the scenario where we initialize beta values versus the scenario where we do not.

In [51]:
"""
    bench(sim::Int)

Run `sim` simulated multivariate data sets (seeds `1:sim`) and compare IHT fits
with and without initialized beta values.

For every replicate the data are generated by `simulate_multivariate_sparse`, the
sparsity level is chosen by `cv_iht` over the candidate path, and the model is
refit by `fit_iht` at the selected sparsity level.

# Returns
A 6-tuple of length-`sim` `Float64` vectors, in this order:
`no_init_TP, no_init_iter, no_init_best_k, yes_init_TP, yes_init_iter, yes_init_best_k`.

- `*_TP`: number of causal SNPs with a nonzero estimate in at least one trait
- `*_iter`: `result.iter` of the final fit
- `*_best_k`: sparsity level with the smallest cross-validation MSE
"""
function bench(sim::Int)
    # simulation parameters
    n = 1000
    p = 10000
    k = 10
    r = 2
    βoverlap = 0
    path = 1:20 # candidate sparsity levels for cross validation
    # NOTE(review): the effect-size standard deviation is left at the simulator's
    # default — confirm that default matches the intended value.

    no_init_TP = zeros(sim)
    no_init_iter = zeros(sim)
    no_init_best_k = zeros(sim)
    yes_init_TP = zeros(sim)
    yes_init_iter = zeros(sim)
    yes_init_best_k = zeros(sim)

    # A causal SNP counts as recovered when any trait has a nonzero estimate for it.
    # Checking each entry (rather than summing over traits) prevents opposite-signed
    # effects from cancelling to zero and hiding a true positive.
    count_recovered(beta, snps) = count(j -> any(!iszero, view(beta, :, j)), snps)

    for i in 1:sim
        xla, _, true_b, _, Y = simulate_multivariate_sparse(
            n, p, k, r; seed=i, βoverlap=βoverlap
        )
        # row indices of the nonzero entries of true_b, deduplicated
        causal_snps = unique(idx[1] for idx in findall(!iszero, true_b))
        X = transpose(xla) # lazy transpose, shared by all fits below

        # NOTE(review): the two cv_iht calls below are independent; if cv_iht draws
        # random folds, the strategies are compared on different splits — confirm.

        # not initializing beta
        mses = cv_iht(Y, X, path=path, verbose=true, debias=true)
        # map the argmin position back to the sparsity level it stands for
        best_k = path[argmin(mses)]
        result = fit_iht(Y, X, k=best_k, verbose=true, debias=true)
        no_init_TP[i] = count_recovered(result.beta, causal_snps)
        no_init_iter[i] = result.iter
        no_init_best_k[i] = best_k

        # initializing beta
        mses = cv_iht(Y, X, path=path, init_beta=true, debias=true)
        best_k = path[argmin(mses)]
        result = fit_iht(Y, X, k=best_k, verbose=true, init_beta=true, debias=true)
        yes_init_TP[i] = count_recovered(result.beta, causal_snps)
        yes_init_iter[i] = result.iter
        yes_init_best_k[i] = best_k
    end

    return no_init_TP, no_init_iter, no_init_best_k,
        yes_init_TP, yes_init_iter, yes_init_best_k
end
# Time the benchmark over 20 simulated data sets and unpack its six result vectors.
@time (no_init_TP, no_init_iter, no_init_best_k,
    yes_init_TP, yes_init_iter, yes_init_best_k) = bench(20)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:15[39m




Crossvalidation Results:
	k	MSE
	1	3054.793264535042
	2	3056.3025981493015
	3	1385.711463791572
	4	826.9266005988334
	5	380.7077275030195
	6	285.15270616414927
	7	204.6031797484711
	8	133.7885163523987
	9	124.53754284424802
	10	125.37175558572136
	11	126.41997609398638
	12	123.89158338622163
	13	125.80471478106737
	14	126.75551440247028
	15	126.86603492736293
	16	129.20003074764134
	17	127.65346578207303
	18	128.52755513237457
	19	128.4157521947696
	20	130.262818111156

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:00:23[39m




Crossvalidation Results:
	k	MSE
	1	2473.744641162287
	2	1919.7695355012788
	3	1317.20922785413
	4	792.3897862079906
	5	379.9714410550192
	6	289.69404287594557
	7	201.89947111325324
	8	133.9479264279291
	9	124.76775416400523
	10	124.52811014708195
	11	124.34615657946154
	12	124.12115316781934
	13	125.48194803186833
	14	126.03742157043308
	15	125.51817156761408
	16	126.30416781908514
	17	126.95891248072653
	18	126.9935833061806
	19	126.8552408723718
	20	127.1870711317012

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:00:09[39m




Crossvalidation Results:
	k	MSE
	1	4119.3983093945135
	2	4120.837500801534
	3	2013.5242202569775
	4	1776.9374141581254
	5	1596.1956772417707
	6	1502.4562244456888
	7	1323.0838275703538
	8	1198.4840495920337
	9	1102.2826580576216
	10	1092.6874241211406
	11	1095.48435645757
	12	1095.0891890878167
	13	1099.5622332658997
	14	1106.571232293944
	15	1102.2928951228516
	16	1094.4424107718485
	17	1100.6685312509196
	18	1105.2567985582623
	19	1108.4377136243459
	20	1102.9775329977501

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:15[39m




Crossvalidation Results:
	k	MSE
	1	2798.60619505886
	2	2148.5857904715112
	3	1952.7272587036596
	4	1840.3669635949036
	5	1695.3695270771846
	6	1495.4173548738952
	7	1364.0427338512004
	8	1194.2288130340942
	9	1098.0715790965578
	10	1088.250486660775
	11	1091.4662916086174
	12	1089.9724593834424
	13	1089.925349003729
	14	1089.0306077049863
	15	1082.3717676293954
	16	1082.6103302140482
	17	1085.8803021650485
	18	1075.2070520963164
	19	1075.7444077345951
	20	1076.3254157759068

Best k = 18

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:08[39m




Crossvalidation Results:
	k	MSE
	1	3869.022775012708
	2	3875.1938964399674
	3	2075.977491544234
	4	1737.7532663856196
	5	1285.1535926254687
	6	1215.978062601306
	7	1099.941356626512
	8	1023.5244959943462
	9	1022.5384469893854
	10	1017.1694205304206
	11	1019.4075227201886
	12	1017.0047533345244
	13	1019.6719630281939
	14	1018.3674994655545
	15	1016.3361727047584
	16	1013.264505932913
	17	1018.98544460128
	18	1008.516530184434
	19	1015.1356196832659
	20	1011.224985066686

Best k = 18

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:00:10[39m




Crossvalidation Results:
	k	MSE
	1	3020.16117700283
	2	2535.6808698483724
	3	2075.585722290315
	4	1729.8005923977657
	5	1287.8083884743871
	6	1198.918665824035
	7	1104.4456432796246
	8	1027.5678335605487
	9	1024.1607671602574
	10	1028.3048699773342
	11	1026.7290594323827
	12	1035.8845431377872
	13	1021.6160556289783
	14	1025.3441804362383
	15	1029.8956048445634
	16	1025.183620431243
	17	1028.0814938366034
	18	1019.0147640246081
	19	1023.7312992844454
	20	1025.4666238948535

Best k = 18

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:00:23[39m




Crossvalidation Results:
	k	MSE
	1	1846.6633546046774
	2	1847.395040964263
	3	565.6942670001745
	4	379.1690429704304
	5	326.9826241954661
	6	301.05220970149367
	7	274.09411400128414
	8	265.5431708553567
	9	265.12142710861815
	10	265.3936303893283
	11	265.72886022665824
	12	265.7870249105555
	13	265.6703148820851
	14	265.76879402210767
	15	265.89773607691023
	16	266.9848770990996
	17	266.9904607816972
	18	264.2972976270876
	19	264.1129739058763
	20	264.608666013348

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:31[39m




Crossvalidation Results:
	k	MSE
	1	1021.9741687035303
	2	722.4094387675605
	3	576.5818191108913
	4	380.72450000149774
	5	327.3646023916981
	6	302.0531516364359
	7	273.78184114631705
	8	265.31698647174727
	9	265.74403531389447
	10	265.4147274842746
	11	265.40478580293524
	12	265.1056297749764
	13	264.3114211494764
	14	263.17425904335647
	15	265.4616569858104
	16	263.989286962898
	17	264.2018746265177
	18	263.80109262116736
	19	263.0350720079289
	20	262.94818880968023

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (

[32mCross validating...100%|████████████████████████████████| Time: 0:00:23[39m




Crossvalidation Results:
	k	MSE
	1	1958.132280721401
	2	1957.611823106944
	3	697.5017339117612
	4	621.2306804999038
	5	541.4859036286252
	6	500.6715945305205
	7	485.44948712181554
	8	487.65610622313784
	9	488.9125713250768
	10	489.6146294798481
	11	489.2026420167265
	12	491.8354777937956
	13	493.60405286608494
	14	493.75146630924917
	15	492.98210154627935
	16	493.015519158939
	17	492.1521822513245
	18	492.14950114044825
	19	496.7680075561036
	20	494.0763850576087

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) =

[32mCross validating...100%|████████████████████████████████| Time: 0:00:21[39m




Crossvalidation Results:
	k	MSE
	1	1421.710668564503
	2	1055.0324691213789
	3	695.348936150441
	4	624.1879940949743
	5	539.8483790297054
	6	498.5449784363148
	7	483.50728342693026
	8	485.3630465097024
	9	487.84452047886117
	10	489.52520534187823
	11	490.6309919810383
	12	492.14006745451854
	13	493.7704607320317
	14	493.8797854701312
	15	492.8043657701295
	16	495.43548513155343
	17	494.2426195557679
	18	497.01316739578067
	19	498.5982336679815
	20	497.0581952691556

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:31[39m




Crossvalidation Results:
	k	MSE
	1	2138.9386410849193
	2	2138.2855402340547
	3	1335.8077137346972
	4	1284.7414770061027
	5	1246.736496695918
	6	1229.2728136380615
	7	1226.8057573145147
	8	1230.8613848476502
	9	1234.529926978348
	10	1224.8433288381832
	11	1232.2930437994817
	12	1227.9268538528756
	13	1229.0993938174345
	14	1227.1996585251056
	15	1227.049667870438
	16	1233.3707913841272
	17	1236.2717000762177
	18	1237.3917157163664
	19	1239.6339461322875
	20	1234.6750255730142

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity pa

[32mCross validating...100%|████████████████████████████████| Time: 0:00:55[39m




Crossvalidation Results:
	k	MSE
	1	1729.2031170714963
	2	1461.7805423452585
	3	1332.0166424445795
	4	1281.623817940994
	5	1243.4242610028673
	6	1233.222767169832
	7	1221.123578512562
	8	1213.4344761066534
	9	1217.0318544398915
	10	1216.4279584342573
	11	1208.6433641008452
	12	1204.3766201532862
	13	1201.6060667679158
	14	1207.1260010673136
	15	1208.9161170798252
	16	1210.10651430013
	17	1217.2107006131491
	18	1214.1483532186558
	19	1207.8607829687676
	20	1220.0702397625957

Best k = 13

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:00:07[39m




Crossvalidation Results:
	k	MSE
	1	1511.2322420671592
	2	1512.1423627927393
	3	786.4875466292978
	4	693.7981187280933
	5	625.8224182880062
	6	560.8524058315137
	7	483.50581905388924
	8	418.59056279365353
	9	377.48620922394446
	10	381.31109480860596
	11	383.3439881094312
	12	384.0909799635849
	13	382.2067116488125
	14	384.8954365349034
	15	388.0612316939862
	16	390.30874050241937
	17	393.37935940354066
	18	395.72304110521566
	19	395.27507529170407
	20	396.21673588953786

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:00:10[39m




Crossvalidation Results:
	k	MSE
	1	1218.338824447639
	2	964.0332750708242
	3	778.9647088641252
	4	687.6248566693906
	5	620.9045547871892
	6	566.3136018193694
	7	488.89322173994276
	8	417.00457298803326
	9	376.50236398534787
	10	374.9992996381239
	11	374.79621419321967
	12	374.5835532424792
	13	375.236922873782
	14	375.55045478561954
	15	375.77074596339224
	16	374.14441314572395
	17	375.7591662631624
	18	375.77872415910736
	19	375.2832065286753
	20	377.3596431458522

Best k = 16

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:00:29[39m




Crossvalidation Results:
	k	MSE
	1	2492.1690509964756
	2	2492.213698089151
	3	927.1101928599843
	4	692.4129270014442
	5	617.340104134025
	6	478.64715935658336
	7	413.332121153853
	8	348.51777048051827
	9	328.77055428419135
	10	331.1280257501866
	11	337.97566655606056
	12	339.70833910820556
	13	336.75759259118337
	14	339.0300034144968
	15	336.33078246070943
	16	336.0169672333466
	17	335.2222043292088
	18	349.3710276985849
	19	336.91250610324596
	20	343.6870391862432

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:23[39m




Crossvalidation Results:
	k	MSE
	1	1590.7913165588086
	2	1175.4619112402427
	3	923.4852587324033
	4	690.9886097279301
	5	601.9711110430652
	6	471.31476648753
	7	409.0824967925943
	8	347.77015132491334
	9	336.60313796159596
	10	349.39006416803045
	11	334.1908432630537
	12	324.5148459341884
	13	327.9170278452582
	14	346.391090007148
	15	344.21362826763965
	16	327.37812136127883
	17	326.62727530974337
	18	327.06244970219547
	19	327.2801205431165
	20	326.09193020348005

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:00:21[39m




Crossvalidation Results:
	k	MSE
	1	3188.1195804944764
	2	3187.8344372100332
	3	1847.6896293028726
	4	1605.9140354877713
	5	1270.6309965809585
	6	1230.143809728698
	7	1174.9291468167476
	8	1148.3483913990012
	9	1127.1797743615757
	10	1117.9330096978938
	11	1095.922895312841
	12	1100.8274152317915
	13	1100.161223169397
	14	1101.4193353985056
	15	1105.9522399144648
	16	1110.2674615960364
	17	1103.3897956429935
	18	1101.4353661645757
	19	1108.661758502375
	20	1099.6946161510587

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:18[39m




Crossvalidation Results:
	k	MSE
	1	2459.576063226315
	2	2135.87141046526
	3	1776.5737423801081
	4	1517.7322348657115
	5	1276.6346778293891
	6	1236.427024531175
	7	1181.2221558409904
	8	1137.5188403885556
	9	1124.1296049861726
	10	1107.0139334732592
	11	1092.539015548111
	12	1094.3401357639648
	13	1100.8876827795586
	14	1100.385144313083
	15	1102.0087422449778
	16	1104.0667018085808
	17	1096.5407872269925
	18	1105.0847002876892
	19	1102.5276083425692
	20	1096.9255611038477

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:00:07[39m




Crossvalidation Results:
	k	MSE
	1	5147.23365928719
	2	5150.034254823294
	3	2811.8131858385777
	4	2400.1633937632773
	5	2132.8645257483176
	6	1839.6450416212788
	7	1746.7556879051774
	8	1688.6779130346426
	9	1651.782948788346
	10	1645.5034294770057
	11	1658.1825433086626
	12	1659.372649697851
	13	1662.9037503072996
	14	1671.8199490710695
	15	1673.219432831435
	16	1679.4635366633831
	17	1682.8264493235463
	18	1682.8874968450943
	19	1692.5849293687124
	20	1709.272905026929

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:00:27[39m




Crossvalidation Results:
	k	MSE
	1	3802.519744492587
	2	3268.9999840722894
	3	2848.554671195396
	4	2394.070046601168
	5	2181.7603638492096
	6	1837.5694111308956
	7	1745.0862711428833
	8	1705.8284953555258
	9	1674.8793821416698
	10	1641.4820233382475
	11	1640.8950796340077
	12	1637.6059052288747
	13	1640.9691068290772
	14	1641.3381678254516
	15	1642.735135607188
	16	1643.0653440366734
	17	1642.8778454434312
	18	1648.7051637813256
	19	1650.2770964800088
	20	1647.31389759684

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:00:10[39m




Crossvalidation Results:
	k	MSE
	1	2072.4325491542727
	2	2073.517202242454
	3	434.89722511544005
	4	362.93913303284535
	5	307.5711717490205
	6	270.3198946320036
	7	243.29432295144738
	8	185.75068910273052
	9	151.47227982282826
	10	151.2872696152945
	11	151.12618235528686
	12	150.8621350067911
	13	150.55432285590672
	14	150.52123586887245
	15	150.3487803956017
	16	150.12541217260264
	17	149.9388074230147
	18	150.34291288771865
	19	151.08834468462427
	20	151.13342853991213

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:00:14[39m




Crossvalidation Results:
	k	MSE
	1	1047.4746496557277
	2	533.2776840463414
	3	394.59734409159506
	4	331.70004703565536
	5	289.25912350667625
	6	247.17335100915778
	7	213.2325685733413
	8	178.21060236468847
	9	152.9246225248254
	10	152.6947320257058
	11	152.42182139923526
	12	152.65986117952204
	13	152.21420578586626
	14	151.7352185132665
	15	151.32925141938318
	16	151.1653544763367
	17	151.2451682603617
	18	151.71574126724005
	19	151.6983703847312
	20	151.38635329716337

Best k = 16

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:19[39m




Crossvalidation Results:
	k	MSE
	1	3503.697486690358
	2	3504.7903351123987
	3	1723.9189995152708
	4	1695.2966150956736
	5	492.79899304769026
	6	269.1566853222015
	7	189.1011089153282
	8	261.0559873815122
	9	137.5708442602703
	10	138.1503987029287
	11	138.94720762498048
	12	138.921416890065
	13	139.22953509101887
	14	139.98864298590956
	15	139.9715597180941
	16	140.59174636768233
	17	140.01112410913458
	18	141.73926092304652
	19	140.69658318786142
	20	140.62363946880876

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:00:12[39m




Crossvalidation Results:
	k	MSE
	1	2413.6888296705674
	2	1247.8850451469452
	3	920.3514994885609
	4	632.5284173351935
	5	352.2738354799583
	6	253.71788009617296
	7	189.75607553169218
	8	136.312364796619
	9	137.51924946867297
	10	137.66368320373522
	11	138.56618512628663
	12	138.7821562354303
	13	139.0602821172608
	14	140.01048139134772
	15	139.15017631719783
	16	141.34185404301402
	17	139.31757442405424
	18	139.3424737281516
	19	139.19068738998416
	20	140.708234879069

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:11[39m




Crossvalidation Results:
	k	MSE
	1	1600.1650836343617
	2	1600.468780553846
	3	957.3113135587831
	4	792.0733733764637
	5	658.7052482970164
	6	642.8878567675172
	7	619.3190107890027
	8	606.9004038624892
	9	601.2584871421284
	10	598.6443818815213
	11	597.3860353021865
	12	597.9454655217892
	13	596.2097716114391
	14	599.2167042023062
	15	597.2045814186195
	16	596.6087626424012
	17	598.2713548324164
	18	597.9378424170247
	19	594.2967618492546
	20	596.7746091765706

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 19

[32mCross validating...100%|████████████████████████████████| Time: 0:00:10[39m




Crossvalidation Results:
	k	MSE
	1	1190.7099715514287
	2	1026.9442958497134
	3	862.1958582730994
	4	721.9927431935703
	5	660.2380167974266
	6	625.3741560861182
	7	609.6006685805122
	8	606.894438735326
	9	603.9426077246831
	10	600.2973789356471
	11	604.4107008736172
	12	607.409304025802
	13	609.2139505791195
	14	609.2084303761195
	15	608.1969192998574
	16	612.3695586720862
	17	610.5689186414718
	18	615.7663431539111
	19	614.3832517384268
	20	616.845180201266

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10
P

[32mCross validating...100%|████████████████████████████████| Time: 0:00:17[39m




Crossvalidation Results:
	k	MSE
	1	3221.6110618786697
	2	3222.6612410354533
	3	1882.0341572388802
	4	1395.44079919574
	5	1032.8116474243134
	6	809.460502871215
	7	657.1647550753144
	8	578.6671555657773
	9	528.8253299682052
	10	512.8690132986966
	11	515.3024581529684
	12	516.5350452190363
	13	514.4239632106669
	14	514.9469703232705
	15	516.4488423252126
	16	515.3103882921828
	17	518.7811603903832
	18	518.3694860935952
	19	520.488933395279
	20	518.5960864826948

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10

[32mCross validating...100%|████████████████████████████████| Time: 0:00:16[39m




Crossvalidation Results:
	k	MSE
	1	2408.5564036060273
	2	1870.50154580569
	3	1483.913210395834
	4	1214.574854402113
	5	942.5453064993692
	6	788.7773597179912
	7	654.0720975815073
	8	575.7468137933943
	9	525.7610813954575
	10	509.35964925734265
	11	511.30118780146887
	12	512.3445850847954
	13	513.3815988788772
	14	512.6356987044416
	15	515.5611098823166
	16	517.2045566432719
	17	513.8619703786889
	18	516.7653774364561
	19	511.76917123334255
	20	514.0150464720224

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:14[39m




Crossvalidation Results:
	k	MSE
	1	2943.393376206929
	2	2944.693176632575
	3	2446.480671421018
	4	2317.1380058723
	5	2261.7631407769145
	6	2237.399189246277
	7	2235.16896801013
	8	2214.364317104761
	9	2210.7528514087476
	10	2213.865358008438
	11	2225.4878033527184
	12	2223.5606340028075
	13	2254.530809733114
	14	2239.2602195317027
	15	2263.5971388586872
	16	2254.4285334915494
	17	2272.623551328191
	18	2273.0284982902845
	19	2279.0226967266935
	20	2288.9783410397295

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:14[39m




Crossvalidation Results:
	k	MSE
	1	2800.2712125763783
	2	2540.0524461341274
	3	2408.934491958782
	4	2310.513915357925
	5	2289.725886509719
	6	2254.1718525777005
	7	2205.3453248359738
	8	2202.59998859057
	9	2217.9616760586873
	10	2228.3491401851757
	11	2245.234726040033
	12	2231.110346823189
	13	2252.204166441968
	14	2252.2035967451066
	15	2245.3553752506173
	16	2253.633839280405
	17	2251.060632905433
	18	2249.5750855745014
	19	2262.2823766920446
	20	2272.017538052177

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (

[32mCross validating...100%|████████████████████████████████| Time: 0:00:55[39m




Crossvalidation Results:
	k	MSE
	1	2417.1611704308125
	2	2418.016164932355
	3	952.7904085102643
	4	561.6787694934544
	5	483.1375655216543
	6	357.823229655095
	7	354.4337705751964
	8	360.100840829463
	9	354.84764311191293
	10	352.49723867189095
	11	354.03775773567145
	12	336.62005865349107
	13	339.1003617484738
	14	339.5466703502556
	15	342.5350198188032
	16	342.1954879514577
	17	342.2937752314044
	18	344.0445572745888
	19	346.59167233842743
	20	344.6927015919345

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) =

[32mCross validating...100%|████████████████████████████████| Time: 0:00:59[39m




Crossvalidation Results:
	k	MSE
	1	1870.0634761636568
	2	1365.0957268950538
	3	952.7779317468793
	4	562.7341220533084
	5	488.1022628637601
	6	361.2680308689551
	7	361.72634230968066
	8	356.3480902538472
	9	354.42511440387034
	10	343.549855542825
	11	336.85767945361476
	12	339.4546763123222
	13	342.5707654309256
	14	343.0657684118446
	15	343.21094212197954
	16	343.74952374567476
	17	343.26808469184374
	18	347.228333240942
	19	350.1080375423589
	20	350.37386855456714

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:00:19[39m




Crossvalidation Results:
	k	MSE
	1	2926.6637402656243
	2	2926.1564717125366
	3	1712.5804691512137
	4	1308.7302933919011
	5	1188.5491818191176
	6	1128.8690559740448
	7	1104.300991533311
	8	1088.7385281212719
	9	1084.5685797959377
	10	1073.4793828599834
	11	1072.4185870611595
	12	1073.1432344021516
	13	1076.5540218094297
	14	1074.6105323849772
	15	1076.335829096547
	16	1079.5648849727654
	17	1078.8296332151283
	18	1084.8950528635667
	19	1090.2659956002626
	20	1076.6118454817274

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity p

[32mCross validating...100%|████████████████████████████████| Time: 0:00:11[39m




Crossvalidation Results:
	k	MSE
	1	1995.6476975889118
	2	1530.4539395420366
	3	1381.74382918009
	4	1324.3937576912126
	5	1220.4199750694893
	6	1132.1208491983384
	7	1107.6431823273101
	8	1092.8156872552918
	9	1081.250181173583
	10	1077.4235569060772
	11	1077.8997407521842
	12	1077.3717261444353
	13	1081.5314104126683
	14	1078.0565414494392
	15	1080.8124825664502
	16	1085.285395507943
	17	1080.1975000402008
	18	1087.9799347051267
	19	1084.4702208993021
	20	1086.1085701370807

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:17[39m




Crossvalidation Results:
	k	MSE
	1	1809.8400472299572
	2	1810.4241986331258
	3	1390.9446057655316
	4	1327.7980291229148
	5	1214.407400854655
	6	1175.7327658678364
	7	1153.907702396611
	8	1159.180683218176
	9	1163.4584260390336
	10	1161.528758840796
	11	1171.2283087451392
	12	1166.1951790766009
	13	1166.8943125647525
	14	1169.841956552938
	15	1167.9511912688508
	16	1178.5978513224054
	17	1176.092835462503
	18	1180.5723113034196
	19	1190.0324247246
	20	1190.6957134486784

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:00:23[39m




Crossvalidation Results:
	k	MSE
	1	1599.6669332814313
	2	1460.6935108043645
	3	1352.9779922859034
	4	1264.395835854008
	5	1213.5145887301364
	6	1174.875751530895
	7	1152.7602045793121
	8	1153.6815536883284
	9	1161.396565697047
	10	1162.5655246880133
	11	1173.094801093199
	12	1173.9959613349702
	13	1180.4877390696295
	14	1183.167720667702
	15	1190.4145300494827
	16	1193.1814145711523
	17	1186.761301214222
	18	1195.0540868196233
	19	1195.5427801947633
	20	1203.6638920541982

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:00:17[39m




Crossvalidation Results:
	k	MSE
	1	3257.023842514791
	2	3257.205350131313
	3	1816.13076482183
	4	1665.971778869649
	5	1621.0960806327325
	6	1618.430705594906
	7	1600.1015730181698
	8	1608.0132133617624
	9	1608.4413750105477
	10	1600.2862886875
	11	1609.261577966693
	12	1615.7943301169148
	13	1616.2492312714703
	14	1618.9348170170415
	15	1614.6750052852221
	16	1621.89780833151
	17	1620.619380828489
	18	1623.8990165376517
	19	1623.3905348780656
	20	1628.351809738397

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:21[39m




Crossvalidation Results:
	k	MSE
	1	2539.8857087649376
	2	1989.3459373398186
	3	1813.2714836626474
	4	1664.2199175109122
	5	1619.3318716961517
	6	1601.9029835794584
	7	1590.820970208554
	8	1601.5936184549082
	9	1599.0333296908984
	10	1595.8155973739056
	11	1595.2312577867765
	12	1604.9671732894674
	13	1606.3143303497313
	14	1621.7671409987408
	15	1621.4566232032848
	16	1643.3405443734482
	17	1644.2733390562923
	18	1640.259349938944
	19	1648.1714421339466
	20	1671.659618111966

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:25[39m




Crossvalidation Results:
	k	MSE
	1	2990.8474096892164
	2	2992.3088051990553
	3	1494.4717682845976
	4	1341.534072974977
	5	1332.648046300467
	6	1334.6513808691127
	7	1333.110226331015
	8	1330.5466495974354
	9	1328.4768057415151
	10	1337.4089666182967
	11	1334.1459974519678
	12	1336.912620283119
	13	1331.078924590034
	14	1333.87966276255
	15	1341.988039310459
	16	1334.9788577561853
	17	1342.0987475741301
	18	1337.6766283881086
	19	1354.5854036401154
	20	1340.6160370775158

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:00:34[39m




Crossvalidation Results:
	k	MSE
	1	1996.313263237039
	2	1689.8554332635886
	3	1470.7049120019306
	4	1344.0057838453026
	5	1335.0150882759845
	6	1337.1938936153776
	7	1335.3770454500113
	8	1332.257798533526
	9	1332.4252532655528
	10	1330.9335550806786
	11	1340.098699468186
	12	1332.0911905742512
	13	1332.7314445059524
	14	1326.755867186044
	15	1334.2289438042749
	16	1341.185598080644
	17	1332.36385948852
	18	1335.0200704103552
	19	1341.4319954894136
	20	1341.1683986073658

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

([9.0, 9.0, 9.0, 10.0, 7.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 8.0, 10.0, 10.0, 8.0, 7.0, 9.0, 7.0, 7.0, 6.0], [10.0, 6.0, 21.0, 11.0, 6.0, 8.0, 6.0, 6.0, 6.0, 8.0, 8.0, 10.0, 11.0, 6.0, 7.0, 200.0, 8.0, 6.0, 6.0, 11.0], [12.0, 10.0, 18.0, 19.0, 7.0, 10.0, 9.0, 9.0, 11.0, 10.0, 17.0, 9.0, 19.0, 10.0, 9.0, 12.0, 11.0, 7.0, 7.0, 9.0], [9.0, 10.0, 9.0, 9.0, 7.0, 8.0, 10.0, 9.0, 10.0, 10.0, 10.0, 8.0, 8.0, 10.0, 8.0, 7.0, 9.0, 7.0, 7.0, 7.0], [10.0, 13.0, 8.0, 12.0, 6.0, 200.0, 7.0, 200.0, 6.0, 7.0, 9.0, 6.0, 8.0, 6.0, 6.0, 200.0, 8.0, 6.0, 7.0, 11.0], [12.0, 18.0, 18.0, 20.0, 7.0, 13.0, 16.0, 12.0, 11.0, 12.0, 16.0, 8.0, 10.0, 10.0, 8.0, 11.0, 12.0, 7.0, 7.0, 14.0])

In [52]:
# Print the mean of each TP vector (`@show` echoes the expression and its value),
# then display the two vectors side by side: column 1 = no_init_TP,
# column 2 = yes_init_TP, one row per entry.
# NOTE(review): "TP" presumably counts true positives and "no_init"/"yes_init"
# presumably mean IHT run without/with an initial estimate -- confirm against
# the cell that fills these vectors.
@show mean(no_init_TP)
@show mean(yes_init_TP)
[no_init_TP yes_init_TP]

mean(no_init_TP) = 8.6
mean(yes_init_TP) = 8.6


20×2 Matrix{Float64}:
  9.0   9.0
  9.0  10.0
  9.0   9.0
 10.0   9.0
  7.0   7.0
  8.0   8.0
  9.0  10.0
  9.0   9.0
 10.0  10.0
 10.0  10.0
 10.0  10.0
  8.0   8.0
 10.0   8.0
 10.0  10.0
  8.0   8.0
  7.0   7.0
  9.0   9.0
  7.0   7.0
  7.0   7.0
  6.0   7.0

In [53]:
# Print the mean of each iteration-count vector, then display them side by side
# (column 1 = no_init_iter, column 2 = yes_init_iter).
# NOTE(review): entries equal to 200.0 presumably mark runs that hit the
# iteration cap rather than converging; a few such runs dominate the mean --
# confirm the cap and consider reporting the median as well.
@show mean(no_init_iter)
@show mean(yes_init_iter)
[no_init_iter yes_init_iter]

mean(no_init_iter) = 18.05
mean(yes_init_iter) = 36.8


20×2 Matrix{Float64}:
  10.0   10.0
   6.0   13.0
  21.0    8.0
  11.0   12.0
   6.0    6.0
   8.0  200.0
   6.0    7.0
   6.0  200.0
   6.0    6.0
   8.0    7.0
   8.0    9.0
  10.0    6.0
  11.0    8.0
   6.0    6.0
   7.0    6.0
 200.0  200.0
   8.0    8.0
   6.0    6.0
   6.0    7.0
  11.0   11.0

In [54]:
# Print the mean of each best-k vector, then display them side by side
# (column 1 = no_init_best_k, column 2 = yes_init_best_k).
# NOTE(review): presumably these hold the cross-validated "Best k" chosen in
# each run -- confirm against the cell that fills these vectors.
@show mean(no_init_best_k)
@show mean(yes_init_best_k)
[no_init_best_k yes_init_best_k]

mean(no_init_best_k) = 11.25
mean(yes_init_best_k) = 12.1


20×2 Matrix{Float64}:
 12.0  12.0
 10.0  18.0
 18.0  18.0
 19.0  20.0
  7.0   7.0
 10.0  13.0
  9.0  16.0
  9.0  12.0
 11.0  11.0
 10.0  12.0
 17.0  16.0
  9.0   8.0
 19.0  10.0
 10.0  10.0
  9.0   8.0
 12.0  11.0
 11.0  12.0
  7.0   7.0
  7.0   7.0
  9.0  14.0

In [48]:
# Print the mean of each TP vector, then display the two vectors side by side
# (column 1 = no_init_TP, column 2 = yes_init_TP).
# NOTE(review): this cell has an earlier execution count than the identical
# cell above it in the notebook, so its recorded output presumably comes from a
# different run of the simulation -- confirm which run each output belongs to.
@show mean(no_init_TP)
@show mean(yes_init_TP)
[no_init_TP yes_init_TP]

mean(no_init_TP) = 8.4
mean(yes_init_TP) = 8.45


[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m
[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m


20×2 Matrix{Float64}:
  9.0   9.0
  9.0   9.0
  9.0   9.0
  8.0  10.0
  7.0   7.0
  6.0   8.0
  9.0  10.0
  9.0   9.0
 10.0  10.0
 10.0  10.0
 10.0  10.0
  8.0   8.0
  9.0   8.0
 10.0  10.0
  8.0   7.0
  8.0   7.0
  9.0   9.0
  7.0   7.0
  7.0   7.0
  6.0   5.0

In [49]:
# Print the mean of each iteration-count vector, then display them side by side
# (column 1 = no_init_iter, column 2 = yes_init_iter).
# NOTE(review): entries equal to 200.0 presumably mark runs that hit the
# iteration cap -- confirm.
@show mean(no_init_iter)
@show mean(yes_init_iter)
[no_init_iter yes_init_iter]

mean(no_init_iter) = 53.95
mean(yes_init_iter) = 44.05


20×2 Matrix{Float64}:
  19.0   13.0
  22.0   20.0
 167.0   74.0
  26.0   42.0
  14.0   10.0
  11.0    8.0
  42.0   32.0
 200.0  200.0
  14.0   12.0
  21.0   47.0
  25.0   13.0
  11.0    8.0
 200.0  145.0
  11.0    8.0
   7.0    5.0
 200.0  200.0
  49.0   16.0
  10.0    7.0
  10.0   10.0
  20.0   11.0

In [50]:
# Print the mean of each best-k vector, then display them side by side
# (column 1 = no_init_best_k, column 2 = yes_init_best_k).
# NOTE(review): presumably the cross-validated sparsity level selected per run
# -- confirm against the cell that fills these vectors.
@show mean(no_init_best_k)
@show mean(yes_init_best_k)
[no_init_best_k yes_init_best_k]

mean(no_init_best_k) = 9.05
mean(yes_init_best_k) = 9.65


20×2 Matrix{Float64}:
  9.0   9.0
  9.0   9.0
 11.0  13.0
 12.0  16.0
  7.0   7.0
  6.0   8.0
  9.0  12.0
  9.0   9.0
 10.0  11.0
 10.0  13.0
 10.0  10.0
  8.0   8.0
  9.0  10.0
 10.0  10.0
  8.0   7.0
 12.0  11.0
 12.0  10.0
  7.0   7.0
  7.0   7.0
  6.0   6.0

## Can we increase IHT's true positive by finding nearby SNPs?

Short answer: not really (increase is 1-2%)

In [29]:
# Call process_iht (defined elsewhere in this notebook) with 0.9 and verbose
# output disabled, unpacking its two return values into TP and TP_LD.
# NOTE(review): 0.9 is presumably the LD threshold used to count a nearby SNP
# as a hit, and TP_LD the true-positive rate under that relaxed rule -- confirm
# against the definition of process_iht.
TP, TP_LD = process_iht(0.9, verbose=false)
# Print both means as a tuple; the trailing `;` suppresses the cell's own value display.
@show mean(TP), mean(TP_LD);

k = 10, r = 2, polygenic model

(mean(TP), mean(TP_LD)) = (0.42875, 0.43875)


In [30]:
# Same comparison as the previous cell but with 0.5 as the argument; `verbose`
# is left at its default here.
# NOTE(review): 0.5 is presumably a looser LD threshold -- confirm against the
# definition of process_iht.
TP, TP_LD = process_iht(0.5)
# Print both means as a tuple; the trailing `;` suppresses the cell's own value display.
@show mean(TP), mean(TP_LD);

k = 10, r = 2, polygenic model

(mean(TP), mean(TP_LD)) = (0.42875, 0.44125)


## Conclusion:
+ mvPLINK and GEMMA have essentially identical power, about 20% higher than IHT
+ The high power of mvPLINK and GEMMA comes with an **extremely high** false positive rate (type I error)
+ QQ and Manhattan plots for GEMMA and PLINK often look extremely poor
+ **Simulated genotypes *do not* produce insane p-values for GEMMA and mvPLINK.**
+ Searching SNPs in LD with the causal SNPs does not increase true positive rate for IHT