# Simulations using NFBC1966 results

Using chromosome 21 of the NFBC (Stampeed) data ($n=5340$ samples and $p = 5540$ SNPs), let us simulate:
+ Non-independent samples
+ Polygenic traits where every SNP contributes to the variance of the phenotypes, but only $k$ SNPs contribute to the mean of the phenotype.

## Simulation model

For $r$ traits, our simulation model is:

$$\mathbf{Y}_{r \times n} \sim \text{MatrixNormal}(\mathbf{B}_{r \times p}\mathbf{X}_{p \times n}, \ \ \Sigma_{r \times r} , \ \ \sigma_g^2\Phi_{n \times n} + \sigma_e^2 \mathbf{I}_{n \times n})$$

+ $\mathbf{X}_{p \times n}$ contains *all* predictors (genetic + non-genetic)
+ $\mathbf{B}_{r \times p}$ contains (true) regression coefficients. $k$ entries are non-zero
+ $\Phi$ is the GRM estimated from genotypes
+ $\Sigma_{r \times r} = \mathbf{A}^t\mathbf{A}$ is the row (trait) covariance matrix where $A_{ij} \sim N(0, 1)$
+ $\sigma_g^2\Phi_{n \times n} + \sigma_e^2 \mathbf{I}_{n \times n}$ is the column (sample) covariance matrix
+ $\sigma_g^2 = 0.6$ and $\sigma_e^2 = 0.4$ (thus heritability is 60%)
+ $k = \{10, 20, 50, 100\}$ (number of causal SNPs)
+ Pleiotropic overlap = 20% (e.g. if $k=10$, then 2 causal SNPs are shared by every trait, leaving only 8 unique causal SNPs)
+ $\beta_i \sim N(0, 0.1)$
+ $r = \{2, 5\}$  (number of traits)
+ $n=5340$ samples
+ $p = 5540$ SNPs

In [1]:
# using Revise
# using MendelIHT
# using SnpArrays
# using Random
# using GLM
# using DelimitedFiles
# using Distributions
# using LinearAlgebra
# using CSV
# using DataFrames
# using StatsBase
# using Statistics
# using TraitSimulation

using Distributed
addprocs(8)
@everywhere begin
    using Revise
    using MendelIHT
    using SnpArrays
    using Random
    using GLM
    using DelimitedFiles
    using Distributions
    using LinearAlgebra
    using CSV
    using DataFrames
    using StatsBase
    using Statistics
    using TraitSimulation
end

┌ Info: Precompiling MendelIHT [921c7187-1484-5754-b919-5d3ed9ac03c4]
└ @ Base loading.jl:1317


### Helper functions for parsing results

In [2]:
"""
    get_top_mvPLINK_SNPs(sim::Int, nsnps::Int)

Find the `nsnps` most significant SNPs for mvPLINK in simulation `sim`.

Reads `plink.mqfam.total` from the simulation's folder under `NFBCsim/` and returns the
row indices of the `nsnps` smallest values in its `P` column. When that column was not
parsed as `Float64`, entries equal to the string "NA" are replaced by "1.0" before the
column is parsed, so they rank as least significant.

Throws a `BoundsError` if `nsnps` exceeds the number of rows in the file.
"""
function get_top_mvPLINK_SNPs(sim::Int, nsnps::Int)
    dir = "NFBCsim/sim$sim/"

    # read mvPLINK result
    mvplink_df = CSV.read(dir * "plink.mqfam.total", DataFrame, delim=' ', ignorerepeated=true)

    # get pvalues, possibly accounting for "NA"s
    if eltype(mvplink_df[!, :P]) == Float64
        pval = mvplink_df[!, :P]
    else
        mvplink_df[findall(x -> x == "NA", mvplink_df[!, :P]), :P] .= "1.0"
        pval = parse.(Float64, mvplink_df[!, :P])
    end

    # rank rows from most to least significant and keep the leading `nsnps`
    perm = sortperm(pval)
    return perm[1:nsnps]
end

"""
    get_top_GEMMA_SNP_ids(sim::Int, nsnps::Int)

Return the row indices, within GEMMA's association table for simulation `sim`, of the
`nsnps` SNPs with the smallest Wald p-values (column `p_wald`).
"""
function get_top_GEMMA_SNP_ids(sim::Int, nsnps::Int)
    assoc_file = "NFBCsim/sim$sim/" * "gemma.sim$sim.assoc.txt"
    assoc = CSV.read(assoc_file, DataFrame)

    # ascending order of p-value: most significant rows come first
    ranking = sortperm(assoc[!, :p_wald])
    return ranking[1:nsnps]
end

"""
    get_IHT_SNPs(sim::Int)

Return the positions of the SNPs selected by IHT in simulation `sim`, i.e. every index
that is non-zero in `iht_beta1.txt` or in `iht_beta2.txt`. Indices from the first file
come first, followed by those that appear only in the second file.
"""
function get_IHT_SNPs(sim::Int)
    simdir = "NFBCsim/sim$sim/"
    β_trait1 = vec(readdlm(simdir * "iht_beta1.txt"))
    β_trait2 = vec(readdlm(simdir * "iht_beta2.txt"))

    selected_trait1 = findall(!iszero, β_trait1)
    selected_trait2 = findall(!iszero, β_trait2)
    return unique(union(selected_trait1, selected_trait2))
end

"""
    get_true_SNPs(sim::Int)

Get positions for the truly causal SNPs in simulation `sim`.

Reads the true effect matrix from `trueb.txt` in the simulation's folder under `NFBCsim/`
and returns the row index of every row that has at least one non-zero entry. Each row is
reported once, in order of first appearance when the matrix is scanned column by column.
"""
function get_true_SNPs(sim::Int)
    dir = "NFBCsim/sim$sim/"
    trueB = readdlm(dir * "trueb.txt")

    # findall yields CartesianIndex (row, column); keep the row, drop repeats across columns
    causal_snps = unique([x[1] for x in findall(!iszero, trueB)])
    return causal_snps
end

"""
    get_gemma_causal_snp_pos(sim::Int)

Load GEMMA's association table for simulation `sim` and locate, by rsID, the rows that
correspond to the true causal SNPs and to the SNPs selected by IHT. GEMMA filters SNPs,
so its rows do not line up with the full NFBC SNP list; rsIDs are used to translate.

Returns `(gemma_df, causal_snp_idx, iht_snps_idx)`, where `gemma_df` has an extra `pos`
column appended holding each row's position taken from the NFBC SNP information table.

Every `indexin` lookup is converted to `Vector{Int}`, so the conversion throws if a
looked-up rsID is absent from the table it is searched in.
"""
function get_gemma_causal_snp_pos(sim::Int)
    nfbc = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
    snp_info = nfbc.snp_info
    simdir = "NFBCsim/sim$sim/"

    # rsIDs of the rows of the true effect matrix that carry a non-zero effect
    trueB = readdlm(simdir * "trueb.txt")
    causal_rows = unique([ci[1] for ci in findall(!iszero, trueB)])
    causal_rsids = snp_info.snpid[causal_rows]

    # rows of those rsIDs inside GEMMA's table
    gemma_df = CSV.read("NFBCsim/sim$sim/gemma.sim$sim.assoc.txt", DataFrame)
    gemma_rsids = gemma_df[!, :rs]
    causal_snp_idx = convert(Vector{Int}, indexin(causal_rsids, gemma_rsids))

    # same translation for the SNPs IHT selected
    iht_rsids = snp_info.snpid[get_IHT_SNPs(sim)]
    iht_snps_idx = convert(Vector{Int}, indexin(iht_rsids, gemma_rsids))

    # attach the position of every GEMMA row as a new last column
    gemma2nfbc = convert(Vector{Int}, indexin(gemma_rsids, snp_info.snpid))
    row_positions = Int[snp_info.position[j] for j in gemma2nfbc]
    insertcols!(gemma_df, size(gemma_df, 2) + 1, :pos => row_positions)

    return gemma_df, causal_snp_idx, iht_snps_idx
end

"""
    process_gemma_result(filename, correct_snps)

Score a GEMMA association file against the known causal SNPs.

- filename: gemma's output file name
- correct_snps: rsIDs of the real causal SNPs. Significant SNPs are collected from the
  `rs` column, so the comparison is made on rsIDs rather than on row indices.

A SNP is called significant when its Wald p-value (`p_wald`) is at most `0.05 / m`,
where `m` is the number of rows in the file (Bonferroni threshold).

returns: power, number of false positives, and false positive rate
"""
function process_gemma_result(filename, correct_snps)
    # read GEMMA result
    gemma_df = CSV.read(filename, DataFrame)
    snps = size(gemma_df, 1)

    # only the Wald p-values and the rsIDs are needed for scoring
    pval_wald = gemma_df[!, :p_wald]

    # check how many real SNPs were recovered
    signif_snps = findall(x -> x ≤ 0.05 / snps, pval_wald) # gemma's selected snps
    signif_snps_rsID = gemma_df[signif_snps, :rs]

    # compute power, false positives, and false positive rate
    return power_and_fpr(snps, correct_snps, signif_snps_rsID)
end

"""
    process_mvPLINK(filename, correct_snps)

Score an mvPLINK result file against the known causal SNPs.

- filename: mvPLINK's output file name
- correct_snps: row indices of the real causal SNPs

A SNP is called significant when its p-value is at most `0.05 / m`, with `m` the number
of rows in the file. If the `P` column was not parsed as `Float64`, entries equal to
"NA" are replaced by "1.0" before parsing.

returns: power, number of false positives, and false positive rate
"""
function process_mvPLINK(filename, correct_snps)
    table = CSV.read(filename, DataFrame, delim=' ', ignorerepeated=true)
    ntests = size(table, 1)

    # numeric p-values; "NA" is mapped to 1.0 when the column came in as text
    if eltype(table[!, :P]) == Float64
        pval = table[!, :P]
    else
        na_rows = findall(==("NA"), table[!, :P])
        table[na_rows, :P] .= "1.0"
        pval = parse.(Float64, table[!, :P])
    end

    # rows passing the Bonferroni threshold
    cutoff = 0.05 / ntests
    hits = findall(≤(cutoff), pval)

    return power_and_fpr(ntests, correct_snps, hits)
end

"""
    power_and_fpr(p, correct_snps, detected_snps)

Computes power and false positive rates
- p: total number of SNPs
- correct_snps: Indices (or rsID) of the true causal SNPs
- detected_snps: Indices (or rsID) of SNPs that are significant after testing

Returns `(power, FP, FPR)`:
- `power = TP / (number of causal SNPs)`
- `FP` is the number of detected SNPs that are not causal
- `FPR = FP / (FP + TN)`, where `TN` counts SNPs that are neither causal nor detected,
  so the denominator is the number of non-causal SNPs. Causal SNPs that were missed are
  false negatives and are not counted as true negatives.

Repeated entries in either input are counted once. `power` is `NaN` when `correct_snps`
is empty.
"""
function power_and_fpr(p::Integer, correct_snps::AbstractVector, detected_snps::AbstractVector)
    correct = unique(correct_snps)
    detected = unique(detected_snps)

    TP = length(detected ∩ correct)            # causal and detected
    power = TP / length(correct)
    FP = length(detected) - TP                 # detected but not causal
    TN = p - length(detected ∪ correct)        # neither causal nor detected
    FPR = FP / (FP + TN)
    return power, FP, FPR
end

"""
    summarize_repeats()

Summarize simulations 1 through 50 for IHT, mvPLINK, GEMMA in true positives (power),
false positives, and false positive rates.

For each simulation folder under `NFBCsim/` the true effects, the IHT estimates, the
mvPLINK table and the GEMMA table are read and scored with `power_and_fpr`. IHT is scored
against a hard-coded total of 26906 SNPs; mvPLINK and GEMMA use the row count of their
own result file. A simulation whose files cannot be read or scored is skipped and
reported, and contributes nothing to any vector.

Returns twelve vectors in the order `time, power, FP, FPR` for IHT, mvPLINK and GEMMA.
The three `*_time` vectors are returned empty: timings are not collected here.
"""
function summarize_repeats()
    p = 26906   # total number of SNPs used when scoring IHT
    sims = 1:50 # k = 10, r = 2, βoverlap=2, polygenic model
    nfbc = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
    snp_rsID = nfbc.snp_info.snpid

    iht_time, iht_power, iht_FP, iht_FPR = Float64[], Float64[], Float64[], Float64[]
    mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPR = Float64[], Float64[], Float64[], Float64[]
    gemma_time, gemma_power, gemma_FP, gemma_FPR = Float64[], Float64[], Float64[], Float64[]

    for sim in sims
        dir = "NFBCsim/sim$sim/"
        try
            # correct SNPs
            trueB = readdlm(dir * "trueb.txt")
            causal_snps = unique([x[1] for x in findall(!iszero, trueB)])
            causal_snps_rsID = snp_rsID[causal_snps]

            # IHT
            iht_β1 = vec(readdlm(dir * "iht_beta1.txt"))
            iht_β2 = vec(readdlm(dir * "iht_beta2.txt"))
            detected_snps = findall(!iszero, iht_β1) ∪ findall(!iszero, iht_β2)
            ihtpower, ihtFP, ihtFPR = power_and_fpr(p, causal_snps, detected_snps)

            # MVPLINK (scored on row indices)
            plinkpower, plinkFP, plinkFPR = process_mvPLINK(dir * "plink.mqfam.total", causal_snps)

            # GEMMA (scored on rsIDs)
            gemmapower, gemmaFP, gemmaFPR = process_gemma_result(dir * "gemma.sim$sim.assoc.txt", causal_snps_rsID)

            # record only once all three methods were scored, so the vectors stay aligned
            push!(iht_power, ihtpower); push!(iht_FP, ihtFP); push!(iht_FPR, ihtFPR)
            push!(mvPLINK_power, plinkpower); push!(mvPLINK_FP, plinkFP); push!(mvPLINK_FPR, plinkFPR)
            push!(gemma_power, gemmapower); push!(gemma_FP, gemmaFP); push!(gemma_FPR, gemmaFPR)
        catch err
            # a user interrupt must stop the loop rather than be reported as a failed simulation
            err isa InterruptException && rethrow()
            println("simulation $sim failed!")
            # say why the simulation was skipped instead of discarding the error
            @warn "skipping simulation $sim" exception = (err, catch_backtrace())
        end
    end

    return iht_time, iht_power, iht_FP, iht_FPR,
        mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPR,
        gemma_time, gemma_power, gemma_FP, gemma_FPR
end

"""
    plot_gemma_manhattan(sim::Int)

Draw a Manhattan plot of GEMMA's Wald p-values for simulation `sim`, passing the causal
SNPs and the IHT-selected SNPs to `manhattan` as the two annotation sets, then display
the saved PNG. P-values below 1e-50 are raised to 1e-50 before plotting.
"""
function plot_gemma_manhattan(sim::Int)
    gemma_df, causal_snps, iht_snps = get_gemma_causal_snp_pos(sim)

    # rename the columns to the names used in the plotting call below
    rename!(gemma_df, [:p_wald => :pval, :rs => :snpid])

    # floor extremely small p-values
    tiny_rows = findall(<(1e-50), gemma_df[!, :pval])
    gemma_df[tiny_rows, :pval] .= 1e-50

    # column of empty strings used as the annotation variable
    blank_labels = fill("", size(gemma_df, 1))
    insertcols!(gemma_df, size(gemma_df, 2) + 1, :empty_col => blank_labels)

    png_path = "NFBCsim/manhattan_gemma_sim$sim.png"
    manhattan(gemma_df, outfile = png_path,
        annotateinds = causal_snps, annotateinds2 = iht_snps,
        annotatevar=:empty_col, titles="GEMMA simulation $sim")
    display("image/png", read(png_path))
end

"""
    plot_mvPLINK_manhattan(sim::Int)

Draw a Manhattan plot of mvPLINK's p-values for simulation `sim`, passing the causal SNPs
and the IHT-selected SNPs to `manhattan` as the two annotation sets, then display the
saved PNG. "NA" p-values are treated as 1.0 and p-values below 1e-50 are raised to 1e-50.
"""
function plot_mvPLINK_manhattan(sim::Int)
    # p-values reported by mvPLINK
    result_file = "NFBCsim/sim$sim/plink.mqfam.total"
    table = CSV.read(result_file, DataFrame, delim=' ', ignorerepeated=true)
    if eltype(table[!, :P]) == Float64
        pval = table[!, :P]
    else
        na_rows = findall(==("NA"), table[!, :P])
        table[na_rows, :P] .= "1.0"
        pval = parse.(Float64, table[!, :P])
    end
    tiny = findall(<(1e-50), pval)
    pval[tiny] .= 1e-50 # limit smallest pvalues

    # SNP sets to annotate
    causal_snps = get_true_SNPs(sim)
    iht_snps = get_IHT_SNPs(sim)

    # build the plotting table from the SNP information of the PLINK data set
    snpdata = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
    info = snpdata.snp_info
    rename!(info, [:chr, :snpid, :genetic_distance, :pos, :allele1, :allele2])
    insertcols!(info, size(info, 2) + 1, :pval => pval)
    blank_labels = fill("", size(info, 1))
    insertcols!(info, size(info, 2) + 1, :empty_col => blank_labels)

    # plot and show
    png_path = "NFBCsim/manhattan_mvPLINK_sim$sim.png"
    manhattan(info, outfile = png_path,
        annotateinds = causal_snps, annotateinds2 = iht_snps,
        annotatevar=:empty_col, titles="mvPLINK simulation $sim")
    display("image/png", read(png_path))
end

"""
    plot_mvPLINK_QQ(sim::Int)

Draw a QQ plot of mvPLINK's p-values for simulation `sim` and display the saved PNG.
"NA" p-values are treated as 1.0 and p-values below 1e-50 are raised to 1e-50.
"""
function plot_mvPLINK_QQ(sim::Int)
    result_file = "NFBCsim/sim$sim/plink.mqfam.total"
    table = CSV.read(result_file, DataFrame, delim=' ', ignorerepeated=true)

    # numeric p-values; "NA" is mapped to 1.0 when the column came in as text
    if eltype(table[!, :P]) == Float64
        pval = table[!, :P]
    else
        na_rows = findall(==("NA"), table[!, :P])
        table[na_rows, :P] .= "1.0"
        pval = parse.(Float64, table[!, :P])
    end
    tiny = findall(<(1e-50), pval)
    pval[tiny] .= 1e-50 # limit smallest pvalues

    png_path = "NFBCsim/QQ_mvPLINK_sim$sim.png"
    qq(pval, outfile = png_path,
        ylabel="mvPLINK observed -log10(p)", titles="mvPLINK simulation $sim")
    display("image/png", read(png_path))
end

"""
    plot_gemma_QQ(sim::Int)

Draw a QQ plot of GEMMA's Wald p-values for simulation `sim` and display the saved PNG.
P-values below 1e-50 are raised to 1e-50 before plotting.
"""
function plot_gemma_QQ(sim::Int)
    assoc_file = "NFBCsim/sim$sim/gemma.sim$sim.assoc.txt"
    assoc = CSV.read(assoc_file, DataFrame)

    pvals = assoc[!, :p_wald]
    tiny = findall(<(1e-50), pvals)
    pvals[tiny] .= 1e-50 # limit smallest pvalues

    png_path = "NFBCsim/QQ_gemma_sim$sim.png"
    qq(pvals, outfile = png_path,
        ylabel="GEMMA observed -log10(p)", titles="GEMMA simulation $sim")
    display("image/png", read(png_path))
end

"""
    read_summary(simulations::UnitRange)

After performing n simulations using `run_repeats`, this function reads the summary files
for each simulation and summarizes the result.

For every simulation number in `simulations`, `summary.txt` in that simulation's folder
under `NFBCsim/` is opened, its first three lines are skipped, and the next three lines
are parsed as the IHT, mvPLINK and GEMMA results (time in seconds, power, number of
false positives, false positive rate).

A simulation is recorded only if all three lines parse; otherwise it is reported as
failed and skipped entirely, so the twelve returned vectors always have equal length and
entry `i` of each refers to the same simulation.

The collected values are also written as CSV to `NFBCsim_summary.txt` in the current
directory. Returns the twelve vectors in the order `time, power, FP, FPT` for IHT,
mvPLINK and GEMMA (`FPT` holds the false positive rate).
"""
function read_summary(simulations::UnitRange)
    model = "NFBCsim"

    iht_time, iht_power, iht_FP, iht_FPT = Float64[], Float64[], Float64[], Float64[]
    mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPT = Float64[], Float64[], Float64[], Float64[]
    gemma_time, gemma_power, gemma_FP, gemma_FPT = Float64[], Float64[], Float64[], Float64[]

    regex = r"= (\d+\.\d+) seconds, power = (\d+\.\d+), FP = (\d+), FPR = (\d\.\d+e?-?\d*)"

    # Turn one result line into (time, power, FP, FPR); throws if the line does not match.
    function parse_result_line(line)
        m = match(regex, line)
        m === nothing && error("unrecognized summary line: $line")
        return ntuple(i -> parse(Float64, m[i]), 4)
    end

    for sim in simulations
        summary_file = "$model/sim$(sim)/summary.txt"
        if !isdir("$model/sim$sim") || !isfile(summary_file)
            println("Simulation $sim failed!")
            continue
        end
        try
            # parse all three methods before recording anything, so a bad line cannot
            # leave the result vectors with different lengths
            iht, mvPLINK, gemma = open(summary_file, "r") do io
                readline(io); readline(io); readline(io) # skip the three header lines
                (parse_result_line(readline(io)),  # IHT
                 parse_result_line(readline(io)),  # mvPLINK
                 parse_result_line(readline(io)))  # GEMMA
            end

            # record IHT result
            push!(iht_time, iht[1])
            push!(iht_power, iht[2])
            push!(iht_FP, iht[3])
            push!(iht_FPT, iht[4])

            # record mvPLINK result
            push!(mvPLINK_time, mvPLINK[1])
            push!(mvPLINK_power, mvPLINK[2])
            push!(mvPLINK_FP, mvPLINK[3])
            push!(mvPLINK_FPT, mvPLINK[4])

            # record GEMMA result
            push!(gemma_time, gemma[1])
            push!(gemma_power, gemma[2])
            push!(gemma_FP, gemma[3])
            push!(gemma_FPT, gemma[4])
        catch
            println("Simulation $sim failed!")
            continue
        end
    end

    # save summary statistics
    open("$(model)_summary.txt", "w") do io
        println(io, "iht_time,iht_power,iht_FP,iht_FPT,mvPLINK_time,mvPLINK_power," * 
            "mvPLINK_FP,mvPLINK_FPT,gemma_time,gemma_power,gemma_FP,gemma_FPT")
        for i in eachindex(iht_time)
            print(io, iht_time[i], ',', iht_power[i], ',', iht_FP[i], ',', iht_FPT[i], ',')
            print(io, mvPLINK_time[i], ',', mvPLINK_power[i], ',', mvPLINK_FP[i], ',', mvPLINK_FPT[i], ',')
            print(io, gemma_time[i], ',', gemma_power[i], ',', gemma_FP[i], ',', gemma_FPT[i], "\n")
        end
    end

    return iht_time, iht_power, iht_FP, iht_FPT, 
        mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPT,
        gemma_time, gemma_power, gemma_FP, gemma_FPT
end

"""
    simulate_random_beta(k, p, traits; overlap=0, βσ=1.0)

Simulate a `p × traits` effect-size matrix with `k` non-zero entries in total, each drawn
from `Normal(0, βσ)`.

k = Number of causal SNPs (total number of non-zero entries across all traits)
p = Total number of SNPs
traits = Number of traits (phenotypes)
overlap = number of causal SNPs shared in each trait
βσ = second parameter of the `Normal` distribution the effects are drawn from

With `overlap == 0` the `k` entries are placed uniformly at random over the whole matrix.
Otherwise `overlap` rows are chosen and given a non-zero effect in every trait, and the
remaining `k - traits * overlap` entries are placed uniformly at random among the entries
that do not lie in a shared row.

Throws an `ArgumentError` if `k < traits * overlap`.
"""
function simulate_random_beta(k::Int, p::Int, traits::Int; overlap::Int=0, βσ=1.0)
    d = Normal(0, βσ)
    true_b = zeros(p, traits)
    if overlap == 0
        causal_snps = sample(1:(traits * p), k, replace=false)
        true_b[causal_snps] = rand(d, k)
    else
        n_nonshared = k - traits * overlap
        n_nonshared ≥ 0 || throw(ArgumentError(
            "k = $k is smaller than traits * overlap = $(traits * overlap)"))

        shared_snps = sample(1:p, overlap, replace=false)

        # uniform weights over all entries of the p × traits matrix (linear indexing)
        weights = fill(1 / (traits * (p - overlap)), traits * p)
        for i in 1:traits
            # entry (s, i) has linear index (i - 1) * p + s; exclude shared rows in every trait
            weights[(i - 1) * p .+ shared_snps] .= 0.0
        end
        weight_vector = aweights(weights)
        @assert sum(weight_vector) ≈ 1.0

        # simulate β for shared predictors
        for i in 1:traits
            true_b[shared_snps, i] = rand(d, overlap)
        end

        # simulate β for non-shared predictors
        nonshared_snps = sample(1:(traits * p), weight_vector, n_nonshared, replace=false)
        true_b[nonshared_snps] = rand(d, n_nonshared)
    end

    return true_b
end

"""
    adhoc_mul!(out, st, v)

ad-hoc matrix-matrix multiplication (out = st * v) using SnpLinAlg

The product is computed one column at a time: column `i` of `out` is overwritten with
`st` times column `i` of `v` via `SnpArrays.mul!`. `out` must have as many rows as `st`
and as many columns as `v`, and `st` must have as many columns as `v` has rows.
Returns `nothing`.
"""
function adhoc_mul!(
    out::AbstractMatrix{T}, 
    st::SnpLinAlg{T},
    v::AbstractMatrix{T}) where T <: AbstractFloat
    # all three dimensions must agree, including the column count of `out` against `v`
    @assert size(out, 1) == size(st, 1) && size(out, 2) == size(v, 2) && size(st, 2) == size(v, 1)
    for i in axes(v, 2)
        outi = @view(out[:, i])
        vi = @view(v[:, i])
        SnpArrays.mul!(outi, st, vi)
    end
    return nothing
end

"""
    simulate_NFBC1966_polygenic(plinkname, k, r; seed=2021, σg=0.6, σe=0.4, βoverlap=2, βσ=1.0)

Simulate `r` traits from the genotypes stored in `plinkname * ".bed"` under a variance
component model, with `k` non-zero effects placed by `simulate_random_beta`.

The global RNG is seeded with `seed`. The sample covariance is `σg * Φ + σe * I` with
`Φ` the GRM computed from the genotypes, and the trait covariance `Σ` comes from
`random_covariance_matrix(r)`. The mean is the intercept term plus `xla * B`.

Returns `xla, Matrix(Z'), B, Σ, Y`: the imputed, centered and scaled genotype matrix, the
transposed intercept covariate, the effect matrix, the trait covariance and the
simulated phenotypes (transposed output of `simulate`; presumably traits × samples —
TODO confirm against TraitSimulation).
"""
function simulate_NFBC1966_polygenic(
    plinkname::String, k::Int, r::Int;
    seed::Int=2021, σg=0.6, σe=0.4, βoverlap=2, βσ=1.0,
    )
    # set seed (global RNG, so every random draw below is reproducible for a given seed)
    Random.seed!(seed)
    
    # read genotypes from the `.bed` file; missing entries are imputed in the conversion
    x = SnpArray(plinkname * ".bed")
#     xla = SnpLinAlg{Float64}(x, model=ADDITIVE_MODEL, impute=true, center=true, scale=true)
    xla = convert(Matrix{Float64}, x, impute=true, center=true, scale=true)
    n, p = size(x)
    
    # intercept is the only nongenetic covariate
    Z = ones(n, 1)
    intercepts = randn(r)' # each trait has a different (random) intercept

    # simulate β
    B = simulate_random_beta(k, p, r, overlap=βoverlap, βσ=βσ)

    # between trait covariance matrix
    Σ = random_covariance_matrix(r)

    # between sample covariance is a weighted sum of the GRM and the identity
    Φ = grm(x, method=:Robust)
    V = σg * Φ + σe * I

    # simulate y using TraitSimulations.jl (https://github.com/OpenMendel/TraitSimulation.jl/blob/master/src/modelframework.jl#L137)
    vc = @vc Σ ⊗ V
    μ = zeros(n, r)
    μ_null = zeros(n, r)
    # μ_null = Z * intercepts (non-genetic part of the mean)
    LinearAlgebra.mul!(μ_null, Z, intercepts)
#     adhoc_mul!(μ, xla, B)
    # μ = xla * B (genetic part of the mean)
    mul!(μ, xla, B)
    # μ = μ_null + μ
    BLAS.axpby!(1.0, μ_null, 1.0, μ)
    VCM_model = VCMTrait(Z, intercepts, xla, B, vc, μ)
    Y = Matrix(Transpose(simulate(VCM_model)))
    
    return xla, Matrix(Z'), B, Σ, Y
end

"""
    simulate_NFBC1966_sparse(plinkname, k, r; seed=2021, βoverlap=2, βσ=1.0)

Simulate `r` traits from the genotypes stored in `plinkname * ".bed"` with independent
samples: each sample's phenotype vector is drawn from a multivariate normal whose mean
is that sample's row of `xla * B` (intercepts are all zero) and whose covariance is `Σ`.

The global RNG is seeded with `seed`. Returns `xla, Matrix(Z'), B, Σ, Matrix(Y')`, i.e.
the genotype matrix, the transposed intercept covariate, the effect matrix, the trait
covariance, and the phenotypes as an `r × n` matrix.
"""
function simulate_NFBC1966_sparse(
    plinkname::String, k::Int, r::Int;
    seed::Int=2021, βoverlap=2, βσ = 1.0
    )
    Random.seed!(seed)

    # genotypes: read, then impute / center / scale into a dense matrix
    genotypes = SnpArray(plinkname * ".bed")
    xla = convert(Matrix{Float64}, genotypes, impute=true, center=true, scale=true)
    n, p = size(xla)

    # the only non-genetic covariate is an intercept, and it is zero for every trait
    Z = ones(n, 1)
    intercepts = zeros(r)'

    # true effects, then the between-trait covariance (this order fixes the RNG stream)
    B = simulate_random_beta(k, p, r, overlap=βoverlap, βσ=βσ)
    Σ = random_covariance_matrix(r)

    # per-sample mean vectors, one row per sample
    μ = xla * B + Z*intercepts

    # draw each sample's phenotypes independently
    Y = zeros(n, r)
    for i in 1:n
        row_mean = view(μ, i, :)
        Y[i, :] = rand(MvNormal(row_mean, Σ))
    end

    return xla, Matrix(Z'), B, Σ, Matrix(Y')
end

"""
    simulate_multivariate_sparse(n, p, k, r; seed=2021, βoverlap=2)

Simulate `r` traits on a randomly generated `n × p` SNP array. The global RNG is seeded
with `seed`; genotypes come from `simulate_random_snparray` and the response, trait
covariance and effects from `simulate_random_response` with a zero intercept.

Returns `xla, Matrix(z'), true_b, true_Σ, Matrix(Y')`.
"""
function simulate_multivariate_sparse(
    n::Int, p::Int, k::Int, r::Int;
    seed::Int=2021, βoverlap=2, 
    )
    Random.seed!(seed)

    # random genotypes, converted to an imputed / centered / scaled dense matrix
    snps = simulate_random_snparray(undef, n, p)
    xla = convert(Matrix{Float64}, snps, impute=true, center=true, scale=true)
    n, p = size(snps)

    # the only non-genetic covariate is an intercept, and it is zero for every trait
    z = ones(n, 1)
    intercepts = zeros(r)'

    # response, trait covariance and true effects (the causal positions are not needed)
    Y, true_Σ, true_b, _ = simulate_random_response(
        xla, k, r, Zu=z*intercepts, overlap=βoverlap)

    return xla, Matrix(z'), true_b, true_Σ, Matrix(Y')
end

simulate_multivariate_sparse (generic function with 1 method)

# Try running IHT

In [3]:
# simulation parameters
k = 10          # total number of non-zero effects passed to the simulator
r = 2           # number of traits
βσ = 0.1        # spread parameter of the Normal distribution the effects are drawn from
seed = 1111     # RNG seed used by the simulator
βoverlap = 2    # number of causal SNPs shared by every trait

# use chr21 of NFBC
chr21 = "/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/qc/NFBC.qc.chr.21"
xla, Z, true_b, Σ, Y = simulate_NFBC1966_sparse(chr21,k,r,seed=seed,βoverlap=βoverlap,βσ=βσ)
# rows of `true_b` that have a non-zero effect in at least one trait
causal_snps = unique([x[1] for x in findall(!iszero, true_b)]);

# alternative: simulate random genotypes instead of using NFBC
# n = 1000
# p = 10000
# xla, Z, true_b, Σ, Y = simulate_multivariate_sparse(n, p, k,r,seed=seed,βoverlap=βoverlap)
# causal_snps = unique([x[1] for x in findall(!iszero, true_b)]);

In [4]:
# show the simulated trait covariance, the causal SNP positions, and their true effects
println(Σ)
println(causal_snps)
true_b[causal_snps, :]

[0.056419962055242315 0.1802121298258207; 0.1802121298258207 2.723094788677102]
[2349, 3073, 3075, 4653, 5091, 5344, 5485, 2651]


8×2 Matrix{Float64}:
 -0.0523363   0.146389
 -0.105674    0.0
 -0.03193     0.0
  0.0654857   0.0
  0.0360225   0.0
 -0.0429316   0.0
  0.108861    0.0877691
  0.0        -0.141978

In [5]:
# Log-likelihood (additive constants dropped) of a multivariate Gaussian model without an
# intercept term: `Y` holds one column per sample, the mean is `B*X`, and `Γ` is the
# precision (inverse covariance) matrix of the traits.
function loglikelihood(X, Y, B, Γ)
    nsamples = size(Y, 2)
    E = Y - B*X                # residuals, one column per sample
    quadform = tr(Γ*E*E')
    return nsamples/2 * logdet(Γ) - 0.5 * quadform
end

# log-likelihood at the true parameters; this should be the best log-likelihood attainable
@show loglikelihood(Transpose(xla), Y, Transpose(true_b), inv(Σ))

# log-likelihood with zero effects and identity precision; this should be the value at iteration 1
@show loglikelihood(Transpose(xla), Y, zeros(size(Transpose(true_b))), [1.0 0.0; 0.0 1.0])

loglikelihood(Transpose(xla), Y, Transpose(true_b), inv(Σ)) = 373.8599630859753
loglikelihood(Transpose(xla), Y, zeros(size(Transpose(true_b))), [1.0 0.0; 0.0 1.0]) = -7480.311995286563


-7480.311995286563

In [6]:
# fit IHT with sparsity k = 12 and debiasing, with `init_beta=true`
result = fit_iht(Y, Transpose(xla), k=12, init_beta=true, verbose=true, debias=true)

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prior weight scaling = off
Doubly sparse projection = off
Debias = on
Max IHT iterations = 200
Converging when tol < 0.0001:

Iteration 1: loglikelihood = -348.5823880579728, backtracks = 0, tol = 0.0
Iteration 2: loglikelihood = -332.87462055781543, backtracks = 0, tol = 0.003647462926161623
Iteration 3: loglikelihood = -319.6504770864258, backtracks = 0, tol = 0.0033028925479674266
Iteration 4: loglikelihood = -308.2404972425411, backtracks = 0, tol = 0.003008504931047995



Compute time (sec):     0.5610418319702148
Final loglikelihood:    -80.25464250882578
Iterations:             7
Trait 1's SNP PVE:      0.25675536913268326
Trait 2's SNP PVE:      0.01544428569602941

Estimated trait covariance:
[1m2×2 DataFrame[0m
[1m Row [0m│[1m trait1   [0m[1m trait2   [0m
[1m     [0m│[90m Float64  [0m[90m Float64  [0m
─────┼────────────────────
   1 │ 0.064432  0.180975
   2 │ 0.180975  2.67285

Trait 1: IHT estimated 3 nonzero SNP predictors
[1m3×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     [0m│[90m Int64    [0m[90m Float64     [0m
─────┼───────────────────────
   1 │     2349   -0.0501577
   2 │     3073   -0.0953751
   3 │     5485    0.106728

Trait 1: IHT estimated 0 non-genetic predictors
[1m0×2 DataFrame[0m

Trait 2: IHT estimated 9 nonzero SNP predictors
[1m9×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     [0m│[90m Int64    [0m[90m Float64     [0m
─────┼───────────

In [7]:
# same fit without `init_beta`; `result` from the previous cell is overwritten
result = fit_iht(Y, Transpose(xla), k=12, verbose=true, debias=true)

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 12
Prior weight scaling = off
Doubly sparse projection = off
Debias = on
Max IHT iterations = 200
Converging when tol < 0.0001:

Iteration 1: loglikelihood = -1086.7080446381206, backtracks = 0, tol = 0.0
Iteration 2: loglikelihood = -1083.8182233107045, backtracks = 0, tol = 7.068275112573435e-5
Iteration 3: loglikelihood = -176.068152614067, backtracks = 0, tol = 0.035389719786740916
Iteration 4: loglikelihood = -47.66422231391789, backtracks = 0, tol = 0.025845269446551987
I


Compute time (sec):     0.7363009452819824
Final loglikelihood:    355.59977418903236
Iterations:             9
Trait 1's SNP PVE:      0.39179817428213126
Trait 2's SNP PVE:      0.008613478375068375

Estimated trait covariance:
[1m2×2 DataFrame[0m
[1m Row [0m│[1m trait1    [0m[1m trait2   [0m
[1m     [0m│[90m Float64   [0m[90m Float64  [0m
─────┼─────────────────────
   1 │ 0.0562832  0.179476
   2 │ 0.179476   2.69174

Trait 1: IHT estimated 7 nonzero SNP predictors
[1m7×2 DataFrame[0m
[1m Row [0m│[1m Position [0m[1m Estimated_β [0m
[1m     [0m│[90m Int64    [0m[90m Float64     [0m
─────┼───────────────────────
   1 │     2349   -0.0421664
   2 │     3073   -0.103315
   3 │     3077    0.0335914
   4 │     4653    0.0661619
   5 │     5091    0.04317
   6 │     5344   -0.0359141
   7 │     5485    0.105314

Trait 1: IHT estimated 0 non-genetic predictors
[1m0×2 DataFrame[0m

Trait 2: IHT estimated 5 nonzero SNP predictors
[1m5×2 DataFrame[0m
[1m Row

In [8]:
# beta 1: estimated (left column) vs. true (right column) effects at the causal SNPs
[result.beta[1, causal_snps] true_b[causal_snps, 1]]

8×2 Matrix{Float64}:
 -0.0421664  -0.0523363
 -0.103315   -0.105674
  0.0        -0.03193
  0.0661619   0.0654857
  0.04317     0.0360225
 -0.0359141  -0.0429316
  0.105314    0.108861
  0.0         0.0

In [9]:
# beta 2: estimated (left column) vs. true (right column) effects at the causal SNPs
[result.beta[2, causal_snps] true_b[causal_snps, 2]]

8×2 Matrix{Float64}:
 0.0898681   0.146389
 0.0         0.0
 0.0         0.0
 0.0         0.0
 0.0         0.0
 0.0         0.0
 0.0265674   0.0877691
 0.0        -0.141978

In [10]:
# number of true positives: causal SNPs whose estimated effects, summed over traits, are non-zero
count(!iszero, sum(result.beta[:, causal_snps], dims=1))

6

In [11]:
# covariance: estimated (left column) vs. true (right column) entries of the trait covariance
[vec(result.Σ) vec(Σ)]

4×2 Matrix{Float64}:
 0.0562832  0.05642
 0.179476   0.180212
 0.179476   0.180212
 2.69174    2.72309

In [12]:
# positions of the causal SNPs, in the same order as the rows of the comparisons above
causal_snps

8-element Vector{Int64}:
 2349
 3073
 3075
 4653
 5091
 5344
 5485
 2651

## Try 20 simulations (NFBC data, no debias)

We will compare

+ Number of true positives
+ Number of iteration until convergence
+ Best cross validated k

in the scenario where we initialize the beta values versus the scenario where we do not. 

In [13]:
# Run `sim` simulations on chr21 of NFBC (seeds 1 through `sim`) and, for each one, fit
# IHT twice: without and with `init_beta=true`. In both cases the sparsity level is the
# index of the smallest cross-validation error over `path=1:20`.
# Returns six vectors of length `sim`: true positives, IHT iterations and selected k for
# the run without initialization, followed by the same three for the run with it.
function bench(sim::Int)
    # simulation parameters
    k, r = 10, 2
    βσ = 0.1
    βoverlap = 2
    chr21 = "/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/qc/NFBC.qc.chr.21"

    no_init_TP, no_init_iter, no_init_best_k = zeros(sim), zeros(sim), zeros(sim)
    yes_init_TP, yes_init_iter, yes_init_best_k = zeros(sim), zeros(sim), zeros(sim)

    for rep in 1:sim
        # fresh data set; the repetition number doubles as the seed
        xla, Z, true_b, Σ, Y = simulate_NFBC1966_sparse(chr21,k,r,seed=rep,βoverlap=βoverlap,βσ=βσ)
        causal_snps = unique([idx[1] for idx in findall(!iszero, true_b)])

        # without initializing beta
        mses = cv_iht(Y, Transpose(xla), path=1:20, verbose=true)
        best_k = argmin(mses)
        result = fit_iht(Y, Transpose(xla), k=best_k, verbose=true)
        no_init_TP[rep] = count(!iszero, sum(result.beta[:, causal_snps], dims=1))
        no_init_iter[rep] = result.iter
        no_init_best_k[rep] = best_k

        # with initialized beta
        mses = cv_iht(Y, Transpose(xla), path=1:20, init_beta=true)
        best_k = argmin(mses)
        result = fit_iht(Y, Transpose(xla), k=best_k, verbose=true, init_beta = true)
        yes_init_TP[rep] = count(!iszero, sum(result.beta[:, causal_snps], dims=1))
        yes_init_iter[rep] = result.iter
        yes_init_best_k[rep] = best_k
    end

    return no_init_TP, no_init_iter, no_init_best_k, 
        yes_init_TP, yes_init_iter, yes_init_best_k
end
# time the 20-replicate benchmark and unpack its six summary vectors
(no_init_TP, no_init_iter, no_init_best_k,
    yes_init_TP, yes_init_iter, yes_init_best_k) = @time bench(20)

[32mCross validating...100%|████████████████████████████████| Time: 0:04:28[39m




Crossvalidation Results:
	k	MSE
	1	7467.007578213517
	2	7467.016480222095
	3	7462.504268879455
	4	7462.414105189358
	5	7462.562881519958
	6	7462.729194037671
	7	7462.996119308439
	8	7463.0675206944925
	9	7463.05681877081
	10	7463.446247491652
	11	7463.47232982508
	12	7463.894828904651
	13	7463.66907552382
	14	7464.4044117643725
	15	7463.871210756556
	16	7464.021754154152
	17	7464.356385220899
	18	7465.448424125442
	19	7464.926936940883
	20	7465.868263113456

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 4
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:05:27[39m




Crossvalidation Results:
	k	MSE
	1	7458.260827330637
	2	7463.298120100859
	3	7474.3830244782
	4	7484.012558113514
	5	7486.173844259777
	6	7505.856346671087
	7	7501.784218116077
	8	7503.162459082315
	9	7513.37364831712
	10	7520.165486197674
	11	7529.161697023144
	12	7535.050121376507
	13	7542.672857041491
	14	7556.781802997405
	15	7567.135434435953
	16	7576.9031707688455
	17	7576.311293515613
	18	7590.36611846913
	19	7594.052264475409
	20	7593.668400750834

Best k = 1

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1
Prior

[32mCross validating...100%|████████████████████████████████| Time: 0:05:24[39m




Crossvalidation Results:
	k	MSE
	1	1913.9005791840955
	2	1905.4462744481466
	3	1863.2739527167591
	4	1863.4581973603604
	5	1863.5138945374124
	6	1863.4786807769447
	7	1863.4306845884544
	8	1863.5330829986765
	9	1863.7310478258287
	10	1863.758966373268
	11	1863.9978770170892
	12	1863.9254204177319
	13	1863.25820700546
	14	1863.4800615805486
	15	1863.4663678714178
	16	1863.4109901439967
	17	1863.8448675738432
	18	1863.8268729604583
	19	1863.815544089462
	20	1864.114810495742

Best k = 13

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:07:42[39m




Crossvalidation Results:
	k	MSE
	1	1866.7588600004276
	2	1862.7357551898574
	3	1862.2090026208543
	4	1861.5673432947003
	5	1858.5654607324034
	6	1848.0354248093338
	7	1841.8270221779096
	8	1831.7999355210693
	9	1817.699783932855
	10	1818.6929857486782
	11	1820.7475828654628
	12	1818.643501412179
	13	1820.0268776948615
	14	1817.6014017841712
	15	1819.7340908900137
	16	1822.8811507891799
	17	1822.081852348918
	18	1823.1173021731095
	19	1821.117200468349
	20	1822.027407776507

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:03:57[39m




Crossvalidation Results:
	k	MSE
	1	1275.5949236809383
	2	1275.6234997545293
	3	1200.0893836256407
	4	1199.3342830869578
	5	1193.106072877331
	6	1184.4223035597875
	7	1173.0030501563883
	8	1173.008263156605
	9	1173.437904734517
	10	1173.662314764025
	11	1173.8943800627726
	12	1173.2642555406755
	13	1172.6793006769817
	14	1173.5025057316966
	15	1175.0537661763074
	16	1174.398546070553
	17	1174.9634382027102
	18	1174.764771560614
	19	1174.9860586302548
	20	1174.0110039597312

Best k = 13

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:05:15[39m




Crossvalidation Results:
	k	MSE
	1	1235.5856558597393
	2	1234.8946512659018
	3	1200.5690737618988
	4	1200.291050889921
	5	1199.8432019050358
	6	1189.6128415552935
	7	1192.552614452596
	8	1181.4691056434144
	9	1181.8651133526657
	10	1174.1973325909153
	11	1173.3361213962814
	12	1174.3108635203332
	13	1173.7773020010613
	14	1174.7516766957879
	15	1175.019849498845
	16	1176.6081886721995
	17	1175.3757232317607
	18	1175.8741162588901
	19	1176.8736447019642
	20	1176.8406177640366

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity pa

[32mCross validating...100%|████████████████████████████████| Time: 0:02:00[39m




Crossvalidation Results:
	k	MSE
	1	4091.8797786863224
	2	4093.0011285132546
	3	4019.709499282237
	4	4026.209770087683
	5	4027.0461596943583
	6	4031.697798596103
	7	4031.437177857407
	8	4036.6099355816696
	9	4040.576443642172
	10	4039.994648807503
	11	4045.049656721885
	12	4047.3801386656132
	13	4043.703495620936
	14	4048.1393429485615
	15	4044.283347166554
	16	4050.060249993162
	17	4050.212569436352
	18	4046.4779805622
	19	4055.8922549066233
	20	4060.9278351493926

Best k = 3

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) 

[32mCross validating...100%|████████████████████████████████| Time: 0:03:16[39m




Crossvalidation Results:
	k	MSE
	1	4063.6980146258047
	2	4051.657269970886
	3	4029.1146693253127
	4	4032.5282866262596
	5	4035.7669815069116
	6	4038.519991905023
	7	4044.55250404415
	8	4045.1963060139947
	9	4050.3567677412057
	10	4045.448588171457
	11	4047.0045735648864
	12	4047.224546150397
	13	4056.619957431523
	14	4057.8523133420376
	15	4068.7472929614073
	16	4069.642760410755
	17	4070.9759731365057
	18	4066.1819629440597
	19	4085.544662085469
	20	4084.5842395983655

Best k = 3

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:02:26[39m




Crossvalidation Results:
	k	MSE
	1	2577.4119117248447
	2	2577.801174894436
	3	2486.938290292238
	4	2460.1963433441992
	5	2443.7795492846785
	6	2453.6240518018376
	7	2444.7504396910863
	8	2447.1474852086003
	9	2447.4545106107835
	10	2445.325915416952
	11	2446.6065261650087
	12	2447.7716495091895
	13	2446.569185014726
	14	2450.793474835577
	15	2447.8031902519147
	16	2447.3513071809566
	17	2448.0415649984875
	18	2450.9641613071426
	19	2453.934824244281
	20	2452.441238352465

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:02:48[39m




Crossvalidation Results:
	k	MSE
	1	2525.7531443742305
	2	2500.109401319319
	3	2473.62510207123
	4	2463.4792586332906
	5	2445.350674053392
	6	2444.5174517612195
	7	2445.6655508936647
	8	2446.063620425999
	9	2446.6634392227093
	10	2447.886075108174
	11	2449.0641683853983
	12	2448.541077217696
	13	2448.042604174436
	14	2449.4634474731147
	15	2453.648637561276
	16	2451.8906254212707
	17	2453.8658189942107
	18	2456.1288515971464
	19	2455.893900641504
	20	2459.7782420067097

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter 

[32mCross validating...100%|████████████████████████████████| Time: 0:04:15[39m




Crossvalidation Results:
	k	MSE
	1	1972.035109245357
	2	1972.0612516485928
	3	1914.8683229206067
	4	1907.7905172480014
	5	1903.6048614024266
	6	1883.8518215923516
	7	1885.7015858560576
	8	1883.6167371532656
	9	1887.472327400245
	10	1884.1066110283637
	11	1885.9923893064952
	12	1890.444385087242
	13	1894.3462304716245
	14	1896.3661274774047
	15	1901.8127307482878
	16	1902.075067598375
	17	1903.0225267979913
	18	1898.9778280619055
	19	1905.2727120564346
	20	1907.4585959692486

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:04:09[39m




Crossvalidation Results:
	k	MSE
	1	1915.815076872546
	2	1915.9495109866073
	3	1899.832156074063
	4	1899.09157591323
	5	1885.7465119167941
	6	1883.0324524493017
	7	1883.0298761657239
	8	1883.5137356463017
	9	1885.3899935222857
	10	1890.2353721258519
	11	1892.9838215902305
	12	1894.3912378477885
	13	1894.8030871675605
	14	1894.9226936773596
	15	1898.43842446982
	16	1902.4001097807318
	17	1897.2344662090131
	18	1898.8342023997743
	19	1900.882345725138
	20	1906.0222519148979

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:03:21[39m




Crossvalidation Results:
	k	MSE
	1	288.854369521326
	2	288.8629007872701
	3	224.86065102426846
	4	223.4744591049007
	5	223.04808249187099
	6	223.24520820565604
	7	223.04595027233628
	8	222.47666605149428
	9	222.53006533528966
	10	220.77456040499615
	11	220.8123875956328
	12	220.84857197013963
	13	218.9030984805645
	14	217.83856436674546
	15	217.88334707666658
	16	217.8537716398566
	17	218.669789272961
	18	217.91530118807992
	19	218.00732123970494
	20	218.03217565606656

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:02:41[39m




Crossvalidation Results:
	k	MSE
	1	230.06499598836677
	2	226.6172244150579
	3	224.89981254768128
	4	223.52526435005606
	5	217.89888742588596
	6	217.46790407212765
	7	217.78779844178263
	8	217.48279252896398
	9	217.50139509107365
	10	217.21040315464535
	11	216.82576406323156
	12	216.8990872974978
	13	216.1276318328716
	14	214.32556706241627
	15	214.3513965049052
	16	214.39359265225832
	17	214.600751471613
	18	215.1274707112658
	19	216.3600099240253
	20	216.39581504763746

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:01:15[39m




Crossvalidation Results:
	k	MSE
	1	4813.621613165374
	2	4814.2183939489305
	3	4780.5828657746315
	4	4771.3952911906845
	5	4766.739580009461
	6	4764.092223248155
	7	4768.624323403346
	8	4768.739347129635
	9	4770.305229320452
	10	4774.563311220815
	11	4775.11612975745
	12	4778.783967621034
	13	4780.366879549602
	14	4776.706730704579
	15	4781.074889380486
	16	4769.663328459639
	17	4774.8098187869455
	18	4774.74791761279
	19	4771.856301193318
	20	4771.517648989404

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 6


[32mCross validating...100%|████████████████████████████████| Time: 0:02:59[39m




Crossvalidation Results:
	k	MSE
	1	4800.374708761479
	2	4781.847687385784
	3	4757.5677472898915
	4	4748.102393441143
	5	4745.122878570222
	6	4748.559299736944
	7	4735.049003738351
	8	4736.6183101967135
	9	4743.475193978542
	10	4736.692626860243
	11	4731.354820368104
	12	4724.352606787902
	13	4723.028066251027
	14	4722.128479661051
	15	4720.799973897028
	16	4723.423154207945
	17	4727.9253885847575
	18	4734.065548793302
	19	4729.778673733047
	20	4745.820552579169

Best k = 15

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 

[32mCross validating...100%|████████████████████████████████| Time: 0:03:29[39m




Crossvalidation Results:
	k	MSE
	1	3503.042385940169
	2	3503.232769359981
	3	3434.5665966851348
	4	3438.3432006108037
	5	3437.7950749791735
	6	3432.2572820329738
	7	3415.8116100172288
	8	3412.4162771876413
	9	3416.8625864605806
	10	3414.905234513062
	11	3412.3218977134125
	12	3412.8983302925494
	13	3412.89687184643
	14	3415.0873337967014
	15	3410.4608037003236
	16	3413.181952155901
	17	3411.7639980308213
	18	3411.282750405425
	19	3411.090263533574
	20	3411.127699705403

Best k = 15

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:04:19[39m




Crossvalidation Results:
	k	MSE
	1	3444.4371432071093
	2	3435.788569999415
	3	3432.412012824152
	4	3437.056098805003
	5	3428.4639468315545
	6	3425.0465122851897
	7	3418.9340018814883
	8	3418.545364573005
	9	3411.9310337582365
	10	3416.512545374907
	11	3419.362401252404
	12	3424.901176278615
	13	3430.4023217030326
	14	3426.8531665542578
	15	3424.078215750423
	16	3423.795684604597
	17	3421.288507769044
	18	3424.812482890294
	19	3425.0571298391496
	20	3434.155751193579

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:03:31[39m




Crossvalidation Results:
	k	MSE
	1	5491.178134277059
	2	5492.756483258866
	3	5475.274378050346
	4	5476.285410646168
	5	5468.339179142937
	6	5475.213842773202
	7	5485.588414666186
	8	5489.955250413212
	9	5498.646702080656
	10	5500.836311450659
	11	5492.268785439828
	12	5508.21762349822
	13	5509.238853654272
	14	5528.506689231456
	15	5519.956473548504
	16	5538.725978932884
	17	5529.847151545998
	18	5536.901154717026
	19	5530.329240167266
	20	5532.214479112587

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 5
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:04:09[39m




Crossvalidation Results:
	k	MSE
	1	5479.3076340526995
	2	5466.130185830577
	3	5473.941204890789
	4	5478.763920039964
	5	5478.846669989436
	6	5475.967693321434
	7	5480.424278655223
	8	5473.5751677376475
	9	5484.853973023236
	10	5485.122121782384
	11	5484.27954442408
	12	5484.600730546089
	13	5489.049080835641
	14	5508.458014023352
	15	5515.560209755948
	16	5511.3173242837875
	17	5521.583359637425
	18	5526.95291862195
	19	5539.344957729575
	20	5540.180584535619

Best k = 2

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 2
P

[32mCross validating...100%|████████████████████████████████| Time: 0:02:08[39m




Crossvalidation Results:
	k	MSE
	1	5881.990518093519
	2	5882.906977820634
	3	5809.526491630293
	4	5804.182442047137
	5	5806.965553234308
	6	5810.724218521751
	7	5812.454970971565
	8	5815.404684479746
	9	5818.31774770261
	10	5822.344627561354
	11	5824.495882867141
	12	5823.121802296413
	13	5821.269168090734
	14	5824.298840037664
	15	5827.156001883334
	16	5829.500621506112
	17	5833.667689513635
	18	5834.4768157503895
	19	5833.455949567264
	20	5830.383275028527

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 4
Pr

[32mCross validating...100%|████████████████████████████████| Time: 0:01:49[39m




Crossvalidation Results:
	k	MSE
	1	5839.365650374756
	2	5820.6002724090995
	3	5799.769572318459
	4	5793.884485265079
	5	5794.856641592719
	6	5797.5593150416935
	7	5802.42314018823
	8	5807.9417593510025
	9	5810.583051869469
	10	5808.106001711536
	11	5809.401722340603
	12	5811.09431964851
	13	5809.989434126955
	14	5811.248390128811
	15	5814.403726574873
	16	5815.433229680395
	17	5815.0367723356085
	18	5810.462734690295
	19	5825.813281434457
	20	5827.263805599105

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 4


[32mCross validating...100%|████████████████████████████████| Time: 0:03:04[39m




Crossvalidation Results:
	k	MSE
	1	1475.7281456269352
	2	1475.6454049495942
	3	1411.8956039211846
	4	1406.6438321349037
	5	1408.5912148774412
	6	1408.2058262552425
	7	1407.9040175456237
	8	1407.4501661384566
	9	1407.4931764098092
	10	1407.519993754785
	11	1407.5064949328903
	12	1407.648257837834
	13	1407.5700408440355
	14	1407.1903324723978
	15	1407.1572221180359
	16	1408.7686471760917
	17	1407.21267466901
	18	1409.5922836730617
	19	1407.6583516007754
	20	1409.4763633096693

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:03:49[39m




Crossvalidation Results:
	k	MSE
	1	1451.9290528710596
	2	1428.2935859717848
	3	1411.9662771631629
	4	1406.763717226786
	5	1408.7359088252222
	6	1408.2611256850125
	7	1407.7748480354737
	8	1407.6309487012968
	9	1407.5783287625818
	10	1407.582748732895
	11	1407.5095387743727
	12	1407.5997791886587
	13	1406.8203174206412
	14	1412.9799871089172
	15	1419.981569794961
	16	1418.8469613302893
	17	1412.4810310745806
	18	1415.8549472353175
	19	1418.3585196187826
	20	1418.5780795452147

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:01:46[39m




Crossvalidation Results:
	k	MSE
	1	10870.089679752416
	2	10871.109774176442
	3	10844.446872493492
	4	10782.741143036672
	5	10759.995471490767
	6	10844.548781584002
	7	10810.60939671244
	8	10822.185703991076
	9	10784.662709629436
	10	10750.246383626833
	11	10754.683500795545
	12	10750.595868340464
	13	10754.586572918342
	14	10753.191513171125
	15	10755.837757996904
	16	10752.892264922039
	17	10758.749560389351
	18	10763.734362467847
	19	10759.689044177025
	20	10757.649336984565

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity 

[32mCross validating...100%|████████████████████████████████| Time: 0:02:17[39m




Crossvalidation Results:
	k	MSE
	1	10792.822698992453
	2	10751.953550903918
	3	10752.130241745304
	4	10755.628954379396
	5	10751.255901795988
	6	10752.289724313745
	7	10752.666070226363
	8	10755.854829914533
	9	10758.265491148926
	10	10768.33284743366
	11	10793.392997498839
	12	10804.121900428261
	13	10800.728251859226
	14	10819.089307841883
	15	10825.38126104345
	16	10832.939422442021
	17	10823.156644653203
	18	10832.07539066182
	19	10830.494714100427
	20	10840.165631464273

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:01:44[39m




Crossvalidation Results:
	k	MSE
	1	1502.3613448266462
	2	1502.1979004976347
	3	1465.1973224916705
	4	1459.5947760164117
	5	1459.5775951633282
	6	1457.9466091780432
	7	1457.3568243337422
	8	1455.951865423367
	9	1457.3390479992192
	10	1458.5176824652808
	11	1458.1442269550234
	12	1458.955475959885
	13	1459.8190523215812
	14	1460.1185354055838
	15	1462.290544485875
	16	1462.3404253255212
	17	1463.6584924262472
	18	1464.7493788552142
	19	1463.7485638131254
	20	1464.246709570992

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:02:33[39m




Crossvalidation Results:
	k	MSE
	1	1487.0231350880258
	2	1481.592411786974
	3	1465.4123446445817
	4	1460.031277804782
	5	1459.5804852669983
	6	1460.0994270398633
	7	1460.031669274536
	8	1460.5061106485064
	9	1462.7224879250348
	10	1463.8131689619875
	11	1464.3634728279014
	12	1464.843522719765
	13	1465.609478929725
	14	1465.4200008286134
	15	1465.2623474608642
	16	1469.5744725323
	17	1468.6378731255081
	18	1467.1661326713424
	19	1470.7968473109286
	20	1467.0045266330944

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:03:11[39m




Crossvalidation Results:
	k	MSE
	1	3092.621449665644
	2	3092.908787756128
	3	3015.2833690467205
	4	3011.6165625116882
	5	3011.6481459968722
	6	2998.1170178506227
	7	2999.2742385363463
	8	3001.630691039438
	9	3002.9780652786403
	10	3003.5580955339206
	11	3004.1176036427437
	12	3004.0441760346703
	13	2987.473674680865
	14	2984.659215725924
	15	2979.5605261525316
	16	2973.8946127823783
	17	2980.50823805058
	18	2967.5378183010794
	19	2972.3279077981424
	20	2965.947552627348

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:03:35[39m




Crossvalidation Results:
	k	MSE
	1	3053.2318393485166
	2	3008.1145360371574
	3	2989.7598143637606
	4	2969.3905163736813
	5	2949.4692236535116
	6	2935.986279136612
	7	2937.028152661659
	8	2940.2728802033826
	9	2940.459058768066
	10	2940.684032811662
	11	2940.8711238875785
	12	2942.157667611186
	13	2941.6057049691676
	14	2943.00156494509
	15	2941.918291602824
	16	2942.4645944033236
	17	2942.929755730073
	18	2943.7673076508845
	19	2943.8022632502925
	20	2944.8770768149807

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:02:10[39m




Crossvalidation Results:
	k	MSE
	1	3102.067239900307
	2	3102.9871937194594
	3	2995.9273341468993
	4	2989.329466566704
	5	2982.9827873371664
	6	2987.214335773894
	7	2988.6161338994793
	8	2988.5012936722496
	9	2989.8081083114794
	10	2991.2432265771527
	11	2994.9174011571313
	12	2995.8727361937954
	13	2995.6850776378888
	14	2997.525595963017
	15	2992.707063658029
	16	2998.8170646897506
	17	2998.685903338198
	18	2998.5287798464246
	19	2999.5137851619957
	20	3000.064794988546

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:02:14[39m




Crossvalidation Results:
	k	MSE
	1	3045.485842933249
	2	3013.918683438138
	3	2997.144986889636
	4	2989.914414986964
	5	2983.7198835832987
	6	2987.670754965542
	7	2990.1286718895253
	8	2991.205694278921
	9	2990.793444839355
	10	2994.035325411006
	11	2993.658832421836
	12	2993.4399897282124
	13	2993.842710863093
	14	2998.0660968543366
	15	2999.2264831574585
	16	3001.06477091092
	17	3004.107060964309
	18	3000.78601176381
	19	3006.3415331366
	20	3002.603182756062

Best k = 5

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 5
P

[32mCross validating...100%|████████████████████████████████| Time: 0:01:06[39m




Crossvalidation Results:
	k	MSE
	1	1701.1995908149156
	2	1701.6373323500757
	3	1682.6277126615353
	4	1683.3431521671882
	5	1684.9507585918823
	6	1686.7599772392377
	7	1689.8677398500572
	8	1692.742535868501
	9	1694.6206423176966
	10	1693.7697981556803
	11	1695.2475219116734
	12	1697.351048980299
	13	1698.78628664105
	14	1698.2712298966692
	15	1698.5851086028113
	16	1704.1328680233848
	17	1703.071045838963
	18	1703.4055531054207
	19	1709.96153390425
	20	1713.5788886272612

Best k = 3

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:01:17[39m




Crossvalidation Results:
	k	MSE
	1	1684.439984406172
	2	1683.6375341090716
	3	1687.5629857799447
	4	1689.0343434020426
	5	1691.0540123222272
	6	1692.9589488608099
	7	1694.007487255755
	8	1694.622171867147
	9	1699.390302860267
	10	1701.0398827563358
	11	1703.845436704682
	12	1707.0629500632447
	13	1708.8702612019808
	14	1711.4449252447243
	15	1710.8685659254413
	16	1711.630836776988
	17	1715.5839583151783
	18	1718.839198672084
	19	1718.884588329776
	20	1718.963671389857

Best k = 2

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:01:44[39m




Crossvalidation Results:
	k	MSE
	1	6757.212985516462
	2	6758.1591237714865
	3	6653.594821082028
	4	6645.581259669915
	5	6646.279295121156
	6	6646.645856404873
	7	6653.962293784511
	8	6656.171373718611
	9	6656.025254521587
	10	6659.315693183225
	11	6658.969794277049
	12	6659.409447307451
	13	6657.717186259715
	14	6658.330455811921
	15	6661.188937013742
	16	6662.819348946281
	17	6659.7107864446225
	18	6663.05696493803
	19	6664.594550538564
	20	6665.7222062576375

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 4


[32mCross validating...100%|████████████████████████████████| Time: 0:02:21[39m




Crossvalidation Results:
	k	MSE
	1	6699.465771486873
	2	6667.93984170735
	3	6660.263875188048
	4	6652.091045480462
	5	6657.6489592117005
	6	6661.910238027387
	7	6661.537372850804
	8	6664.821847430254
	9	6664.2990984068965
	10	6663.877248952397
	11	6663.85306391596
	12	6662.655919257319
	13	6660.3480535468325
	14	6667.804117075299
	15	6664.3301794100635
	16	6666.691850215452
	17	6667.881340651591
	18	6674.136697729246
	19	6678.260296098839
	20	6671.89118273477

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 4
P

[32mCross validating...100%|████████████████████████████████| Time: 0:00:33[39m




Crossvalidation Results:
	k	MSE
	1	2544.378020666334
	2	2544.6255505899
	3	2511.745251491427
	4	2511.7071469386683
	5	2512.663699966195
	6	2513.636548699165
	7	2513.9725340975974
	8	2513.3281727802505
	9	2516.640101288806
	10	2518.856338391225
	11	2520.6883781462875
	12	2518.6473993503973
	13	2527.224766989109
	14	2528.4612900600473
	15	2531.6093011707844
	16	2531.4485267347945
	17	2535.353093020852
	18	2535.1173655471025
	19	2544.159041691056
	20	2548.515644608565

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:59[39m




Crossvalidation Results:
	k	MSE
	1	2536.01021109946
	2	2524.716877494743
	3	2510.020501210819
	4	2512.62766588871
	5	2518.9926247234575
	6	2523.2138866370033
	7	2521.327581421692
	8	2524.319379760087
	9	2528.758992819723
	10	2531.6713168673077
	11	2535.2008144537626
	12	2538.7800050603755
	13	2545.0317602020123
	14	2539.2168286857595
	15	2541.278265446793
	16	2543.6527460226366
	17	2541.4796691997667
	18	2545.060816286742
	19	2546.2779229902453
	20	2549.436860931663

Best k = 3

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:01:54[39m




Crossvalidation Results:
	k	MSE
	1	2686.6738771389537
	2	2687.204500771152
	3	2601.4461530447093
	4	2586.1952191880564
	5	2572.908756982094
	6	2558.1455456151716
	7	2537.2202006946604
	8	2543.343077831648
	9	2539.8595711934495
	10	2545.692966546877
	11	2539.7962982620543
	12	2543.6049785581486
	13	2542.8371883951163
	14	2544.320314405359
	15	2542.4478626452405
	16	2546.178667846294
	17	2546.363660841593
	18	2547.8129669128934
	19	2545.5213173203456
	20	2555.882300533487

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:02:01[39m




Crossvalidation Results:
	k	MSE
	1	2668.9841390603888
	2	2620.0537339033417
	3	2587.264582090639
	4	2578.456026836089
	5	2569.007540305888
	6	2559.160507263263
	7	2545.0488520940157
	8	2550.137120472825
	9	2547.4397347986114
	10	2544.9880040224252
	11	2545.0214508474023
	12	2549.0853567032086
	13	2550.754531299288
	14	2546.09954150562
	15	2546.7708643945925
	16	2549.871696812181
	17	2551.6090044367115
	18	2554.7844451517853
	19	2550.076662582116
	20	2554.3757441711987

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

([3.0, 6.0, 5.0, 3.0, 5.0, 4.0, 6.0, 5.0, 7.0, 1.0, 4.0, 4.0, 4.0, 6.0, 8.0, 5.0, 2.0, 4.0, 3.0, 5.0], [200.0, 200.0, 63.0, 9.0, 17.0, 200.0, 200.0, 44.0, 200.0, 142.0, 9.0, 12.0, 37.0, 25.0, 200.0, 18.0, 15.0, 14.0, 8.0, 35.0], [4.0, 13.0, 13.0, 3.0, 5.0, 8.0, 14.0, 6.0, 15.0, 5.0, 4.0, 4.0, 10.0, 8.0, 20.0, 5.0, 3.0, 4.0, 4.0, 7.0], [0.0, 6.0, 5.0, 3.0, 6.0, 4.0, 7.0, 7.0, 7.0, 1.0, 4.0, 4.0, 4.0, 4.0, 6.0, 4.0, 1.0, 4.0, 3.0, 6.0], [121.0, 158.0, 94.0, 12.0, 32.0, 200.0, 76.0, 127.0, 102.0, 67.0, 7.0, 10.0, 23.0, 16.0, 24.0, 16.0, 18.0, 17.0, 5.0, 41.0], [1.0, 14.0, 11.0, 3.0, 6.0, 7.0, 14.0, 15.0, 9.0, 2.0, 4.0, 4.0, 5.0, 5.0, 6.0, 5.0, 2.0, 4.0, 3.0, 10.0])

In [14]:
# Average number of true positives without / with beta initialization,
# followed by the per-replicate values side by side (column 1 = no init, column 2 = init).
@show mean(no_init_TP) mean(yes_init_TP)
hcat(no_init_TP, yes_init_TP)

mean(no_init_TP) = 4.5
mean(yes_init_TP) = 4.3


20×2 Matrix{Float64}:
 3.0  0.0
 6.0  6.0
 5.0  5.0
 3.0  3.0
 5.0  6.0
 4.0  4.0
 6.0  7.0
 5.0  7.0
 7.0  7.0
 1.0  1.0
 4.0  4.0
 4.0  4.0
 4.0  4.0
 6.0  4.0
 8.0  6.0
 5.0  4.0
 2.0  1.0
 4.0  4.0
 3.0  3.0
 5.0  6.0

In [15]:
# Average value of `result.iter` without / with beta initialization,
# followed by the per-replicate values side by side (column 1 = no init, column 2 = init).
@show mean(no_init_iter) mean(yes_init_iter)
hcat(no_init_iter, yes_init_iter)

mean(no_init_iter) = 82.4
mean(yes_init_iter) = 58.3


20×2 Matrix{Float64}:
 200.0  121.0
 200.0  158.0
  63.0   94.0
   9.0   12.0
  17.0   32.0
 200.0  200.0
 200.0   76.0
  44.0  127.0
 200.0  102.0
 142.0   67.0
   9.0    7.0
  12.0   10.0
  37.0   23.0
  25.0   16.0
 200.0   24.0
  18.0   16.0
  15.0   18.0
  14.0   17.0
   8.0    5.0
  35.0   41.0

In [16]:
# Average cross-validated sparsity level without / with beta initialization,
# followed by the per-replicate values side by side (column 1 = no init, column 2 = init).
@show mean(no_init_best_k) mean(yes_init_best_k)
hcat(no_init_best_k, yes_init_best_k)

mean(no_init_best_k) = 7.75
mean(yes_init_best_k) = 6.5


20×2 Matrix{Float64}:
  4.0   1.0
 13.0  14.0
 13.0  11.0
  3.0   3.0
  5.0   6.0
  8.0   7.0
 14.0  14.0
  6.0  15.0
 15.0   9.0
  5.0   2.0
  4.0   4.0
  4.0   4.0
 10.0   5.0
  8.0   5.0
 20.0   6.0
  5.0   5.0
  3.0   2.0
  4.0   4.0
  4.0   3.0
  7.0  10.0

## Try 20 simulations (NFBC data, debias)

We will compare

+ Number of true positives
+ Number of iterations until convergence
+ Best cross validated k

in the scenario where we initialize beta values versus the scenario where we do not.

In [17]:
"""
    bench(sim::Int)

Compare IHT fits with and without beta initialization (both using `debias=true`) over
`sim` simulated replicates.

Replicate `i` simulates data with `simulate_NFBC1966_sparse` using `seed=i`. Then, once
without and once with `init_beta=true`, it cross-validates the sparsity level over `1:20`
with `cv_iht`, refits with `fit_iht` at the selected level, and records

+ the number of causal SNPs whose column of `result.beta` has at least one nonzero entry,
+ `result.iter`,
+ the selected sparsity level.

# Returns
A 6-tuple of length-`sim` `Vector{Float64}`s, in this order:
`(no_init_TP, no_init_iter, no_init_best_k, yes_init_TP, yes_init_iter, yes_init_best_k)`.
"""
function bench(sim::Int)
    # simulation parameters
    k = 10
    r = 2
    βσ = 0.1
    βoverlap = 2
    path = 1:20 # candidate sparsity levels searched by cross validation
    chr21 = "/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/qc/NFBC.qc.chr.21"

    no_init_TP = zeros(sim)
    no_init_iter = zeros(sim)
    no_init_best_k = zeros(sim)
    yes_init_TP = zeros(sim)
    yes_init_iter = zeros(sim)
    yes_init_best_k = zeros(sim)

    # A causal SNP counts as a true positive when its column of `beta` holds any nonzero
    # entry. Checking the entries directly (instead of the column sum) cannot miss a SNP
    # whose estimates happen to cancel each other.
    true_positives(beta, snps) = count(j -> any(!iszero, view(beta, :, j)), snps)

    # use chr21 of NFBC
    for i in 1:sim
        # the 2nd and 4th returned values are not needed here
        xla, _, true_b, _, Y = simulate_NFBC1966_sparse(chr21, k, r, seed=i,
            βoverlap=βoverlap, βσ=βσ)
        # first index of every nonzero entry of `true_b`
        # NOTE(review): presumably this is the SNP index — confirm against the simulator
        causal_snps = unique([x[1] for x in findall(!iszero, true_b)])
        X = Transpose(xla)

        # not initializing beta
        mses = cv_iht(Y, X, path=path, verbose=true, debias=true)
        # map the position of the smallest MSE back to the sparsity level it belongs to
        best_k = path[argmin(mses)]
        result = fit_iht(Y, X, k=best_k, verbose=true, debias=true)
        no_init_TP[i] = true_positives(result.beta, causal_snps)
        no_init_iter[i] = result.iter
        no_init_best_k[i] = best_k

        # initializing beta
        mses = cv_iht(Y, X, path=path, init_beta=true, debias=true)
        best_k = path[argmin(mses)]
        result = fit_iht(Y, X, k=best_k, verbose=true, init_beta=true, debias=true)
        yes_init_TP[i] = true_positives(result.beta, causal_snps)
        yes_init_iter[i] = result.iter
        yes_init_best_k[i] = best_k
    end

    return no_init_TP, no_init_iter, no_init_best_k,
        yes_init_TP, yes_init_iter, yes_init_best_k
end
# Time 20 benchmark replicates and unpack the six result vectors returned by `bench`.
@time (no_init_TP, no_init_iter, no_init_best_k,
    yes_init_TP, yes_init_iter, yes_init_best_k) = bench(20)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:19[39m




Crossvalidation Results:
	k	MSE
	1	7467.007578213517
	2	7467.016480222095
	3	7462.686520010227
	4	7459.731399434859
	5	7459.571145178593
	6	7457.612883138536
	7	7460.606294499556
	8	7456.681759241473
	9	7453.695689317448
	10	7453.483260738049
	11	7450.948727887585
	12	7449.373132917843
	13	7454.041410951184
	14	7453.081947522708
	15	7446.562437949809
	16	7448.961342829954
	17	7444.609016101175
	18	7439.239665478103
	19	7438.558642497654
	20	7446.166923049819

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 19


[32mCross validating...100%|████████████████████████████████| Time: 0:00:26[39m




Crossvalidation Results:
	k	MSE
	1	7458.687808733434
	2	7461.704152142922
	3	7464.707553434446
	4	7476.675745851983
	5	7476.437905561855
	6	7483.87242299432
	7	7463.085155403782
	8	7465.324305579339
	9	7463.984127476909
	10	7470.118980054739
	11	7477.338987273931
	12	7482.055611463823
	13	7483.012940730736
	14	7485.348313617027
	15	7499.932601590906
	16	7507.263813269407
	17	7503.940842453529
	18	7502.954556004397
	19	7499.774021815997
	20	7494.422615125665

Best k = 1

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:00:24[39m




Crossvalidation Results:
	k	MSE
	1	1913.9005791840955
	2	1899.0564915268014
	3	1846.317589182921
	4	1846.6065278914505
	5	1847.4943060566102
	6	1846.1821799169454
	7	1844.7627523738313
	8	1840.4436227308645
	9	1842.925766685599
	10	1845.627320137686
	11	1828.1042132568687
	12	1828.8843521125282
	13	1832.8493058241022
	14	1831.1989423852751
	15	1823.6059320359977
	16	1823.5814339464057
	17	1820.6069709568185
	18	1821.7559619069339
	19	1825.3833059090043
	20	1835.493108459912

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:36[39m




Crossvalidation Results:
	k	MSE
	1	1866.689431696879
	2	1848.9869173618063
	3	1844.9489620682164
	4	1845.4745629057004
	5	1844.4825546420757
	6	1840.5591171451601
	7	1838.6067129606224
	8	1829.4205275262627
	9	1814.7337655125648
	10	1814.6944388007428
	11	1816.4285673458141
	12	1810.696581169976
	13	1811.242339330835
	14	1809.8086030612476
	15	1808.6869569638955
	16	1809.826184348662
	17	1807.8745245440314
	18	1801.1043620892665
	19	1803.8300713696804
	20	1805.8181404887519

Best k = 18

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:44[39m




Crossvalidation Results:
	k	MSE
	1	1275.5949236809383
	2	1275.6234997545293
	3	1199.299013919542
	4	1196.8039572053353
	5	1184.3458808997761
	6	1172.1669775477105
	7	1171.528360748574
	8	1171.9837246121115
	9	1172.276339059532
	10	1170.66961936908
	11	1169.6856873268657
	12	1169.0975461967114
	13	1167.5084849994378
	14	1165.473655474763
	15	1166.0664517509285
	16	1164.0364115072282
	17	1164.6341608814798
	18	1164.5178128056637
	19	1162.839055340995
	20	1162.4261558335995

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:00:41[39m




Crossvalidation Results:
	k	MSE
	1	1235.2020863240266
	2	1234.5266940250224
	3	1199.7944697582243
	4	1199.2680062583133
	5	1197.986417178987
	6	1196.6064284946633
	7	1190.9213991728566
	8	1184.959963001992
	9	1180.3441267139474
	10	1173.0246360890962
	11	1171.4707606592062
	12	1170.6103714357498
	13	1169.295587305433
	14	1169.1225262471924
	15	1169.5313552108435
	16	1168.3105836146158
	17	1168.8126119984709
	18	1165.7206600412987
	19	1165.2256453054467
	20	1166.0326256001895

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity pa

[32mCross validating...100%|████████████████████████████████| Time: 0:00:43[39m




Crossvalidation Results:
	k	MSE
	1	4091.8797786863224
	2	4093.0011285132546
	3	4021.3234736800614
	4	4026.2657500625764
	5	4012.221369237183
	6	4012.633935102751
	7	4017.405771368376
	8	4014.7630793596704
	9	4015.7866591672278
	10	4021.7437964177193
	11	4008.765189301789
	12	4016.293025600475
	13	4015.53380319444
	14	4012.1919704646557
	15	4016.186691276759
	16	4016.8140099154366
	17	4019.877000181546
	18	4019.987540670878
	19	4022.195039729325
	20	4012.803535528199

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (

[32mCross validating...100%|████████████████████████████████| Time: 0:00:41[39m




Crossvalidation Results:
	k	MSE
	1	4062.2955625837703
	2	4061.9166813211905
	3	4055.433607799812
	4	4042.4825218316773
	5	4024.8821262946526
	6	4029.449951098421
	7	4024.2336574210517
	8	4018.8425716090032
	9	4018.9201797399473
	10	4017.7617577103774
	11	4022.6726617218906
	12	4021.5061935323583
	13	4025.3413035405547
	14	4023.6007064165997
	15	4021.985169450196
	16	4015.619520236413
	17	4022.9466787789534
	18	4021.5396420594134
	19	4023.995355065161
	20	4014.276978809956

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:00:46[39m




Crossvalidation Results:
	k	MSE
	1	2577.4119117248447
	2	2577.801174894436
	3	2486.3719492100468
	4	2459.31834129966
	5	2447.6301133710176
	6	2442.343093137241
	7	2455.222646101837
	8	2441.6171220171286
	9	2441.7292130797196
	10	2439.2083103548034
	11	2435.745279943736
	12	2437.5781089252387
	13	2437.206050579081
	14	2438.625261799019
	15	2436.3795826271976
	16	2451.077442434488
	17	2434.755888412146
	18	2435.8372798612986
	19	2433.6410591724903
	20	2433.7852034140915

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:00:53[39m




Crossvalidation Results:
	k	MSE
	1	2525.4455722065163
	2	2499.4674483577724
	3	2472.6080554316895
	4	2461.9045420751086
	5	2443.462309579876
	6	2441.9381788757605
	7	2441.315000589897
	8	2442.7676360434043
	9	2440.9161084505276
	10	2443.3887526348126
	11	2443.7203161276057
	12	2438.101748955266
	13	2440.06516370444
	14	2435.718292042962
	15	2445.8433886357548
	16	2440.7636214143276
	17	2439.59126508324
	18	2436.5068546512584
	19	2440.4343360301236
	20	2438.4889271646985

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:35[39m




Crossvalidation Results:
	k	MSE
	1	1972.035109245357
	2	1972.0612516485928
	3	1990.6656725805772
	4	1943.1532093837768
	5	1931.827452567566
	6	1914.936517963412
	7	1894.5485506846444
	8	1886.9307596862395
	9	1887.0239534066388
	10	1882.0507811211094
	11	1875.1011187459533
	12	1874.9580486896323
	13	1877.1044204636173
	14	1879.4925734445355
	15	1877.8380854169773
	16	1876.2714771227388
	17	1877.716632616645
	18	1878.3976558574732
	19	1875.9881213951373
	20	1879.5078111111236

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:53[39m




Crossvalidation Results:
	k	MSE
	1	1915.3207130559415
	2	1894.951878429015
	3	1920.9277581647855
	4	1912.9624817734143
	5	1900.791594361164
	6	1902.1656604867367
	7	1894.6007263796573
	8	1891.069628536275
	9	1882.7057351929627
	10	1882.7240330735829
	11	1881.7368297070348
	12	1877.8316295594136
	13	1882.051225861459
	14	1876.9008285629466
	15	1874.603782800964
	16	1880.8183694058127
	17	1878.297834920028
	18	1879.449309615311
	19	1879.5309539416962
	20	1879.5504110313645

Best k = 15

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:00:29[39m




Crossvalidation Results:
	k	MSE
	1	288.854369521326
	2	288.8629007872701
	3	224.82851593513448
	4	223.43486160937618
	5	222.00411888358897
	6	223.6809889128935
	7	222.93183775014157
	8	226.18012367493975
	9	224.41251235963855
	10	223.70706489443424
	11	222.3876297180244
	12	223.19425844975214
	13	221.20562312352774
	14	221.31333002628452
	15	220.9990021983584
	16	221.0216366332936
	17	221.1756832901582
	18	218.75481802910065
	19	219.28047319405903
	20	220.89669638688173

Best k = 18

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:49[39m




Crossvalidation Results:
	k	MSE
	1	230.0585836069764
	2	226.60655003705727
	3	224.88032277133084
	4	223.49129679453404
	5	217.80310172672907
	6	216.41918343741088
	7	217.42042216958
	8	219.61152274880106
	9	215.7417299722675
	10	218.21748096110866
	11	217.1633638206817
	12	216.33757263630022
	13	215.2627258164435
	14	212.6389433153301
	15	211.93503610390738
	16	212.17969902387807
	17	212.87502438646936
	18	213.67431605729934
	19	213.6781584367354
	20	213.83365015828113

Best k = 15

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:00:53[39m




Crossvalidation Results:
	k	MSE
	1	4813.621613165374
	2	4814.2183939489305
	3	4777.887715819571
	4	4774.9413673895415
	5	4761.715333915685
	6	4749.671783868682
	7	4748.463327110014
	8	4750.695833945833
	9	4764.75184611455
	10	4759.532975172376
	11	4743.585198304637
	12	4755.279756501416
	13	4744.239406406021
	14	4739.450756952489
	15	4739.872029625504
	16	4731.056379744949
	17	4736.476608591404
	18	4732.012515331467
	19	4743.16223488412
	20	4737.882298364391

Best k = 16

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 16


[32mCross validating...100%|████████████████████████████████| Time: 0:00:41[39m




Crossvalidation Results:
	k	MSE
	1	4761.8645343342305
	2	4748.991932930428
	3	4735.4101177314615
	4	4722.312755964662
	5	4715.631283121632
	6	4712.754279833172
	7	4713.535292974976
	8	4716.359154085807
	9	4709.203874277003
	10	4701.452808608334
	11	4697.890776829343
	12	4697.508377439306
	13	4695.39925864275
	14	4695.7712133738905
	15	4688.7239255895865
	16	4692.457018747401
	17	4687.061245779006
	18	4700.96321985858
	19	4695.061794808725
	20	4703.961282564888

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1

[32mCross validating...100%|████████████████████████████████| Time: 0:00:49[39m




Crossvalidation Results:
	k	MSE
	1	3503.042385940169
	2	3503.232769359981
	3	3421.970879901101
	4	3413.9452264475344
	5	3408.7796033587265
	6	3406.2062150543625
	7	3405.7236265351785
	8	3402.153225667037
	9	3402.6598851058343
	10	3400.4108827764144
	11	3401.838482209606
	12	3400.8887842174067
	13	3403.219500002084
	14	3401.069113950834
	15	3398.105503010368
	16	3398.4755284314065
	17	3398.6055683852146
	18	3399.5474842523085
	19	3399.952095252343
	20	3400.0476148891903

Best k = 15

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:00:50[39m




Crossvalidation Results:
	k	MSE
	1	3444.39308369781
	2	3430.2026323862538
	3	3418.5914537575154
	4	3412.690409841845
	5	3407.5771731436535
	6	3412.1380855227235
	7	3419.61815868683
	8	3411.1508727534933
	9	3403.1644535203022
	10	3411.898422381367
	11	3410.8708177047847
	12	3408.5746862360566
	13	3413.1266357160716
	14	3409.4503031735608
	15	3413.0924622856223
	16	3406.8173940601987
	17	3412.741200379616
	18	3412.192460396402
	19	3409.3701002379503
	20	3407.7591794749464

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:01:01[39m




Crossvalidation Results:
	k	MSE
	1	5491.178134277059
	2	5492.756483258866
	3	5535.541990657599
	4	5473.673545768851
	5	5500.342015068236
	6	5515.29052786479
	7	5501.511499443041
	8	5458.535189208756
	9	5507.212485287991
	10	5452.604184985264
	11	5497.017316241997
	12	5482.3269308708495
	13	5464.035462853408
	14	5477.966197882575
	15	5473.782627995324
	16	5457.075342269033
	17	5466.30644221249
	18	5458.913202965118
	19	5463.7966795036455
	20	5473.321628536017

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10


[32mCross validating...100%|████████████████████████████████| Time: 0:01:21[39m




Crossvalidation Results:
	k	MSE
	1	5466.750027064632
	2	5473.440916921742
	3	5504.81769710533
	4	5461.17275635276
	5	5537.049783412611
	6	5524.653663167265
	7	5513.5715737796245
	8	5453.9975089694
	9	5444.6834376678025
	10	5449.34657147468
	11	5440.069047435631
	12	5445.778330516663
	13	5441.956599017582
	14	5450.929894780831
	15	5450.784206062881
	16	5446.371826637396
	17	5456.673206565471
	18	5452.182998795554
	19	5464.128274451153
	20	5453.09020144184

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 11
Prio

[32mCross validating...100%|████████████████████████████████| Time: 0:01:35[39m




Crossvalidation Results:
	k	MSE
	1	5881.990518093519
	2	5882.906977820634
	3	5806.737294169866
	4	5799.851857650317
	5	5795.33392440504
	6	5791.578594295851
	7	5789.067844778459
	8	5789.918055172568
	9	5785.48693088401
	10	5790.8183049371555
	11	5793.79490826193
	12	5789.085364508638
	13	5785.94861047908
	14	5786.295670650696
	15	5785.988107469164
	16	5786.360940511853
	17	5783.669456948962
	18	5777.331844016431
	19	5783.550835746234
	20	5777.285548060578

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 20
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:01:35[39m




Crossvalidation Results:
	k	MSE
	1	5838.623812753846
	2	5818.822629722374
	3	5797.753456722861
	4	5790.904566108169
	5	5789.424083780242
	6	5794.606669424213
	7	5801.707417853591
	8	5800.43193874506
	9	5802.481976031851
	10	5796.033454977562
	11	5801.0108048594475
	12	5795.8361128631195
	13	5794.888952572877
	14	5796.107787201243
	15	5798.21411184493
	16	5802.0955372796325
	17	5787.453949332319
	18	5807.398526535162
	19	5794.517235126036
	20	5788.244674287151

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 17

[32mCross validating...100%|████████████████████████████████| Time: 0:00:28[39m




Crossvalidation Results:
	k	MSE
	1	1475.7281456269352
	2	1475.6454049495942
	3	1411.440573996389
	4	1413.3649677547369
	5	1400.0957804871898
	6	1406.8398502225837
	7	1407.6881783096628
	8	1405.8284154631808
	9	1402.5220372073088
	10	1398.8995572546248
	11	1399.023698936331
	12	1401.4086591579912
	13	1400.436932964728
	14	1395.4427119756197
	15	1398.3479876240833
	16	1397.216262092619
	17	1395.9020354705629
	18	1396.122409269288
	19	1395.8754232149863
	20	1393.4030801241274

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:00:49[39m




Crossvalidation Results:
	k	MSE
	1	1451.883531219492
	2	1428.2500147768924
	3	1411.5353064742349
	4	1410.3030675626242
	5	1413.5496876151237
	6	1400.6009262048312
	7	1402.5344391808862
	8	1403.1484809927338
	9	1400.2279081779584
	10	1403.3007765772988
	11	1396.4196484440392
	12	1406.6417004985856
	13	1400.9267648069585
	14	1399.335226863077
	15	1397.9968382664335
	16	1400.1942500993716
	17	1401.947175795502
	18	1396.5337449637675
	19	1397.5330119817274
	20	1399.2592307083255

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity pa

[32mCross validating...100%|████████████████████████████████| Time: 0:01:11[39m




Crossvalidation Results:
	k	MSE
	1	10870.089679752416
	2	10871.109774176442
	3	11196.975242690742
	4	10784.243015918146
	5	10742.830720076878
	6	10734.138515909148
	7	10729.583235899467
	8	10723.572460137284
	9	10722.461116254526
	10	10718.831153057148
	11	10718.191013942173
	12	10727.327208090708
	13	10716.032579236657
	14	10711.412561385925
	15	10715.293415663715
	16	10703.289175404103
	17	10710.632678281208
	18	10696.58409756068
	19	10713.534098589847
	20	10701.43369938422

Best k = 18

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity p

[32mCross validating...100%|████████████████████████████████| Time: 0:01:15[39m




Crossvalidation Results:
	k	MSE
	1	10792.669761470363
	2	10750.688400878955
	3	10820.96498332776
	4	10794.695864239698
	5	10747.074780411593
	6	10743.655771715064
	7	10734.008033421353
	8	10740.153872394209
	9	10736.537093141447
	10	10728.866936100561
	11	10758.29292147907
	12	10771.39779819769
	13	10770.704357774992
	14	10787.140820279212
	15	10787.695752389465
	16	10767.96024928296
	17	10786.711138877708
	18	10791.834205479328
	19	10793.9949026807
	20	10800.27861972473

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:00:22[39m




Crossvalidation Results:
	k	MSE
	1	1502.3613448266462
	2	1502.1979004976347
	3	1467.0485415000792
	4	1468.9567768583843
	5	1458.684683472207
	6	1456.5919131771661
	7	1456.7175868809736
	8	1455.1829700271674
	9	1454.5641288576226
	10	1455.220644244605
	11	1455.6771480655598
	12	1454.9511693897011
	13	1456.1245760007896
	14	1455.3528688601095
	15	1454.8625991282788
	16	1456.4559935361704
	17	1453.0264618479223
	18	1453.6433145643684
	19	1452.230959086658
	20	1452.3132265508839

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity pa

[32mCross validating...100%|████████████████████████████████| Time: 0:00:46[39m




Crossvalidation Results:
	k	MSE
	1	1486.000062880585
	2	1483.878796020385
	3	1463.0600641881033
	4	1459.3239263798525
	5	1458.233782310579
	6	1455.6597983355164
	7	1454.3396782736618
	8	1454.9129755718245
	9	1454.4367614960192
	10	1453.3507924072856
	11	1457.8065220121518
	12	1454.7197268449509
	13	1454.1780561677258
	14	1452.9443866142367
	15	1453.8801352926055
	16	1452.1107776466565
	17	1451.7965815781436
	18	1453.5803941432523
	19	1450.9999059339063
	20	1452.8813172423297

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity pa

[32mCross validating...100%|████████████████████████████████| Time: 0:00:26[39m




Crossvalidation Results:
	k	MSE
	1	3092.621449665644
	2	3092.908787756128
	3	3073.9334657607947
	4	3045.8477663088893
	5	2999.321224789318
	6	2991.517779821783
	7	2966.5771874276047
	8	2943.289240994586
	9	2934.85024340751
	10	2938.609406368021
	11	2936.0084841435128
	12	2933.923202664445
	13	2933.205197393424
	14	2932.2165879705344
	15	2927.3442566370654
	16	2917.366796430788
	17	2921.206261307427
	18	2924.2435345168374
	19	2922.2124025134717
	20	2916.3992522471517

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (

[32mCross validating...100%|████████████████████████████████| Time: 0:00:36[39m




Crossvalidation Results:
	k	MSE
	1	3053.3366656204653
	2	3007.7831594872505
	3	2982.959182858318
	4	3007.425541955365
	5	2970.6181242762136
	6	2978.5066935613677
	7	2966.0078621065186
	8	2953.642077281048
	9	2941.1346999500474
	10	2947.6594629428723
	11	2962.403350817082
	12	2961.059235550455
	13	2938.591107803745
	14	2951.5064680423707
	15	2949.029864199401
	16	2954.9521162276933
	17	2962.8492182148325
	18	2947.3764445524675
	19	2931.448790124125
	20	2921.6135861162093

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:54[39m




Crossvalidation Results:
	k	MSE
	1	3102.067239900307
	2	3102.9871937194594
	3	2994.934638509072
	4	2988.032057512977
	5	2981.393126471363
	6	2976.7277824136168
	7	2980.4746974079635
	8	2976.7982748108666
	9	2979.1409114975886
	10	2978.1243160408344
	11	2975.9409261699348
	12	2973.58777517036
	13	2978.6127264215916
	14	2973.4772019951247
	15	2974.1051337981608
	16	2968.961558409321
	17	2979.189836011008
	18	2973.7440434501787
	19	2975.0816298977475
	20	2976.4197511833804

Best k = 16

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:52[39m




Crossvalidation Results:
	k	MSE
	1	3044.8736248644477
	2	3013.2522914162337
	3	2996.309306683244
	4	2988.9279539482695
	5	2982.5005791005165
	6	2979.023684431305
	7	2989.0410328637945
	8	2990.0863466727724
	9	2992.034685047129
	10	2983.548492746679
	11	2979.679488166955
	12	2979.290180607393
	13	2988.005407710315
	14	2983.4375176779818
	15	2988.624504141574
	16	2985.5473069072737
	17	2980.0937477103844
	18	2976.9131975795067
	19	2976.3650880914897
	20	2971.2150804998137

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:38[39m




Crossvalidation Results:
	k	MSE
	1	1701.1995908149156
	2	1701.6373323500757
	3	1679.2820706620046
	4	1678.5086163427964
	5	1681.553812603046
	6	1678.4871147174915
	7	1679.277267450045
	8	1679.7389284257715
	9	1681.4138608438927
	10	1683.9298841838468
	11	1684.6533658873866
	12	1683.3591910897655
	13	1687.8188654979103
	14	1687.2025450787437
	15	1685.2871963852813
	16	1684.9695031854067
	17	1684.673444986792
	18	1684.9364655551533
	19	1686.417131911789
	20	1687.5300662410014

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:01:07[39m




Crossvalidation Results:
	k	MSE
	1	1683.9864579960008
	2	1681.2428123825403
	3	1681.4765044136348
	4	1688.8145359868915
	5	1684.6520997702887
	6	1684.0886005484535
	7	1681.7060927888858
	8	1683.5893844436275
	9	1687.8122696133555
	10	1686.9167188832125
	11	1687.029369711996
	12	1691.898825090972
	13	1689.1490592387104
	14	1688.3876734973587
	15	1694.786149592427
	16	1691.8956719925836
	17	1691.7598291169948
	18	1695.5339619029908
	19	1694.9053163932206
	20	1694.286348417299

Best k = 2

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:00:54[39m




Crossvalidation Results:
	k	MSE
	1	6757.212985516462
	2	6758.1591237714865
	3	6652.71131209331
	4	6633.960201200404
	5	6649.802239354184
	6	6617.249969626252
	7	6632.319356460358
	8	6612.73556143469
	9	6618.961496799311
	10	6624.702659536715
	11	6620.329069516494
	12	6628.74793374219
	13	6619.9351614287725
	14	6614.674326497967
	15	6619.077027197595
	16	6618.8207567395575
	17	6610.822830875203
	18	6614.01017539162
	19	6613.8177417960505
	20	6604.2761814848955

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 20

[32mCross validating...100%|████████████████████████████████| Time: 0:00:50[39m




Crossvalidation Results:
	k	MSE
	1	6699.310324382229
	2	6667.259822405361
	3	6655.437973417724
	4	6640.518522036846
	5	6647.369049566362
	6	6648.578584047985
	7	6653.744154922959
	8	6650.848789348936
	9	6641.071923702473
	10	6639.175353729595
	11	6633.100307721341
	12	6635.156204204261
	13	6632.331397482015
	14	6630.038948798145
	15	6630.881848853969
	16	6630.075082247194
	17	6634.233966560745
	18	6632.4793318062075
	19	6630.744734951502
	20	6636.723746924215

Best k = 14

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 14

[32mCross validating...100%|████████████████████████████████| Time: 0:00:59[39m




Crossvalidation Results:
	k	MSE
	1	2544.378020666334
	2	2544.6255505899
	3	2510.0543907195124
	4	2504.8796650029526
	5	2504.9894091950073
	6	2506.6953878411077
	7	2505.484296573498
	8	2504.2703151451815
	9	2507.0088762803002
	10	2506.108670090388
	11	2505.7722983943054
	12	2507.1401725059345
	13	2505.430861688485
	14	2505.2962336317833
	15	2508.0204529715215
	16	2510.207568153102
	17	2508.489650795081
	18	2507.286275453221
	19	2512.0472884367614
	20	2516.5544981451285

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter 

[32mCross validating...100%|████████████████████████████████| Time: 0:01:30[39m




Crossvalidation Results:
	k	MSE
	1	2535.9068071281354
	2	2522.8170450355296
	3	2506.805974211429
	4	2508.19095496054
	5	2507.857451506925
	6	2508.578020643629
	7	2508.6444548634263
	8	2510.9369114797955
	9	2511.7160196676614
	10	2513.386043559812
	11	2510.443832559956
	12	2514.5181216952906
	13	2517.2668881158183
	14	2510.273163021654
	15	2511.090367953819
	16	2511.5642873324327
	17	2511.9394741386996
	18	2517.6135175236227
	19	2515.0561261082958
	20	2515.4211827860736

Best k = 3

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:00:40[39m




Crossvalidation Results:
	k	MSE
	1	2686.6738771389537
	2	2687.204500771152
	3	2599.7419730253623
	4	2585.257303093636
	5	2583.9553314944706
	6	2536.6187805458276
	7	2538.732292512429
	8	2532.2152682466485
	9	2533.2859811473627
	10	2529.441151030224
	11	2531.9270473915813
	12	2531.962556279564
	13	2528.3144870973556
	14	2532.5943010057294
	15	2531.2001200887753
	16	2534.2120445149667
	17	2532.7510173579412
	18	2529.9254760478634
	19	2527.3355893059525
	20	2530.643568366261

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:00:38[39m




Crossvalidation Results:
	k	MSE
	1	2640.015342861501
	2	2597.444581892266
	3	2567.799028107913
	4	2555.4190585670262
	5	2545.6170246686174
	6	2538.40334414925
	7	2533.4124775160617
	8	2533.1980627196676
	9	2532.6507057682197
	10	2529.6061423939414
	11	2532.0562754920093
	12	2533.011201771853
	13	2533.704580638236
	14	2531.407525970939
	15	2531.681180638196
	16	2530.0335173547546
	17	2532.6924857140066
	18	2532.282491110858
	19	2534.2084000976615
	20	2528.9678298650306

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

([3.0, 3.0, 6.0, 3.0, 5.0, 4.0, 5.0, 5.0, 5.0, 2.0, 5.0, 6.0, 3.0, 6.0, 8.0, 5.0, 3.0, 4.0, 3.0, 6.0], [13.0, 11.0, 8.0, 7.0, 10.0, 9.0, 13.0, 15.0, 11.0, 8.0, 12.0, 25.0, 12.0, 13.0, 10.0, 16.0, 9.0, 11.0, 8.0, 12.0], [19.0, 17.0, 20.0, 11.0, 19.0, 12.0, 18.0, 16.0, 15.0, 10.0, 20.0, 20.0, 18.0, 19.0, 20.0, 16.0, 6.0, 20.0, 8.0, 19.0], [0.0, 5.0, 5.0, 3.0, 5.0, 4.0, 7.0, 4.0, 4.0, 2.0, 4.0, 5.0, 4.0, 6.0, 7.0, 5.0, 1.0, 5.0, 3.0, 5.0], [6.0, 8.0, 9.0, 9.0, 7.0, 7.0, 11.0, 7.0, 9.0, 10.0, 200.0, 14.0, 16.0, 9.0, 11.0, 9.0, 7.0, 11.0, 6.0, 10.0], [1.0, 18.0, 19.0, 20.0, 14.0, 15.0, 15.0, 17.0, 9.0, 11.0, 17.0, 11.0, 10.0, 19.0, 20.0, 20.0, 2.0, 14.0, 3.0, 20.0])

In [19]:
# Average number of recovered causal SNPs per run, without / with beta initialization.
@show mean(no_init_TP)
@show mean(yes_init_TP)
# Per-simulation counts side by side (column 1: no init, column 2: init).
hcat(no_init_TP, yes_init_TP)

mean(no_init_TP) = 4.5
mean(yes_init_TP) = 4.2


20×2 Matrix{Float64}:
 3.0  0.0
 3.0  5.0
 6.0  5.0
 3.0  3.0
 5.0  5.0
 4.0  4.0
 5.0  7.0
 5.0  4.0
 5.0  4.0
 2.0  2.0
 5.0  4.0
 6.0  5.0
 3.0  4.0
 6.0  6.0
 8.0  7.0
 5.0  5.0
 3.0  1.0
 4.0  5.0
 3.0  3.0
 6.0  5.0

In [20]:
# Average iteration count of the final fit, without / with beta initialization.
@show mean(no_init_iter)
@show mean(yes_init_iter)
# Per-simulation iteration counts side by side (column 1: no init, column 2: init).
hcat(no_init_iter, yes_init_iter)

mean(no_init_iter) = 11.65
mean(yes_init_iter) = 18.8


20×2 Matrix{Float64}:
 13.0    6.0
 11.0    8.0
  8.0    9.0
  7.0    9.0
 10.0    7.0
  9.0    7.0
 13.0   11.0
 15.0    7.0
 11.0    9.0
  8.0   10.0
 12.0  200.0
 25.0   14.0
 12.0   16.0
 13.0    9.0
 10.0   11.0
 16.0    9.0
  9.0    7.0
 11.0   11.0
  8.0    6.0
 12.0   10.0

In [18]:
# Average cross-validated sparsity level, without / with beta initialization.
@show mean(no_init_best_k)
@show mean(yes_init_best_k)
# Per-simulation best k side by side (column 1: no init, column 2: init).
hcat(no_init_best_k, yes_init_best_k)

mean(no_init_best_k) = 16.15
mean(yes_init_best_k) = 13.75


20×2 Matrix{Float64}:
 19.0   1.0
 17.0  18.0
 20.0  19.0
 11.0  20.0
 19.0  14.0
 12.0  15.0
 18.0  15.0
 16.0  17.0
 15.0   9.0
 10.0  11.0
 20.0  17.0
 20.0  11.0
 18.0  10.0
 19.0  19.0
 20.0  20.0
 16.0  20.0
  6.0   2.0
 20.0  14.0
  8.0   3.0
 19.0  20.0

## Try 20 simulations (simulated genotypes, no debias)

We will compare

+ Number of true positives
+ Number of iterations until convergence
+ Best cross validated k

in two scenarios: with the beta values initialized, versus without initialization.

In [21]:
# Number of columns of `beta[:, snps]` that contain at least one nonzero entry.
# Each column is tested with `any` instead of being summed first, so a SNP whose
# per-row coefficients happen to cancel to exactly zero is still counted.
count_selected(beta, snps) = count(col -> any(!iszero, col), eachcol(view(beta, :, snps)))

"""
    bench(sim::Int)

Run `sim` simulated multivariate IHT analyses and compare fitting with and
without `init_beta = true`.

For each seed `i in 1:sim`, data are generated with
`simulate_multivariate_sparse`, the sparsity level is chosen by `cv_iht` over
`1:20`, and the model is refit with `fit_iht` at the selected level. This is
done once with default initialization and once with `init_beta = true`.

# Returns
A 6-tuple of length-`sim` `Vector{Float64}`s, in this order:
- `no_init_TP`, `no_init_iter`, `no_init_best_k`
- `yes_init_TP`, `yes_init_iter`, `yes_init_best_k`

where `*_TP` is the number of causal SNPs with a nonzero estimate in at least
one row of `result.beta`, `*_iter` is `result.iter`, and `*_best_k` is the
sparsity level with the smallest cross-validation MSE.
"""
function bench(sim::Int)
    # simulation parameters
    n = 1000        # samples
    p = 10000       # SNPs
    k = 10          # passed to the simulator as the sparsity level
    r = 2           # traits
    βσ = 0.1        # NOTE(review): defined but never passed to the simulator — confirm intent
    βoverlap = 2    # forwarded to the simulator's `βoverlap` keyword
    path = 1:20     # candidate sparsity levels for cross validation

    no_init_TP = zeros(sim)
    no_init_iter = zeros(sim)
    no_init_best_k = zeros(sim)
    yes_init_TP = zeros(sim)
    yes_init_iter = zeros(sim)
    yes_init_best_k = zeros(sim)

    for i in 1:sim
        # the 2nd and 4th return values are not needed here
        xla, _, true_b, _, Y = simulate_multivariate_sparse(n, p, k, r, seed=i, βoverlap=βoverlap)
        # first index of every nonzero entry of true_b = causal SNP index
        causal_snps = unique([idx[1] for idx in findall(!iszero, true_b)])
        X = Transpose(xla)

        # not initializing beta
        mses = cv_iht(Y, X, path=path, verbose=true)
        best_k = path[argmin(mses)]
        result = fit_iht(Y, X, k=best_k, verbose=true)
        no_init_TP[i] = count_selected(result.beta, causal_snps)
        no_init_iter[i] = result.iter
        no_init_best_k[i] = best_k

        # initializing beta
        mses = cv_iht(Y, X, path=path, init_beta=true)
        best_k = path[argmin(mses)]
        result = fit_iht(Y, X, k=best_k, verbose=true, init_beta=true)
        yes_init_TP[i] = count_selected(result.beta, causal_snps)
        yes_init_iter[i] = result.iter
        yes_init_best_k[i] = best_k
    end

    return no_init_TP, no_init_iter, no_init_best_k,
        yes_init_TP, yes_init_iter, yes_init_best_k
end
# Time 20 replicates and keep the six per-replicate result vectors as globals.
@time begin
    no_init_TP, no_init_iter, no_init_best_k,
        yes_init_TP, yes_init_iter, yes_init_best_k = bench(20)
end

[32mCross validating...100%|████████████████████████████████| Time: 0:00:50[39m




Crossvalidation Results:
	k	MSE
	1	3106.6316936694793
	2	3111.8676511084177
	3	2278.6561381609126
	4	2206.1907681338503
	5	1993.2244326438577
	6	1997.381409111596
	7	2014.1655251889138
	8	2016.840663407312
	9	2021.3604206629525
	10	2020.5638974155331
	11	2041.7857878257296
	12	1981.6171366995025
	13	1991.2197880029564
	14	2047.099925902276
	15	2003.93415039841
	16	2056.8532654558912
	17	2063.755690138678
	18	2065.6345456270355
	19	2057.5555786779114
	20	2063.9903926775596

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:00:44[39m




Crossvalidation Results:
	k	MSE
	1	2468.9215946156014
	2	2236.5187932997464
	3	1363.2457577614468
	4	1260.0653114790764
	5	1206.4067112507082
	6	1103.2759823205326
	7	947.1925500021966
	8	910.6574238705327
	9	876.8554365590012
	10	877.8786841941952
	11	877.5909695548985
	12	876.5205958047598
	13	880.642904753129
	14	884.0497574067717
	15	884.1371177827267
	16	883.180650805712
	17	889.6228031538425
	18	891.9389312985905
	19	899.2462818002177
	20	904.6363366260377

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) =

[32mCross validating...100%|████████████████████████████████| Time: 0:00:50[39m




Crossvalidation Results:
	k	MSE
	1	4496.020435857865
	2	4497.344695894095
	3	3361.68941169071
	4	2202.8965705199244
	5	1921.4245070128486
	6	1910.7811852613297
	7	1912.7143448166885
	8	1916.2062606297875
	9	1916.1455828128708
	10	1917.954847712207
	11	1927.0042439818603
	12	1929.501411201652
	13	1926.8211415104947
	14	1933.1523648384452
	15	1931.8373795518148
	16	1935.7613177451879
	17	1938.7908172291295
	18	1939.125696593709
	19	1945.8648608598573
	20	1955.6300551677696

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:33[39m




Crossvalidation Results:
	k	MSE
	1	3945.9372943125045
	2	2501.8980509082226
	3	2000.867903532041
	4	1983.4499349823109
	5	1789.3345563188873
	6	1769.606405197088
	7	1770.7602579332938
	8	1776.9448862889176
	9	1786.0713100610546
	10	1788.868352070404
	11	1786.974741262912
	12	1789.6825250966378
	13	1791.762031531015
	14	1789.8733209732818
	15	1797.3484032273159
	16	1798.9828956882602
	17	1815.9471628302138
	18	1817.8254978375662
	19	1828.6556625938233
	20	1832.6155756171254

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity param

[32mCross validating...100%|████████████████████████████████| Time: 0:00:49[39m




Crossvalidation Results:
	k	MSE
	1	3489.6555241904293
	2	3494.474126801845
	3	1778.7540509108806
	4	1339.5454613390482
	5	1210.196659493569
	6	1079.8497452077668
	7	1012.9907565111703
	8	968.2591486774797
	9	967.2258758890649
	10	926.553123292246
	11	908.1966889317815
	12	911.4376177356195
	13	937.0321705600661
	14	917.6054302684992
	15	927.2806954025779
	16	930.4018794106266
	17	930.1172708035707
	18	939.0126281025514
	19	944.6442460294405
	20	947.2831866043206

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) =

[32mCross validating...100%|████████████████████████████████| Time: 0:00:42[39m




Crossvalidation Results:
	k	MSE
	1	2752.3549631973638
	2	2323.643854200514
	3	1823.5396103527494
	4	1351.9334231380437
	5	1214.1159383507566
	6	1090.093403662984
	7	1018.1252631424201
	8	953.4431038346459
	9	945.2864281173024
	10	936.2032580962546
	11	936.4238332905865
	12	928.0225174365369
	13	928.0725265587612
	14	908.5566221987435
	15	909.5203742723178
	16	909.5531436601619
	17	909.9012070406259
	18	911.3193594249724
	19	906.5880283138355
	20	905.7548546438687

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:36[39m




Crossvalidation Results:
	k	MSE
	1	2792.3133345664046
	2	2792.7014642579416
	3	1243.597391969021
	4	976.8778352878767
	5	689.3533919071083
	6	480.156850129653
	7	393.7705860167749
	8	347.5022054515001
	9	311.7565984241739
	10	316.59918047149785
	11	317.6551442191203
	12	316.19505990113896
	13	317.99503527571403
	14	318.9771148895357
	15	321.2058184372913
	16	317.4235740330825
	17	316.44801743735934
	18	316.8785749897512
	19	317.4199250491932
	20	309.6871577934381

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:30[39m




Crossvalidation Results:
	k	MSE
	1	1988.3873437519644
	2	1463.4165343641105
	3	1198.521663528392
	4	874.0795397020013
	5	660.5650025217615
	6	478.37489077090765
	7	412.25979256365616
	8	345.6239157908992
	9	310.3086954808268
	10	283.21553251130524
	11	283.6911639285813
	12	284.1907042274252
	13	286.6805287637248
	14	289.09231882675624
	15	292.2455822635386
	16	299.8775467902505
	17	300.1673690155603
	18	291.6512097855743
	19	288.4757662838932
	20	285.3316465156499

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:22[39m




Crossvalidation Results:
	k	MSE
	1	4595.140451961215
	2	4596.136607749724
	3	1522.8865351471288
	4	1324.4636366359844
	5	1132.422039592402
	6	969.1608811192806
	7	938.9038110849252
	8	943.6119829210122
	9	950.0196603058567
	10	933.3306897420927
	11	951.726793073123
	12	956.464825956251
	13	955.2993021354027
	14	960.0400769321874
	15	956.6265858361742
	16	961.3906065864842
	17	945.7267115116947
	18	946.5960147218045
	19	950.9936476711036
	20	930.2356779665214

Best k = 20

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 20


[32mCross validating...100%|████████████████████████████████| Time: 0:00:19[39m




Crossvalidation Results:
	k	MSE
	1	2254.014500636544
	2	1819.2410369090105
	3	1471.6784995205771
	4	1321.822132851472
	5	1130.7853683189876
	6	969.0794924997331
	7	938.5560725574378
	8	875.8292832361349
	9	878.7388694046588
	10	880.8604677407432
	11	882.4615411370846
	12	886.3285243199643
	13	886.3306000509158
	14	886.7210215643662
	15	886.4992029785576
	16	888.8672563350179
	17	894.0696744409272
	18	893.2488126947819
	19	899.0475303735534
	20	897.3067338696065

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 8

[32mCross validating...100%|████████████████████████████████| Time: 0:01:03[39m




Crossvalidation Results:
	k	MSE
	1	1157.09169963501
	2	1157.2864849918913
	3	887.763124223543
	4	522.7385295872915
	5	387.5476542903084
	6	355.29639885637073
	7	391.79652397690944
	8	406.479866363582
	9	404.4761452938067
	10	420.13119173506556
	11	353.3388049095387
	12	352.4569750583696
	13	361.2930291485877
	14	378.03928616997956
	15	390.38758925799505
	16	394.3455895587669
	17	425.40130761793245
	18	428.83210952410354
	19	436.9684609769796
	20	432.0471185179016

Best k = 12

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:55[39m




Crossvalidation Results:
	k	MSE
	1	787.3203849571702
	2	609.8133336452847
	3	498.17456358747637
	4	407.7697615478791
	5	342.4816489035643
	6	278.83335530191846
	7	267.78800726698546
	8	261.9038379810549
	9	253.18171543584705
	10	247.06596444598438
	11	253.88640869634793
	12	253.59520875806456
	13	257.2751190782161
	14	261.2647209414808
	15	262.03678229415027
	16	262.84671966727325
	17	263.88501777882203
	18	268.65567240597534
	19	271.75408757002765
	20	275.8815696970643

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:49[39m




Crossvalidation Results:
	k	MSE
	1	1963.4392973954523
	2	1963.6948747663748
	3	1040.3446772971358
	4	823.2779401807754
	5	729.4940446961468
	6	738.3114737449614
	7	696.6662636189639
	8	683.1203904208451
	9	665.6105234624087
	10	582.2646800517456
	11	610.737532840987
	12	587.1165919550131
	13	587.2874233803099
	14	592.857671282334
	15	590.0077663116476
	16	592.9161801772758
	17	592.7625539301878
	18	596.6310165123954
	19	597.6564844816886
	20	606.1637191176045

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10

[32mCross validating...100%|████████████████████████████████| Time: 0:00:41[39m




Crossvalidation Results:
	k	MSE
	1	1409.2025073100926
	2	1261.468882024943
	3	1061.504367926425
	4	812.8505551595189
	5	719.9686281977872
	6	638.8641245019929
	7	617.0365150899249
	8	553.95968621195
	9	547.4522321002582
	10	536.6030772188269
	11	532.2650838983666
	12	532.2774169639497
	13	530.9765726117289
	14	531.4143930902003
	15	529.7046446279767
	16	534.1425031352474
	17	532.9704332498553
	18	532.8532436624366
	19	531.5012971375456
	20	534.1523829009996

Best k = 15

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 15
P

[32mCross validating...100%|████████████████████████████████| Time: 0:00:22[39m




Crossvalidation Results:
	k	MSE
	1	3807.5666812056415
	2	3808.104230787025
	3	3527.04418048314
	4	3296.78776137285
	5	3323.331327583927
	6	3335.808555132067
	7	3340.3161656947373
	8	3344.343402243434
	9	3349.9533396153506
	10	3355.6010662624203
	11	3357.3923460729206
	12	3358.3643821463293
	13	3347.3571256313858
	14	3331.109108889261
	15	3329.8799294670835
	16	3330.1675324524795
	17	3330.801179625309
	18	3331.230723813489
	19	3314.160014547802
	20	3313.4915791467392

Best k = 4

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k

[32mCross validating...100%|████████████████████████████████| Time: 0:00:58[39m




Crossvalidation Results:
	k	MSE
	1	2613.704761277041
	2	2050.348130083562
	3	1679.5968029861276
	4	1221.0119990112796
	5	1046.4403940592458
	6	950.488142858182
	7	930.004000666881
	8	857.3182502980322
	9	996.8199792842378
	10	1041.1472609877721
	11	1091.1332232022548
	12	1151.4937308907195
	13	1226.4166542356268
	14	1281.8973330198876
	15	1359.3481036138683
	16	1412.70675488498
	17	1463.7024992851518
	18	1538.0277286675062
	19	1573.2753636248453
	20	1634.7385242228297

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:59[39m




Crossvalidation Results:
	k	MSE
	1	3274.5552672617323
	2	3275.7609361323885
	3	1701.9297417274759
	4	1107.6402891432742
	5	977.8627721751409
	6	966.606817091293
	7	966.8651626349542
	8	967.2436966869593
	9	968.1863753038116
	10	968.4809110379614
	11	969.205734224567
	12	971.6027401920801
	13	969.5513395774042
	14	971.7965665684447
	15	975.207788790015
	16	973.7603729995894
	17	973.8030548850659
	18	976.9751058737061
	19	974.3046900161057
	20	974.9227232806293

Best k = 6

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 6
P

[32mCross validating...100%|████████████████████████████████| Time: 0:00:43[39m




Crossvalidation Results:
	k	MSE
	1	2585.5604077995285
	2	1701.4955848663274
	3	1110.5000715200026
	4	1108.487476310147
	5	1050.2824478560476
	6	926.4725711410775
	7	919.2942561069615
	8	918.7694653555834
	9	917.9336978156332
	10	913.2472784046814
	11	917.665917222675
	12	919.6665399584914
	13	923.2754393777645
	14	930.9754882049845
	15	926.020510590411
	16	927.5952473619491
	17	934.263310367992
	18	934.6659005075956
	19	941.5987710640295
	20	942.7157218962335

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 10

[32mCross validating...100%|████████████████████████████████| Time: 0:00:52[39m




Crossvalidation Results:
	k	MSE
	1	2914.446390074004
	2	2915.185563570783
	3	2799.663654304171
	4	2795.98335517963
	5	2802.3195670572504
	6	2804.2408513622713
	7	2805.273135307965
	8	2806.5515725466857
	9	2806.4989800708067
	10	2806.495035867979
	11	2806.43380464905
	12	2806.435003099098
	13	2806.33668210598
	14	2806.4141969123198
	15	2806.412940929751
	16	2806.3672350148827
	17	2806.310790825639
	18	2779.997551913848
	19	2806.3478074382047
	20	2794.312964620642

Best k = 18

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) =

[32mCross validating...100%|████████████████████████████████| Time: 0:01:13[39m




Crossvalidation Results:
	k	MSE
	1	3041.5908238736597
	2	2766.3809042601374
	3	2476.0794084031436
	4	3578.3517090231417
	5	1850.5487571450412
	6	2376.260895300882
	7	2468.1260309459917
	8	1737.2366494563641
	9	1748.2711331193511
	10	1707.0917748158897
	11	1678.203600540372
	12	1656.648050542281
	13	1648.2539667947945
	14	1630.7093387539135
	15	1615.5206045467899
	16	1614.7297084023426
	17	1613.7917575751542
	18	1614.073622745619
	19	1613.246947540616
	20	1613.3208871273046

Best k = 19

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:00:58[39m




Crossvalidation Results:
	k	MSE
	1	4710.261442055839
	2	4713.6486106275715
	3	3006.3276981358877
	4	2742.7194139328244
	5	2397.1712593805337
	6	2367.199854703687
	7	2172.8732331849847
	8	2157.5551728300184
	9	1671.875276473846
	10	1717.6567162115211
	11	1745.2905616561065
	12	1755.55759505945
	13	1780.5125230962446
	14	1813.110262236968
	15	1820.2396504632366
	16	1834.1369151458662
	17	1833.6174745772346
	18	1849.7636164259823
	19	1858.6300365336738
	20	1864.6994680722582

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:00:56[39m




Crossvalidation Results:
	k	MSE
	1	3681.455036658348
	2	3238.7342485574377
	3	2527.2812803302245
	4	2169.885212898438
	5	1956.6854812024667
	6	1772.9938816588774
	7	1612.0880825607207
	8	1757.0924613698571
	9	1578.539740349815
	10	1538.2693456793459
	11	1545.0304785860376
	12	1556.1546099361144
	13	1570.6756903672347
	14	1590.593140874219
	15	1597.4527152734358
	16	1612.0399061372964
	17	1646.1950258964591
	18	1674.6219712285736
	19	1676.5330556884032
	20	1682.9918334822198

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity par

[32mCross validating...100%|████████████████████████████████| Time: 0:00:16[39m




Crossvalidation Results:
	k	MSE
	1	3085.6947289326813
	2	3086.522463278342
	3	1280.3764809123186
	4	1093.1044373865636
	5	953.7061794457082
	6	885.7599465667346
	7	779.7304041358415
	8	656.9779509561691
	9	606.3634927138287
	10	578.8354988408313
	11	580.9813564119968
	12	582.7883064781246
	13	587.8570949447796
	14	584.9278442712252
	15	589.348234342801
	16	591.5155825897203
	17	591.6094083603214
	18	590.4720423445441
	19	585.6535789074649
	20	591.9910330564652

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1

[32mCross validating...100%|████████████████████████████████| Time: 0:00:23[39m




Crossvalidation Results:
	k	MSE
	1	2046.9518902492928
	2	1604.3141925458895
	3	1279.4022886125852
	4	1090.8345016287944
	5	948.5432327893868
	6	854.5413074120954
	7	762.2425777043516
	8	655.0644617299786
	9	600.2648864462759
	10	578.5254056905578
	11	580.126405158222
	12	582.5874534527551
	13	584.2753962601635
	14	584.257313808817
	15	589.8199339829724
	16	591.4261851548256
	17	588.3581416603167
	18	590.2244122216752
	19	592.6691363919684
	20	594.2398323046647

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1

[32mCross validating...100%|████████████████████████████████| Time: 0:00:18[39m




Crossvalidation Results:
	k	MSE
	1	1254.6862101400257
	2	1254.5305739004386
	3	498.3133815124463
	4	465.15572990140106
	5	427.3804159745016
	6	373.13888396612117
	7	357.42651167203894
	8	353.81438996524673
	9	330.31007523420953
	10	332.84151466077964
	11	335.3164681927085
	12	335.2989418749625
	13	338.0107609657507
	14	337.99650105026546
	15	342.42694539669594
	16	343.3099785191951
	17	344.3935088133412
	18	349.8478081766786
	19	352.6549375229203
	20	349.33498117803975

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

[32mCross validating...100%|████████████████████████████████| Time: 0:00:18[39m




Crossvalidation Results:
	k	MSE
	1	807.3092017166222
	2	561.1100513472287
	3	496.7763772371825
	4	439.4740441701499
	5	414.9524615143762
	6	372.70488507044183
	7	350.67597078448205
	8	335.0343833932549
	9	328.6660376019271
	10	329.7423102462267
	11	333.1794276339857
	12	336.3296329613254
	13	337.5968271423043
	14	336.58293060083247
	15	339.1859122053578
	16	340.92567766643083
	17	340.19776687070373
	18	343.32946955181166
	19	346.51272685436743
	20	350.5417971180179

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:15[39m




Crossvalidation Results:
	k	MSE
	1	2906.903091026046
	2	2905.3179160951054
	3	1501.1001671335162
	4	1262.6276681224158
	5	936.2878244454575
	6	857.8392125912603
	7	735.0252509067035
	8	639.2655099190295
	9	621.9368414101432
	10	601.507948126449
	11	607.9220593269883
	12	610.4848840059257
	13	609.0866885741165
	14	612.5165311323551
	15	618.2335320902755
	16	621.3545031688531
	17	621.7172696927438
	18	619.4370964745556
	19	622.2574325874447
	20	626.1305820854263

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 1

[32mCross validating...100%|████████████████████████████████| Time: 0:00:16[39m




Crossvalidation Results:
	k	MSE
	1	2105.14179614576
	2	1741.3248563054012
	3	1439.41052179479
	4	1157.3230727526006
	5	927.4339903542577
	6	832.772740178108
	7	697.5140344271204
	8	630.8500140447279
	9	613.7656555141391
	10	601.040555454014
	11	595.3507825701064
	12	604.3189076613398
	13	598.3895897797217
	14	600.2877922509699
	15	597.8985927783831
	16	599.1115083850472
	17	602.2456607992276
	18	605.9645759439898
	19	608.774183653265
	20	607.3150443165656

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 11
Pri

[32mCross validating...100%|████████████████████████████████| Time: 0:00:25[39m




Crossvalidation Results:
	k	MSE
	1	5838.903005107026
	2	5840.277303699699
	3	3610.5307995581115
	4	3252.7559698986674
	5	2884.7352505444696
	6	2407.814783904978
	7	2395.7624961062456
	8	2392.3280420767906
	9	2388.9627946047362
	10	2389.9608416288984
	11	2389.3073390545424
	12	2399.050029396526
	13	2389.011200897846
	14	2387.2313662130564
	15	2406.690318535553
	16	2410.744437824212
	17	2372.4474963713747
	18	2386.0784386056944
	19	2431.774344408924
	20	2424.1047027022814

Best k = 17

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramet

[32mCross validating...100%|████████████████████████████████| Time: 0:00:24[39m




Crossvalidation Results:
	k	MSE
	1	4420.910798268531
	2	3815.3776038203396
	3	3408.047339667114
	4	2993.664593009816
	5	2642.9949245294215
	6	2416.3854554521554
	7	2389.274384699736
	8	2407.5052440184268
	9	2404.371035617345
	10	2424.1794829403325
	11	2419.6731302127855
	12	2442.2193726044075
	13	2450.9557752096907
	14	2469.118677877261
	15	2486.4929424554907
	16	2474.78598056316
	17	2502.259162388748
	18	2491.2229923846135
	19	2501.213338974356
	20	2525.4216445937645

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:28[39m




Crossvalidation Results:
	k	MSE
	1	2648.4033814544323
	2	2650.1759203638812
	3	1578.0127135800426
	4	1507.044416595073
	5	1439.5084018701184
	6	1424.2534645297035
	7	1387.81726935319
	8	888.2457252587494
	9	891.5722347596021
	10	890.8838818427504
	11	895.8471418686933
	12	895.3578486280056
	13	898.2918566768792
	14	896.6058020849833
	15	902.6494751435464
	16	904.1957450615809
	17	903.9800059200336
	18	907.9672176259245
	19	906.3313197800642
	20	907.7981122830413

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 

[32mCross validating...100%|████████████████████████████████| Time: 0:00:25[39m




Crossvalidation Results:
	k	MSE
	1	2229.6075185554287
	2	2064.510977784552
	3	1307.4665616821583
	4	1076.893530722219
	5	1004.9994899583277
	6	941.4392185549376
	7	920.2327495044401
	8	885.4872973175933
	9	891.4923726167779
	10	892.6601145212669
	11	894.4656120642368
	12	897.0605489479121
	13	899.8913768403507
	14	905.0550567428904
	15	906.2174732282353
	16	906.517798355982
	17	907.7522454720125
	18	929.1003267705323
	19	934.0427514481166
	20	940.3106995046116

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) = 8


[32mCross validating...100%|████████████████████████████████| Time: 0:00:18[39m




Crossvalidation Results:
	k	MSE
	1	1285.1171564340136
	2	1285.4656464054276
	3	450.7001443746178
	4	373.0396923606716
	5	311.13343431716385
	6	294.91591784623154
	7	277.5729221870715
	8	212.50260161103537
	9	126.93891179653659
	10	131.747249311314
	11	98.72391495224515
	12	99.5736184719303
	13	99.84643147055124
	14	99.94530310511546
	15	101.03685467477385
	16	101.1008932630591
	17	102.01158765238569
	18	102.25949981970611
	19	100.68972916194627
	20	102.2270574924038

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (

[32mCross validating...100%|████████████████████████████████| Time: 0:00:17[39m




Crossvalidation Results:
	k	MSE
	1	691.399176475003
	2	543.9631235230005
	3	456.12426459750725
	4	382.56270828081995
	5	273.68905555689537
	6	211.43497616676964
	7	151.84818889216655
	8	129.67372953129575
	9	105.96490540644203
	10	98.09527908373737
	11	98.24834908688254
	12	98.78344365051976
	13	99.20984614579754
	14	98.92758347974122
	15	99.66813610320526
	16	99.6986779677481
	17	99.99252061589654
	18	100.7180069050018
	19	100.42705147742974
	20	101.0165898298944

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k)

[32mCross validating...100%|████████████████████████████████| Time: 0:00:09[39m




Crossvalidation Results:
	k	MSE
	1	1410.553286162747
	2	1410.8205285658187
	3	612.4516683903041
	4	453.86806870772455
	5	351.1035284439664
	6	308.6552004466055
	7	250.4608636320325
	8	230.9422965922345
	9	214.02524930038066
	10	214.60853566229395
	11	216.82152047347236
	12	218.3705689355747
	13	221.02080909991247
	14	224.74082830318648
	15	227.60003790867708
	16	227.80217157372937
	17	228.90663162360318
	18	231.46746100333488
	19	234.0251268513403
	20	235.32631085478653

Best k = 9

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:00:11[39m




Crossvalidation Results:
	k	MSE
	1	1082.559520621593
	2	855.6879594632227
	3	617.6191107903982
	4	457.2679162409461
	5	354.03410676951773
	6	286.46871453682684
	7	251.35752743778949
	8	231.503491475172
	9	214.56872522923308
	10	214.7998368433131
	11	213.75211498265367
	12	215.81793910642932
	13	220.62819196224865
	14	220.73908303406205
	15	225.4837520667072
	16	224.7622402173621
	17	226.45414113184984
	18	226.88407232306432
	19	230.20380538600284
	20	231.00837747773951

Best k = 11

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity paramete

[32mCross validating...100%|████████████████████████████████| Time: 0:01:02[39m




Crossvalidation Results:
	k	MSE
	1	4359.433040133937
	2	4361.114796346383
	3	1765.8461028236236
	4	1405.4078028874605
	5	1927.0029077968939
	6	1541.7058364880088
	7	1339.652609577318
	8	1422.3909650188416
	9	1658.2410030647125
	10	1888.6210869274878
	11	1910.4004295811724
	12	1904.5275435228025
	13	1933.957799699717
	14	1919.210658151604
	15	1983.9384179415788
	16	1938.3934920775919
	17	1992.348750725069
	18	1989.6654958105598
	19	1983.8385132385856
	20	1959.8486819845289

Best k = 7

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parame

[32mCross validating...100%|████████████████████████████████| Time: 0:01:03[39m




Crossvalidation Results:
	k	MSE
	1	2626.4108653865396
	2	1957.8067316621168
	3	1723.874311624655
	4	1402.6643050401074
	5	1325.1708480147224
	6	1289.4059399338028
	7	1219.9514137142296
	8	1197.961169927704
	9	1200.4140420327583
	10	1202.1261414950739
	11	1205.4802116103635
	12	1205.0805072958563
	13	1206.8045054345082
	14	1205.7086051592305
	15	1208.356720009313
	16	1211.2108499070341
	17	1212.4057901180313
	18	1211.8775894827952
	19	1214.795458227582
	20	1214.0862784048843

Best k = 8

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity para

[32mCross validating...100%|████████████████████████████████| Time: 0:00:14[39m




Crossvalidation Results:
	k	MSE
	1	1425.4262809217332
	2	1425.9019248608445
	3	527.7941970911991
	4	419.24903992749637
	5	397.0027810278634
	6	362.81349431871547
	7	361.3582692568351
	8	358.7956458530043
	9	354.932652320649
	10	348.93409939169607
	11	348.2200051389516
	12	347.55905463033
	13	346.6825601950774
	14	348.5471232436593
	15	348.8493868144945
	16	354.3649244944348
	17	356.84257552340193
	18	358.2755727597817
	19	361.2538653188203
	20	359.04829721953234

Best k = 13

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter (k) =

[32mCross validating...100%|████████████████████████████████| Time: 0:00:14[39m




Crossvalidation Results:
	k	MSE
	1	880.0617780453556
	2	674.4509856546438
	3	530.7150200623353
	4	423.52859050425275
	5	399.20316823856956
	6	366.3683945434364
	7	357.37176071917213
	8	356.1899623251478
	9	356.7913274173169
	10	348.4037746063592
	11	349.90754160081246
	12	348.77631651858707
	13	350.3270670918856
	14	353.80277605645847
	15	354.56248637862535
	16	358.19045541874016
	17	360.92225692378616
	18	360.1472806883594
	19	364.53645292601607
	20	362.1857033948058

Best k = 10

****                   MendelIHT Version 1.4.0                  ****
****     Benjamin Chu, Kevin Keys, Chris German, Hua Zhou       ****
****   Jin Zhou, Eric Sobel, Janet Sinsheimer, Kenneth Lange    ****
****                                                            ****
****                 Please cite our paper!                     ****
****         https://doi.org/10.1093/gigascience/giaa044        ****

Running sparse Multivariate Gaussian regression
Link functin = IdentityLink()
Sparsity parameter

([8.0, 5.0, 8.0, 8.0, 7.0, 8.0, 8.0, 4.0, 5.0, 8.0, 7.0, 8.0, 7.0, 8.0, 7.0, 7.0, 8.0, 8.0, 7.0, 8.0], [200.0, 64.0, 195.0, 71.0, 48.0, 200.0, 132.0, 200.0, 68.0, 200.0, 200.0, 20.0, 21.0, 17.0, 40.0, 85.0, 23.0, 8.0, 200.0, 18.0], [12.0, 6.0, 11.0, 20.0, 20.0, 12.0, 10.0, 4.0, 6.0, 18.0, 9.0, 10.0, 9.0, 10.0, 17.0, 8.0, 11.0, 9.0, 7.0, 13.0], [7.0, 4.0, 8.0, 8.0, 7.0, 8.0, 8.0, 6.0, 6.0, 8.0, 8.0, 8.0, 7.0, 8.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0], [101.0, 37.0, 29.0, 38.0, 14.0, 89.0, 64.0, 200.0, 48.0, 200.0, 124.0, 13.0, 18.0, 14.0, 9.0, 28.0, 12.0, 6.0, 154.0, 14.0], [12.0, 6.0, 20.0, 10.0, 8.0, 10.0, 15.0, 8.0, 10.0, 19.0, 10.0, 10.0, 9.0, 11.0, 7.0, 8.0, 10.0, 11.0, 8.0, 10.0])

In [22]:
@show mean(no_init_TP)
@show mean(yes_init_TP)
[no_init_TP yes_init_TP]

mean(no_init_TP) = 7.2
mean(yes_init_TP) = 7.35


20×2 Matrix{Float64}:
 8.0  7.0
 5.0  4.0
 8.0  8.0
 8.0  8.0
 7.0  7.0
 8.0  8.0
 8.0  8.0
 4.0  6.0
 5.0  6.0
 8.0  8.0
 7.0  8.0
 8.0  8.0
 7.0  7.0
 8.0  8.0
 7.0  7.0
 7.0  7.0
 8.0  8.0
 8.0  8.0
 7.0  8.0
 8.0  8.0

In [23]:
@show mean(no_init_iter)
@show mean(yes_init_iter)
[no_init_iter yes_init_iter]

mean(no_init_iter) = 100.5
mean(yes_init_iter) = 60.6


20×2 Matrix{Float64}:
 200.0  101.0
  64.0   37.0
 195.0   29.0
  71.0   38.0
  48.0   14.0
 200.0   89.0
 132.0   64.0
 200.0  200.0
  68.0   48.0
 200.0  200.0
 200.0  124.0
  20.0   13.0
  21.0   18.0
  17.0   14.0
  40.0    9.0
  85.0   28.0
  23.0   12.0
   8.0    6.0
 200.0  154.0
  18.0   14.0

In [24]:
@show mean(no_init_best_k)
@show mean(yes_init_best_k)
[no_init_best_k yes_init_best_k]

mean(no_init_best_k) = 11.1
mean(yes_init_best_k) = 10.6


20×2 Matrix{Float64}:
 12.0  12.0
  6.0   6.0
 11.0  20.0
 20.0  10.0
 20.0   8.0
 12.0  10.0
 10.0  15.0
  4.0   8.0
  6.0  10.0
 18.0  19.0
  9.0  10.0
 10.0  10.0
  9.0   9.0
 10.0  11.0
 17.0   7.0
  8.0   8.0
 11.0  10.0
  9.0  11.0
  7.0   8.0
 13.0  10.0

## Can we increase IHT's true positive rate by also searching for nearby SNPs?

Short answer: not really (the true positive rate increases by only about 1-2 percentage points)

In [29]:
TP, TP_LD = process_iht(0.9, verbose=false)
@show mean(TP), mean(TP_LD);

k = 10, r = 2, polygenic model

(mean(TP), mean(TP_LD)) = (0.42875, 0.43875)


In [30]:
TP, TP_LD = process_iht(0.5)
@show mean(TP), mean(TP_LD);

k = 10, r = 2, polygenic model

(mean(TP), mean(TP_LD)) = (0.42875, 0.44125)


## Conclusion:
+ mvPLINK and GEMMA have essentially identical power, about 20% higher than IHT
+ The high power of mvPLINK and GEMMA comes with an **extremely high** false positive rate (type I error)
+ QQ and Manhattan plots for GEMMA and PLINK often look insanely bad
+ **Simulated genotypes *do not* produce insane p-values for GEMMA and mvPLINK.**
+ Searching for SNPs in LD with the causal SNPs does not meaningfully increase the true positive rate for IHT