# Simulations using NFBC1966 results

Using chromosome 1 of the NFBC (Stampeed) data ($n=5340$ samples and $p = 24523$ SNPs), let us simulate:

## Polygenic model

For $r$ traits, our simulation model is:

$$\mathbf{Y}_{r \times n} \sim \text{MatrixNormal}(\mathbf{B}_{r \times p}\mathbf{X}_{p \times n}, \ \ \Sigma_{r \times r} , \ \ \sigma_g^2\Phi_{n \times n} + \sigma_e^2 \mathbf{I}_{n \times n})$$

+ $\mathbf{X}_{p \times n}$ contains *all* predictors (genetic + non-genetic)
+ $\mathbf{B}_{r \times p}$ contains (true) regression coefficients. $k$ entries are non-zero
+ $\Phi$ is the GRM estimated from genotypes
+ $\Sigma_{r \times r}$ is the symmetric PD row (trait) covariance matrix where max condition number is set to 10
+ $\sigma_g^2\Phi_{n \times n} + \sigma_e^2 \mathbf{I}_{n \times n}$ is the column (sample) covariance matrix
+ $\sigma_g^2 = 0.1$ and $\sigma_e^2 = 0.9$ (thus heritability is 10%)
+ $k$ causal SNPs are chosen at a fixed distance from each other across the genome
+ $\beta_i:$ every causal SNP affects every phenotype. The effect sizes of causal SNP $i$ sum to 0.5 across the $r$ traits
+ $r = \{2, 3, 4\}$  (number of traits)
+ $n=5340$ samples
+ $p = 24523$ SNPs
+ q = 5 fold cross validation
+ iterates ≥5 times
+ init_beta=true
+ debias=false

## Sparse model

With $r$ traits, each sample's phenotype $\mathbf{y}_{i} \in \mathbb{R}^{r \times 1}$ is simulated under

$$\mathbf{y}_{i}^{r \times 1} \sim N(\mathbf{B}^{r \times p}\mathbf{x}_{i}^{p \times 1}, \ \ \Sigma_{r \times r})$$

This model assumes each sample is independent.

+ $\mathbf{X}_{p \times n}$ contains *all* predictors (genetic + non-genetic)
+ $\mathbf{B}_{r \times p}$ contains (true) regression coefficients. $k$ entries are non-zero
+ $B_{ij} \in \{0.05, 0.1, ..., 0.5\}$ for $k$ positions
+ $k = \{10, 20, 30\}$ (number of causal SNPs)
+ $\Sigma_{r \times r}$ is the symmetric PD row (trait) covariance matrix where max condition number is set to 10
+ $r = \{2, 3, 4\}$  (number of traits)
+ $n=5340$ samples
+ $p = 24523$ SNPs
+ q = 5 fold cross validation
+ iterates ≥5 times
+ init_beta=true
+ debias=false

# Summary

In [2]:
using Statistics

"""
Find the `nsnps` most significant SNPs for mvPLINK in simulation `sim`.
"""
function get_top_mvPLINK_SNPs(set::Int, sim::Int, nsnps::Int)
    # Returns the row indices (into `plink.mqfam.total`) of the `nsnps` smallest
    # mvPLINK p-values for simulation `sim` of simulation set `set`, most significant
    # first. Indexing throws a BoundsError if `nsnps` exceeds the number of rows.
    dir = "set$set/sim$sim/"

    # read mvPLINK result (space-delimited; runs of spaces count as one delimiter)
    mvplink_df = CSV.read(dir * "plink.mqfam.total", DataFrame, delim=' ', ignorerepeated=true)

    # get pvalues, possibly accounting for "NA"s: when the column was not parsed as
    # Float64 its entries are handled as strings, and "NA" is mapped to a p-value of 1
    # so that such SNPs are ranked last. The DataFrame itself is left unmodified.
    raw = mvplink_df[!, :P]
    if eltype(raw) == Float64
        pval = raw
    else
        pval = [x == "NA" ? 1.0 : parse(Float64, x) for x in raw]
    end

    # sortperm is ascending, so the leading entries are the most significant SNPs
    return sortperm(pval)[1:nsnps]
end

"""
Find the position of the `nsnps` most significant SNPs for GEMMA in simulation `sim`.
"""
function get_top_GEMMA_SNP_ids(set::Int, sim::Int, nsnps::Int)
    # Returns the row indices (into GEMMA's association table) of the `nsnps` smallest
    # Wald p-values for simulation `sim` of simulation set `set`.
    assoc_file = "set$set/sim$sim/" * "gemma.sim$sim.assoc.txt"
    wald_pvalues = CSV.read(assoc_file, DataFrame)[!, :p_wald]

    # ascending order: most significant SNPs come first
    ranking = sortperm(wald_pvalues)
    return ranking[1:nsnps]
end

"""
Find the significant SNPs returned by IHT in simulation `sim`.
"""
function get_IHT_SNPs(set::Int, sim::Int)
    # Returns the indices that have a non-zero IHT coefficient in `iht_beta1.txt` or
    # `iht_beta2.txt` of simulation `sim` in set `set`. Indices found in the first
    # file come first, followed by those that only appear in the second file.
    folder = "set$set/sim$sim/"
    nonzero_per_file = map(("iht_beta1.txt", "iht_beta2.txt")) do fname
        findall(!iszero, vec(readdlm(folder * fname)))
    end

    # concatenating and de-duplicating keeps the order of first appearance
    return unique(vcat(nonzero_per_file...))
end

"""
Get positions for the truly causal SNPs in simulation `sim`. 
"""
function get_true_SNPs(set::Int, sim::Int)
    # Returns the row indices of `trueb.txt` (simulation `sim` of set `set`) that hold
    # at least one non-zero entry. Rows are reported in the order in which they are
    # first met when the matrix is scanned column by column.
    # Presumably each row of `trueb.txt` is one SNP — confirm against the writer.
    dir = "set$set/sim$sim/"
    trueB = readdlm(dir * "trueb.txt")

    seen = Set{Int}()
    causal_rows = Int[]
    for idx in findall(!iszero, trueB)
        row = idx[1]
        if !(row in seen)
            push!(seen, row)
            push!(causal_rows, row)
        end
    end
    return causal_rows
end

"""
Get the causal SNPs' positions in GEMMA's result for simulation `sim`. Note that GEMMA performs SNP filtering.
"""
function get_gemma_causal_snp_pos(set::Int, sim::Int)
    # Returns `(gemma_df, causal_snp_idx, iht_snps_idx)` for simulation `sim` of set `set`:
    #   gemma_df       - GEMMA's association table with an extra `:pos` column
    #   causal_snp_idx - rows of `gemma_df` holding the truly causal SNPs
    #   iht_snps_idx   - rows of `gemma_df` holding the SNPs selected by IHT
    # SNPs are matched between the PLINK data and GEMMA's table by rsID.
    nfbc = SnpData("NFBC.qc.imputeBy0.chr.1")
    dir = "set$set/sim$sim/"
    gemma_df = CSV.read(dir * "gemma.sim$sim.assoc.txt", DataFrame)
    gemma_snps = gemma_df[!, :rs]

    # causal SNPs: indices in the full data -> rsID -> row in GEMMA's table.
    # `indexin` yields `nothing` for an rsID missing from GEMMA's output, in which
    # case the conversion to Vector{Int} throws.
    causal_snp_rsID = nfbc.snp_info.snpid[get_true_SNPs(set, sim)]
    causal_snp_idx = convert(Vector{Int}, indexin(causal_snp_rsID, gemma_snps))

    # also need IHT's selected SNPs (looked up in the same set and simulation)
    iht_snps_rsID = nfbc.snp_info.snpid[get_IHT_SNPs(set, sim)]
    iht_snps_idx = convert(Vector{Int}, indexin(iht_snps_rsID, gemma_snps))

    # also need SNP positions in GEMMA dataframe
    gemma2nfbc_idx = convert(Vector{Int}, indexin(gemma_snps, nfbc.snp_info.snpid))
    gemma_snp_pos = Vector{Int}(nfbc.snp_info.position[gemma2nfbc_idx])
    insertcols!(gemma_df, size(gemma_df, 2) + 1, :pos => gemma_snp_pos)

    return gemma_df, causal_snp_idx, iht_snps_idx
end

"""
Imports GEMMA p-values, causal SNPs, and IHT-selected SNPs, and draws a Manhattan plot using MendelPlots.jl
"""
function plot_gemma_manhattan(sim::Int)
    # Draws a Manhattan plot of GEMMA's p-values for simulation `sim`, passing the
    # causal SNPs and the IHT-selected SNPs as the two annotation sets, writes it to a
    # PNG under `NFBCsim/` and displays that file.
    # NOTE(review): `get_gemma_causal_snp_pos` is called with a single argument, while
    # the definition visible in this file takes `(set, sim)` — confirm that a matching
    # one-argument method exists.
    gemma_df, causal_snps, iht_snps = get_gemma_causal_snp_pos(sim)
    rename!(gemma_df, [:p_wald => :pval, :rs => :snpid])

    # floor the p-values at 1e-50 (in place)
    pvals = gemma_df[!, :pval]
    too_small = findall(p -> p < 1e-50, pvals)
    pvals[too_small] .= 1e-50

    # one empty annotation string per row, used as the annotation variable below
    blanks = fill("", size(gemma_df, 1))
    insertcols!(gemma_df, size(gemma_df, 2) + 1, :empty_col => blanks)

    png = "NFBCsim/manhattan_gemma_sim$sim.png"
    manhattan(gemma_df, outfile = png,
        annotateinds = causal_snps, annotateinds2 = iht_snps,
        annotatevar=:empty_col, titles="GEMMA simulation $sim")
    display("image/png", read(png))
end

"""
Imports mvPLINK p-values, causal SNPs, and IHT-selected SNPs, and draws a Manhattan plot using MendelPlots.jl
"""
function plot_mvPLINK_manhattan(sim::Int)
    # Draws a Manhattan plot of mvPLINK's p-values for simulation `sim`, passing the
    # causal SNPs and the IHT-selected SNPs as the two annotation sets, writes it to a
    # PNG under `NFBCsim/` and displays that file.
    # NOTE(review): `get_true_SNPs` and `get_IHT_SNPs` are called with a single
    # argument, while the definitions visible in this file take `(set, sim)` — confirm
    # that matching one-argument methods exist.

    # mvPLINK p-values; a non-Float64 column has its "NA" entries set to "1.0"
    # before being parsed
    mvplink_df = CSV.read("NFBCsim/sim$sim/plink.mqfam.total", DataFrame, delim=' ', ignorerepeated=true)
    raw = mvplink_df[!, :P]
    if eltype(raw) != Float64
        raw[findall(x -> x == "NA", raw)] .= "1.0"
        pval = parse.(Float64, raw)
    else
        pval = raw
    end

    # limit smallest pvalues
    too_small = findall(p -> p < 1e-50, pval)
    pval[too_small] .= 1e-50

    # annotation sets: causal SNPs first, then the SNPs picked by IHT
    causal_snps = get_true_SNPs(sim)
    iht_snps = get_IHT_SNPs(sim)

    # build the table handed to MendelPlots from the SNP information of the PLINK data
    snpdata = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
    info = snpdata.snp_info
    rename!(info, [:chr, :snpid, :genetic_distance, :pos, :allele1, :allele2])
    insertcols!(info, size(info, 2) + 1, :pval => pval)
    blanks = fill("", size(info, 1))
    insertcols!(info, size(info, 2) + 1, :empty_col => blanks)

    # plot
    png = "NFBCsim/manhattan_mvPLINK_sim$sim.png"
    manhattan(info, outfile = png,
        annotateinds = causal_snps, annotateinds2 = iht_snps,
        annotatevar=:empty_col, titles="mvPLINK simulation $sim")
    display("image/png", read(png))
end

"""
Imports mvPLINK p-values and draws a QQ plot using MendelPlots.jl
"""
function plot_mvPLINK_QQ(sim::Int)
    # Draws a QQ plot of mvPLINK's p-values for simulation `sim`, writes it to a PNG
    # under `NFBCsim/` and displays that file.
    mvplink_df = CSV.read("NFBCsim/sim$sim/plink.mqfam.total", DataFrame, delim=' ', ignorerepeated=true)

    # a non-Float64 column has its "NA" entries set to "1.0" before being parsed
    raw = mvplink_df[!, :P]
    if eltype(raw) != Float64
        raw[findall(x -> x == "NA", raw)] .= "1.0"
        pval = parse.(Float64, raw)
    else
        pval = raw
    end

    # limit smallest pvalues
    too_small = findall(p -> p < 1e-50, pval)
    pval[too_small] .= 1e-50

    png = "NFBCsim/QQ_mvPLINK_sim$sim.png"
    qq(pval, outfile = png,
        ylabel="mvPLINK observed -log10(p)", titles="mvPLINK simulation $sim")
    display("image/png", read(png))
end

"""
Imports GEMMA p-values and draws a QQ plot using MendelPlots.jl
"""
function plot_gemma_QQ(sim::Int)
    # Draws a QQ plot of GEMMA's Wald p-values for simulation `sim`, writes it to a PNG
    # under `NFBCsim/` and displays that file.
    gemma_df = CSV.read("NFBCsim/sim$sim/gemma.sim$sim.assoc.txt", DataFrame)
    wald = gemma_df[!, :p_wald]

    # limit smallest pvalues
    too_small = findall(p -> p < 1e-50, wald)
    wald[too_small] .= 1e-50

    png = "NFBCsim/QQ_gemma_sim$sim.png"
    qq(wald, outfile = png,
        ylabel="GEMMA observed -log10(p)", titles="GEMMA simulation $sim")
    display("image/png", read(png))
end

# """
# Summarize all simulations for IHT, mvPLINK, GEMMA in computation time, true positives,
# false positives, and false positive rates. 
# """
# function summarize_repeats()
#     model = "NFBCsim"
#     n, p = 5340, 24523
#     sims = 1:50 # k = 10, r = 2, βoverlap=2, polygenic model
#     nfbc = SnpData("/Users/biona001/Benjamin_Folder/UCLA/research/stampeed/imputed_with_0/NFBC_imputed_with_0")
#     snp_rsID = nfbc.snp_info.snpid

#     iht_time, iht_power, iht_FP, iht_FPR = Float64[], Float64[], Float64[], Float64[]
#     mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPR = Float64[], Float64[], Float64[], Float64[]
#     gemma_time, gemma_power, gemma_FP, gemma_FPR = Float64[], Float64[], Float64[], Float64[]

#     for sim in sims
#         dir = "NFBCsim/sim$sim/"
#         try
#             # correct SNPs
#             trueB = readdlm(dir * "trueb.txt")
#             causal_snps = unique([x[1] for x in findall(!iszero, trueB)])
#             causal_snps_rsID = snp_rsID[causal_snps]

#             # IHT
#             iht_β1 = vec(readdlm(dir * "iht_beta1.txt"))
#             iht_β2 = vec(readdlm(dir * "iht_beta2.txt"))
#             detected_snps = findall(!iszero, iht_β1) ∪ findall(!iszero, iht_β2)
#             ihtpower, ihtFP, ihtFPR = power_and_fpr(p, causal_snps, detected_snps)

#             # MVPLINK
#             plinkpower, plinkFP, plinkFPR = process_mvPLINK(dir * "plink.mqfam.total", causal_snps)

#             # GEMMA 
#             gemmapower, gemmaFP, gemmaFPR = process_gemma_result(dir * "gemma.sim$sim.assoc.txt", causal_snps_rsID)
            
#             push!(iht_power, ihtpower); push!(iht_FP, ihtFP); push!(iht_FPR, ihtFPR); 
#             push!(mvPLINK_power, plinkpower); push!(mvPLINK_FP, plinkFP); push!(mvPLINK_FPR, plinkFPR); 
#             push!(gemma_power, gemmapower); push!(gemma_FP, gemmaFP); push!(gemma_FPR, gemmaFPR);
#         catch
#             println("simulation $sim failed!")
#         end
#     end

#     return iht_time, iht_power, iht_FP, iht_FPR,
#         mvPLINK_time, mvPLINK_power, mvPLINK_FP, mvPLINK_FPR,
#         gemma_time, gemma_power, gemma_FP, gemma_FPR
# end

"""
For each simulation set, after performing n simulations using `run_repeats`,
this function reads the summary files for each simulation and summarizes the result. 
"""
function read_summary(; verbose=true, sets=1:6, sims=1:100, outfile="summary.txt", latex=false)
    # Keywords:
    #   verbose - also print each set's report to stdout
    #   sets    - simulation sets to summarize (directories `set<set>/`)
    #   sims    - simulation numbers looked up inside every set (`set<set>/sim<sim>/`)
    #   outfile - file that receives the plain-text report (overwritten)
    #   latex   - when `verbose`, print LaTeX table rows to stdout instead of the
    #             plain-text lines (the report written to `outfile` is always plain)
    # Returns `nothing`.
    #
    # A simulation only counts as a successful run if its summary file exists and all
    # four method lines can be parsed. Results of a simulation are recorded for all
    # methods or for none, so every reported statistic is based on exactly the
    # `successes` simulations named in the report header.
    regex = r"= (\d+\.\d+) seconds, pleiotropic power = (.+), independent power = (\d+\.\d+), FP = (\d+), FPR = (\d\.\d+e?-?\d*), λ = (.+)"

    # polygenic beta = sign * Uniform{0.05, …, 0.5}
    # max condition number = 10
    # q = 5, iterates ≥5 times, init_beta=true, debias=false)
    # sim set 1 are for k = 10, r = 2, βoverlap = 3, path = 5:5:50 (then search around best k)
    # sim set 2 are for k = 20, r = 2, βoverlap = 5, path = 5:5:50 (then search around best k)
    # sim set 3 are for k = 30, r = 2, βoverlap = 7, path = 5:5:50 (then search around best k)
    # sim set 4 are for k = 10, r = 3, βoverlap = 2, path = 5:5:50 (then search around best k)
    # sim set 5 are for k = 20, r = 3, βoverlap = 5, path = 5:5:50 (then search around best k)
    # sim set 6 are for k = 30, r = 3, βoverlap = 7, path = 5:5:50 (then search around best k)
    # sim set 7 are for k = 10, r = 4, βoverlap = 2, path = 5:5:50 (then search around best k)
    # sim set 8 are for k = 20, r = 4, βoverlap = 3, path = 5:5:50 (then search around best k)
    # sim set 9 are for k = 30, r = 4, βoverlap = 5, path = 5:5:50 (then search around best k)

    # Methods in the order of their lines in a simulation's summary file.
    # `nfields` is how many leading regex captures must be numeric for the line to be
    # accepted: time, pleiotropic power, independent power, FP, FPR (and λ for the
    # last two methods). FPR and λ are validated but not part of the report.
    tools = (
        (label = "mIHT", tex = "mIHT", nfields = 5),
        (label = "uIHT", tex = "uIHT", nfields = 5),
        (label = "mvPLINK", tex = "CCA", nfields = 6),
        (label = "gemma", tex = "mvLMM", nfields = 6),
    )
    nheader = 5                                        # lines skipped before the first method line
    metric_names = ("time", "plei TP", "indep TP", "FP")
    decimals = (1, 2, 2, 1)                            # rounding used for each reported metric

    # compute summary statistics
    open(outfile, "w") do summary_io
        for set in sets
            successes = 0
            # results[j] collects one vector of parsed fields per successful simulation
            results = [Vector{Float64}[] for _ in tools]

            # read each simulation's result
            for sim in sims
                path = "set$set/sim$sim/summary.txt"
                isfile(path) || continue
                parsed = _read_simulation_summary(path, regex, tools, nheader)
                parsed === nothing && continue
                for (j, fields) in enumerate(parsed)
                    push!(results[j], fields)
                end
                successes += 1
            end

            # summary statistics (mean ± standard deviation over the successful runs;
            # these are NaN when there are too few runs to compute them)
            header = "set $set summary (successful run = $successes):"
            plain_lines, tex_lines = String[], String[]
            for (j, tool) in enumerate(tools)
                stats = map(1:4) do c
                    column = Float64[fields[c] for fields in results[j]]
                    (round(mean(column), digits=decimals[c]), round(std(column), digits=decimals[c]))
                end
                plain = tool.label * " " *
                    join(["$(metric_names[c]) = $(stats[c][1]) ± $(stats[c][2])" for c in 1:4], ", ")
                tex = "\\texttt{$(tool.tex)} & " *
                    join(["\$$(m) \\pm $(sd)\$" for (m, sd) in stats], " & ") * "\\\\"
                if j == length(tools)
                    # blank line after the last method separates consecutive sets
                    plain *= " \n"
                    tex *= " \n"
                end
                push!(plain_lines, plain)
                push!(tex_lines, tex)
            end

            println(summary_io, header)
            foreach(line -> println(summary_io, line), plain_lines)

            if verbose
                println(header)
                foreach(println, latex ? tex_lines : plain_lines)
            end
        end
    end

    return nothing
end

# Read one simulation's summary file and parse the line of every method in `tools`.
# Returns one vector of parsed fields per method, or `nothing` if the file cannot be
# read, is too short, or any method line cannot be parsed.
function _read_simulation_summary(path, regex, tools, nheader)
    lines = try
        readlines(path)
    catch err
        # best effort: an unreadable file only excludes this simulation
        @warn "could not read simulation summary, skipping it" path exception = err
        return nothing
    end
    length(lines) >= nheader + length(tools) || return nothing

    parsed = Vector{Float64}[]
    for (j, tool) in enumerate(tools)
        fields = _parse_summary_line(regex, lines[nheader + j], tool.nfields)
        fields === nothing && return nothing
        push!(parsed, fields)
    end
    return parsed
end

# Parse one method line of a simulation summary. Returns the first `nfields` regex
# captures as Float64 values, or `nothing` if the line does not match `regex` or one
# of those captures is not a number.
function _parse_summary_line(regex::Regex, line::AbstractString, nfields::Integer)
    m = match(regex, line)
    m === nothing && return nothing
    fields = Vector{Float64}(undef, nfields)
    for i in 1:nfields
        value = tryparse(Float64, m[i])
        value === nothing && return nothing
        fields[i] = value
    end
    return fields
end
read_summary()

set 1 summary (successful run = 100):
\texttt{mIHT} & $164.6 \pm 69.3$ & $0.92 \pm 0.16$ & $0.76 \pm 0.2$ & $3.7 \pm 6.4$\\
\texttt{uIHT} & $114.9 \pm 48.6$ & $0.93 \pm 0.16$ & $0.72 \pm 0.2$ & $1.4 \pm 3.7$\\
\texttt{CCA} & $152.6 \pm 57.3$ & $0.96 \pm 0.14$ & $0.78 \pm 0.2$ & $77.8 \pm 40.6$\\
\texttt{mvLMM} & $307.7 \pm 121.4$ & $0.95 \pm 0.15$ & $0.76 \pm 0.2$ & $42.8 \pm 18.5$\\ 

set 2 summary (successful run = 99):
\texttt{mIHT} & $214.4 \pm 100.1$ & $0.91 \pm 0.12$ & $0.75 \pm 0.14$ & $5.7 \pm 6.0$\\
\texttt{uIHT} & $169.6 \pm 81.9$ & $0.86 \pm 0.16$ & $0.72 \pm 0.16$ & $2.4 \pm 2.5$\\
\texttt{CCA} & $226.8 \pm 101.9$ & $0.95 \pm 0.09$ & $0.79 \pm 0.15$ & $125.3 \pm 55.3$\\
\texttt{mvLMM} & $449.9 \pm 221.7$ & $0.93 \pm 0.1$ & $0.75 \pm 0.16$ & $66.1 \pm 22.8$\\ 

set 3 summary (successful run = 86):
\texttt{mIHT} & $227.9 \pm 41.1$ & $0.93 \pm 0.09$ & $0.73 \pm 0.12$ & $5.9 \pm 4.6$\\
\texttt{uIHT} & $213.8 \pm 45.7$ & $0.9 \pm 0.11$ & $0.69 \pm 0.12$ & $3.2 \pm 3.8$\\
\texttt