# GWAS Analysis using Linear Mixed Effect Models

In [1]:
versioninfo()

Julia Version 0.6.4
Commit 9d11f62bcb (2018-07-09 19:09 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin14.5.0)
  CPU: Intel(R) Core(TM) i7-6567U CPU @ 3.30GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, skylake)


### Load packages 

In [2]:
# packages from openMendel
using SnpArrays,VarianceComponentModels
# packages from Julia base
using Distributions

### Read in family structure and trait

In [3]:
pedLMM = readcsv("SNP_29a.fam", Any; header = false)
Trait1 = convert(Vector{Float64}, pedLMM[:, 7])
#Trait2 = convert(Vector{Float64}, pedLMM[:, 8])
#Y = [Trait1 Trait2]
sex = map(x -> strip(x) == "F"? 1.0 : 0.0,  pedLMM[:, 5])

212-element Array{Float64,1}:
 1.0
 1.0
 0.0
 0.0
 1.0
 1.0
 0.0
 0.0
 1.0
 0.0
 1.0
 0.0
 1.0
 ⋮  
 0.0
 0.0
 0.0
 1.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [4]:
Trait1

212-element Array{Float64,1}:
 29.2056
 31.8018
 37.8214
 35.0804
 28.329 
 36.1793
 42.881 
 40.9832
 35.5504
 48.0605
 37.6057
 46.3675
 35.0578
  ⋮     
 38.8598
 42.2151
 40.2603
 36.0904
 37.7272
 40.46  
 36.5984
 40.1652
 44.1222
 37.7962
 38.9626
 40.5131

### Read in genotypes and calculate GRM

In [5]:
snpbinLMM = SnpArray("SNP_29a")
maf, = summarize(snpbinLMM)
snpbinLMM = snpbinLMM[:, maf .> 0.05]
ex29agrm = grm(snpbinLMM; method = :GRM)

[1m[36mINFO: [39m[22m[36mv1.0 BED file detected
[39m

212×212 Array{Float64,2}:
  0.498264     0.0080878    0.0164327   …   0.0246825    0.00181856
  0.0080878    0.498054    -0.0212599      -0.0285927   -0.0226525 
  0.0164327   -0.0212599    0.499442       -0.0219661   -0.00748536
  0.253627    -0.00160532   0.282542        0.00612693  -0.00339125
  0.126098     0.253365     0.128931       -0.0158446   -0.00633959
 -0.014971    -0.00266073  -0.00243384  …   0.00384757   0.0145936 
 -0.0221357    0.0100492   -0.0107012      -0.0148443   -0.00127783
 -0.01629     -0.00749253  -0.015372       -0.0163305   -0.00258392
 -0.016679     0.00353587  -0.0128844      -0.0332489   -0.00707839
 -0.0176101   -0.00996912  -0.0158473      -0.00675875  -0.0122339 
 -0.0162558    0.00938592   0.0064231   …  -0.00510882   0.0168778 
 -0.0167487    0.00414544  -0.00936538     -0.0134863    0.0020952 
 -0.031148     0.00112387  -0.010794        0.00383105   0.0198635 
  ⋮                                     ⋱   ⋮                      
 -0.00865735  -0.00335

In [6]:
# columns are: :chrom, :snpid, :?, :pos, :allele1, :allele2
snpLMM = readdlm("SNP_29a.bim"; header = false)
snpLMM = snpLMM[maf .> 0.05,:]
snpid = map(x -> strip(string(x)), snpLMM[:, 2])

137741-element Array{AbstractString,1}:
 "rs56343121" 
 "rs56182540" 
 "rs7260412"  
 "rs8106297"  
 "rs8106302"  
 "rs186451972"
 "rs76534612" 
 "rs1975526"  
 "rs62103026" 
 "rs62103027" 
 "rs67286684" 
 "rs59441037" 
 "rs57114567" 
 ⋮            
 "rs140264059"
 "rs62114717" 
 "rs188169422"
 "rs139879509"
 "rs143250448"
 "rs145384750"
 "rs149215836"
 "rs139221927"
 "rs181848453"
 "rs186913222"
 "rs141816674"
 "rs150801216"

### A continuous univariate phenotype (can be multivariate)

In [7]:
n, snps = size(snpbinLMM)
maf, = summarize(snpbinLMM)
X = [ones(n) sex]
p = size(X,2)  # no. covariates
n, snps, p

(212, 137741, 2)

### Prepare to fit LmmGWAS

In [8]:
# fit null model once to store nessary information for alternative model 
nulldata    = VarianceComponentVariate(Trait1, X, (2ex29agrm, eye(n)))
nulldatarot = TwoVarCompVariateRotate(nulldata)
nullmodel   = VarianceComponentModel(nulldata)

VarianceComponentModels.VarianceComponentModel{Float64,2,Array{Float64,2},Array{Float64,2}}([0.0; 0.0], ([1.0], [1.0]), Array{Float64}(0,2), Char[], Float64[], -Inf, Inf)

In [9]:
algorithm = :MM

:MM

In [10]:
if algorithm == :MM
    logl_null,_,_,Σcov, = mle_mm!(nullmodel, nulldatarot; verbose = true)
elseif algorithm == :FS
    logl_null,_,_,Σcov, = mle_fs!(nullmodel, nulldatarot; verbose = true)
end


     MM Algorithm
  Iter      Objective  
--------  -------------
       0  -1.487551e+05

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit http://projects.coin-or.org/Ipopt
******************************************************************************

       1  -6.257354e+02
       2  -5.420582e+02
       3  -5.116720e+02
       4  -5.006695e+02
       5  -4.957735e+02
       6  -4.927804e+02
       7  -4.905160e+02
       8  -4.886679e+02
       9  -4.871412e+02
      10  -4.858883e+02
      20  -4.813052e+02
      30  -4.807972e+02
      40  -4.807340e+02
      50  -4.807255e+02
      60  -4.807244e+02
      70  -4.807242e+02



(-480.72419822635624, VarianceComponentModels.VarianceComponentModel{Float64,2,Array{Float64,2},Array{Float64,2}}([40.1916; -6.29444], ([4.87898], [1.92549]), Array{Float64}(0,2), Char[], Float64[], -Inf, Inf), ([1.05478], [0.559104]), [1.11255 -0.407883; -0.407883 0.312597], [0.172756; 0.31492], [0.0298447 -0.0453772; -0.0453772 0.0991749])

### Heritablity of `Trait1`

In [11]:
h, hse = heritability(nullmodel.Σ, Σcov)

([0.717026], [0.0622537])

In [12]:
## fit alternative model with SNPs, push null model info to alternative model 
T = eltype(sex)
altdatarot = TwoVarCompVariateRotate(nulldatarot.Yrot,
    zeros(T, n, size(X, 1) + 1), nulldatarot.eigval, nulldatarot.eigvec,
    nulldatarot.logdetV2)
copy!(altdatarot.Xrot, nulldatarot.Xrot) # last column ramains zero
altmodel = VarianceComponentModel(altdatarot)

VarianceComponentModels.VarianceComponentModel{Float64,2,Array{Float64,2},Array{Float64,2}}([0.0; 0.0; … ; 0.0; 0.0], ([1.0], [1.0]), Array{Float64}(0,213), Char[], Float64[], -Inf, Inf)

### Loop over all SNPs to calculate LRT pvalues for LmmGWAS

In [None]:
pvalue   = ones(snps)
genovec  = zeros(T, n)
testrun  = 100
#@time for snp in 1:testrun
@time for snp in 1:snps
    # append (rotated) genotype vector to covariate matrix
    copy!(genovec, snpbinLMM[:, snp]; impute = true, center = true, scale = true)
    altdatarot.Xrot[:, end] = At_mul_B(altdatarot.eigvec, genovec)
    # initialize mean effects to null model fit
    fill!(altmodel.B, zero(T))
    copy!(altmodel.B, nullmodel.B)
    copy!(altmodel.Σ[1], nullmodel.Σ[1])
    copy!(altmodel.Σ[2], nullmodel.Σ[2])
    # fit alternative model
    if algorithm == :MM
        logl_alt, vcmodel_mle, Σse, Σcov, Bse, Bcov = mle_mm!(altmodel, altdatarot; verbose = false)
    elseif algorithm == :FS
        logl_alt, = mle_fs!(altmodel, altdatarot; verbose = false)
    end
    # LRT statistics and its pvalue
    lrt = - 2(logl_null - logl_alt)
    pvalue[snp] = ccdf(Chisq(1), lrt)
#    println(snp,": ", hapmap_snpdata.snpid[snp], 
    if mod(snp, 1000) == 1
            println(snp)
    end    
#        "\n\tMAF: ", @sprintf("%0.3f", maf[snp]), 
#        "\n\tLRT p: ", @sprintf("%0.3f", pvalue[snp]))
end

1
1001
2001


### Output results to file

In [None]:
using DataFrames
using CSV

In [None]:
#maf, = summarize(snpbinLMM)
#plot_frame = DataFrame(snpid = snpLMM[:,2],
#    AdjBasepairs = snpLMM[:,4], 
#    Chromosome = snpLMM[:,1], 
 #   MAF = maf,
 #   NegativeLogPvalue = -log10.(pvalue))
plot_frame = CSV.read("lmmGWAS_output.txt")

### Manhattan Plot

In [None]:
using Plots                             # From package Plots.
gr(size=(2000,1600))
# testrun was set up early 
#plot_frame = DataFrame(AdjBasepairs = snpLMM[1:testrun,4], 
#    Chromosome = snpLMM[1:testrun,1], 
#    NegativeLogPvalue = -log10.(pvalue[1:testrun]))

# for full datasets
# plot_frame = DataFrame(AdjBasepairs = snpLMM[:,4], 
#    Chromosome = snpLMM[:,1], 
#    NegativeLogPvalue = -log10.(pvalue))

plt = scatter(plot_frame[:AdjBasepairs], plot_frame[:NegativeLogPvalue],
      group = plot_frame[:Chromosome],
      markersize = 10, markerstrokewidth = 0, color_palette = :rainbow)

xticks = by(plot_frame, :Chromosome,
        plot_frame -> mean(plot_frame[:AdjBasepairs]))
xaxis!(plt, xticks = (sort(xticks[:x1])[1:2:end], 
        xticks[1:2:size(xticks, 1),:Chromosome]),
      font(30))
xaxis!(plt, xlabel = "Chromosome")
#
# Add the y-axis information.
#
yaxis!(plt, ylabel = "-log10(p-value)",font(20))
#
# Use a grey grid and remove the legend.
#
plot!(plt, gridcolor = :lightgrey, legend = false)
#
# Add an overall title.
#
#Plots.scalefontsizes(5)
plot!(plt, title = "Manhattan Plot", window_title = "Manhattan Plot")
#
# Add a dashed horizontal line that indicates the Bonferonni threshold.
#
Plots.abline!(plt, 0, -log10(.05 / size(plot_frame,1)), color = :black,
        line = :dash)