In [1]:
# Record the Julia build and platform this notebook was run on.
versioninfo()

Julia Version 1.3.1
Commit 2d5741174c (2019-12-30 21:36 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin18.6.0)
  CPU: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-6.0.1 (ORCJIT, skylake)


# Systolic blood pressure as normal model

In [1]:
using DelimitedFiles
using DataFrames
using SnpArrays
using Statistics
using CSV
using MendelPlots

In [12]:
# Load the saved beta estimates for the SBP (normal) model as a numeric matrix.
# Later cells read the row index of each nonzero entry as a SNP position in the .bim file.
b = readdlm("sbp_model_12_beta_100iter");

In [8]:
# Indices of the nonzero entries of b; the row index is the SNP's position in the .bim file.
findall(!iszero, b)

6-element Array{CartesianIndex{2},1}:
 CartesianIndex(2511, 1)  
 CartesianIndex(118568, 1)
 CartesianIndex(272187, 1)
 CartesianIndex(328030, 1)
 CartesianIndex(407795, 1)
 CartesianIndex(431724, 1)

In [6]:
filter(!iszero, b) # nonzero entries of b, i.e. the estimated effect sizes of the selected SNPs

6-element Array{Float64,1}:
  0.430369589178061  
 -0.3915190862315371 
  0.358894859504808  
  0.32844523278890914
 -0.3301761855250657 
  0.3252279762883079 

In [13]:
# Load the saved coefficient estimates for the non-genetic covariates of the SBP model.
c = readdlm("sbp_model_12_c_100iter")

16×1 Array{Float64,2}:
 2751.7440346723583  
   70.15711479660848 
    0.0              
  137.53898733036402 
  137.79189955973646 
   84.72010433986819 
    0.0              
    0.0              
    0.0              
    0.0              
    8.458565633074889
    0.0              
    0.0              
    0.0              
    0.0              
    0.0              

# Stage 2 hypertension as logistic model

In [16]:
# Load the saved beta estimates for the logistic (stage 2 hypertension) model.
# This rebinds `b`, replacing the SBP estimates loaded earlier.
b = readdlm("logistic_model_39_beta_500iter");

In [17]:
# Load the saved estimates stored alongside the logistic-model betas; presumably the
# non-genetic covariate coefficients, as for the SBP model — TODO confirm.
c = readdlm("logistic_model_39_c_500iter")

16×1 Array{Float64,2}:
 -0.27998509744996564 
  0.24113489780826114 
  0.0                 
  0.2763075731842723  
  0.24675671677714228 
  0.37847286824149645 
  0.0                 
  0.0                 
  0.0                 
  0.0                 
  0.023322530679080293
  0.0                 
  0.0                 
  0.0                 
  0.0                 
  0.0                 

In [21]:
# First 15 nonzero positions of b (SNP positions in the .bim file); the list is shown
# in two slices, this one and 16:end in the next cell.
findall(!iszero, b)[1:15]

15-element Array{CartesianIndex{2},1}:
 CartesianIndex(2511, 1)  
 CartesianIndex(61869, 1) 
 CartesianIndex(61875, 1) 
 CartesianIndex(118568, 1)
 CartesianIndex(121597, 1)
 CartesianIndex(141359, 1)
 CartesianIndex(141363, 1)
 CartesianIndex(154054, 1)
 CartesianIndex(158835, 1)
 CartesianIndex(174020, 1)
 CartesianIndex(213620, 1)
 CartesianIndex(226036, 1)
 CartesianIndex(272187, 1)
 CartesianIndex(278049, 1)
 CartesianIndex(283495, 1)

In [22]:
# Remaining nonzero positions of b (entry 16 onward); SNP positions in the .bim file.
findall(!iszero, b)[16:end]

18-element Array{CartesianIndex{2},1}:
 CartesianIndex(284789, 1)
 CartesianIndex(293862, 1)
 CartesianIndex(295170, 1)
 CartesianIndex(300040, 1)
 CartesianIndex(300090, 1)
 CartesianIndex(307452, 1)
 CartesianIndex(328030, 1)
 CartesianIndex(331695, 1)
 CartesianIndex(353730, 1)
 CartesianIndex(377003, 1)
 CartesianIndex(379862, 1)
 CartesianIndex(379863, 1)
 CartesianIndex(381732, 1)
 CartesianIndex(407795, 1)
 CartesianIndex(424575, 1)
 CartesianIndex(431724, 1)
 CartesianIndex(432835, 1)
 CartesianIndex(453563, 1)

In [23]:
filter(!iszero, b)[1:15] # first 15 nonzero estimates (effect sizes of the selected SNPs)

15-element Array{Float64,1}:
  0.04645497463264396 
  0.019797519063123055
  0.021912320581498782
 -0.04808269947664778 
  0.030151444904165388
  0.04663568812608648 
  0.029442679535946814
  0.02769223778580454 
 -0.027493151488107104
 -0.02869178258606142 
 -0.03866882142006946 
 -0.03056223670827392 
  0.03911000623054243 
  0.027941833938835824
  0.03599772305782185 

In [24]:
filter(!iszero, b)[16:end] # remaining nonzero estimates, entry 16 onward

18-element Array{Float64,1}:
  0.03870234586118328 
 -0.028568969598374336
 -0.025477506110263438
  0.026189643624273617
 -0.035344298559485274
  0.04295342107189346 
  0.030100751507338236
  0.029525702932560856
 -0.02683239226642645 
 -0.026297999283077498
 -0.02101733033591335 
  0.021033828988575327
 -0.027462024631856714
 -0.042989544464641816
 -0.026463805048220695
  0.03508373546950311 
  0.03600049932412481 
 -0.02975510797575641 

## Check correlation between SNPs

In [4]:
# Open the genotype .bed file as a SnpArray.
# NOTE(review): this is an absolute path on one specific machine; adjust it before re-running elsewhere.
x = SnpArray("/Users/biona001/Benjamin_Folder/UCLA/research/UKBdatafiles/ukb.plink.filtered.imputed.bed")

185565×470228 SnpArray:
 0x02  0x02  0x03  0x03  0x03  0x03  …  0x03  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x03  0x03  0x03     0x00  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x02  0x03  0x03     0x03  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x03  0x03  0x03     0x02  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x03  0x03  0x03     0x03  0x03  0x03  0x03  0x03  0x02
 0x02  0x02  0x03  0x03  0x03  0x03  …  0x03  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x00  0x03  0x03     0x03  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x03  0x02  0x03     0x03  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x03  0x03  0x03     0x03  0x03  0x03  0x03  0x03  0x03
 0x03  0x03  0x03  0x03  0x03  0x03     0x03  0x03  0x03  0x03  0x03  0x03
 0x02  0x03  0x02  0x03  0x03  0x02  …  0x03  0x03  0x03  0x03  0x03  0x02
 0x03  0x03  0x03  0x02  0x03  0x03     0x03  0x03  0x03  0x03  0x02  0x03
 0x03  0x03  0x03  0x03  0x03  0x03     0x03  0x03  0x03  0x03  0x03  0x03
 

In [18]:
# Extract four SNP columns of x as Float64 vectors. The column numbers form two pairs of
# nearby positions that both have nonzero estimates in the logistic model:
# 61869 / 61875 and 379862 / 379863.
snp1 = convert(Vector{Float64}, @view(x[:, 61869]))
snp2 = convert(Vector{Float64}, @view(x[:, 61875]))
snp3 = convert(Vector{Float64}, @view(x[:, 379862]))
# The last assignment is the cell's value, so the output shown is snp4.
snp4 = convert(Vector{Float64}, @view(x[:, 379863]))

185565-element Array{Float64,1}:
 1.0
 2.0
 0.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 2.0
 ⋮  
 1.0
 2.0
 2.0
 0.0
 2.0
 2.0
 2.0
 0.0
 2.0
 2.0
 1.0
 1.0

In [19]:
# Correlation within each pair of nearby selected SNPs; @show prints the expression and its value.
@show cor(snp1, snp2)
@show cor(snp3, snp4)

cor(snp1, snp2) = 0.5865145979590505
cor(snp3, snp4) = -0.48978957537298295


-0.48978957537298295

## Effect size of recovered SNPs

In [11]:
# Average effect size over the 12 selected SNPs that are known associations.
# NOTE(review): the values look like magnitudes of the logistic-model estimates hand-copied
# to three decimals (e.g. 0.0466... entered as .046) — confirm, or compute them from `b`.
(.046+ .048 + .03 + .046 + .039 + .039 + .028 + .039 + .03 + .021 + .036 + .03) / 12

0.036

In [13]:
# Average effect size over the 21 selected SNPs not known to be associated with elevated SBP/DBP.
# NOTE(review): the values look hand-copied (to three decimals) from the logistic-model
# estimates — confirm, or compute them from `b`.
(.02+.022+.029+.028+.027+.029+.031+.036+.029+.025+.026+.035+.043+.03+.027+.027+.021+.027+.043+.026+.035) / 21

0.029333333333333343

# Manhattan plot for logistic model

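The code below must be run from a Julia session in the terminal; when run inside the notebook it fails with `UndefVarError: libpango not defined` (see the error at the end of this section).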

In [2]:
# Load the table used for the Manhattan plot (presumably per-SNP p-values, going by the
# file name). `CSV.read(path)` without a sink argument is rejected by current CSV.jl
# releases, so the DataFrame is built from `CSV.File`, which also works on older releases.
df = DataFrame(CSV.File("ukb.final.logistic.pval.txt"))
# Reload the logistic-model beta estimates from IHT.
b  = readdlm("logistic_model_39_beta_500iter");

In [3]:
# Add a Bool column :IHT to df (inserted as column 7), initially all false, then set it
# to true for every index at which IHT's estimated beta is nonzero.
insertcols!(df, 7, :IHT => falses(size(df, 1)))
for snp in findall(x -> x != 0.0, vec(b))
    df[snp, :IHT] = true
end

In [4]:
# Draw the Manhattan plot from the annotated table using MendelPlots' `manhattan`.
# In the recorded notebook run this call fails with `UndefVarError: libpango not defined`.
result = manhattan(df)

│   caller = #manhattan#3(::String, ::String, ::Int64, ::String, ::String, ::Int64, ::Int64, ::String, ::Measures.Length{:mm,Float64}, ::String, ::String, ::String, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(manhattan), ::DataFrame) at gwasplots.jl:249
└ @ MendelPlots /Users/biona001/.julia/packages/MendelPlots/X3Apb/src/gwasplots.jl:249
│     df[!, col_ind] = v
│     df
│ end` instead.
│   caller = #manhattan#3(::String, ::String, ::Int64, ::String, ::String, ::Int64, ::Int64, ::String, ::Measures.Length{:mm,Float64}, ::String, ::String, ::String, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(manhattan), ::DataFrame) at gwasplots.jl:249
└ @ MendelPlots /Users/biona001/.julia/packages/MendelPlots/X3Apb/src/gwasplots.jl:249
│   caller = #manhattan#3(::String, ::String, ::Int64, ::String, ::String, ::Int64, ::Int64, ::String, ::Measures.Length{:mm,Float64}, ::String, ::String, ::String, ::Base.Iterators.Pairs{Union{

UndefVarError: UndefVarError: libpango not defined