# Test and Benchmark

In [1]:
versioninfo()

Julia Version 0.5.2
Commit f4c6c9d4bb (2017-05-06 16:34 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin13.4.0)
  CPU: Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.7.1 (ORCJIT, broadwell)


## Haplotyping on complete genotype data

In [83]:
using NullableArrays, BenchmarkTools, MendelImpute

# Simulated problem sizes: n rows in X, p columns shared by X and H, d rows in H.
srand(123)  # fixed seed so the random draws below are reproducible
n, p, d = 10000, 1000, 50
H = convert(Matrix{Float32}, rand(0:1, d, p))  # d × p matrix with entries in {0, 1}
X = convert(Matrix{Float32}, rand(0:2, n, p))  # n × p matrix with entries in {0, 1, 2}

# M starts as the Gram matrix H * H'. Its strict upper triangle is then overwritten with
#   M[i, j] = 2 h_i'h_j + h_i'h_i + h_j'h_j   (= ‖h_i + h_j‖²),
# which reads the diagonal, so the diagonal must still be unscaled at this point.
M = A_mul_Bt(H, H)
for col in 2:d, row in 1:(col - 1)
    M[row, col] = 2M[row, col] + M[row, row] + M[col, col]
end
# Only now scale the diagonal: M[k, k] = 4 h_k'h_k (= ‖h_k + h_k‖²).
for k in 1:d
    M[k, k] = 4M[k, k]
end

# N = 2 * X * H', scaled in place entry by entry.
N = A_mul_Bt(X, H)
for idx in eachindex(N)
    N[idx] = 2N[idx]
end

# Output buffers that haplopair! fills in place: a pair of index vectors and a score vector.
happair  = (zeros(Int, n), zeros(Int, n))
hapscore = zeros(eltype(N), n)
@time haplopair!(happair, hapscore, M, N)

  0.009869 seconds (4 allocations: 160 bytes)


In [84]:
happair

([1,17,8,37,5,10,1,14,3,3  …  16,23,7,25,3,13,5,3,3,11],[16,48,12,39,21,30,42,18,18,18  …  35,26,47,50,39,35,24,17,18,43])

In [85]:
hapscore

10000-element Array{Float32,1}:
 -611.0
 -622.0
 -629.0
 -666.0
 -569.0
 -613.0
 -640.0
 -669.0
 -550.0
 -690.0
 -665.0
 -595.0
 -621.0
    ⋮  
 -578.0
 -595.0
 -680.0
 -546.0
 -680.0
 -528.0
 -687.0
 -587.0
 -603.0
 -646.0
 -590.0
 -613.0

In [86]:
# Non-mutating entry point, called with X and H only; the displayed result is a
# tuple of the form (happair, hapscore).
haplopair(X, H)

(([1,17,8,37,5,10,1,14,3,3  …  16,23,7,25,3,13,5,3,3,11],[16,48,12,39,21,30,42,18,18,18  …  35,26,47,50,39,35,24,17,18,43]),Float32[1043.0,1056.0,1025.0,1020.0,1055.0,1059.0,1070.0,1043.0,1039.0,1029.0  …  1021.0,1087.0,1052.0,1067.0,1058.0,1033.0,1057.0,1024.0,1012.0,1046.0])

In [87]:
# Repeated-sample timing (BenchmarkTools) of the in-place call on the precomputed M and N.
@benchmark haplopair!(happair, hapscore, M, N)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     9.263 ms (0.00% GC)
  median time:      9.423 ms (0.00% GC)
  mean time:        9.781 ms (0.00% GC)
  maximum time:     15.706 ms (0.00% GC)
  --------------
  samples:          511
  evals/sample:     1

In [88]:
# Profile a single call: drop previously collected samples, sample the call,
# then print the report in flat (non-tree) format.
Profile.clear()
@profile haplopair!(happair, hapscore, M, N)
Profile.print(format=:flat)

 Count File                        Line Function                               
    10 ./<missing>                   -1 anonymous                              
    11 ./loading.jl                 441 include_string(::String, ::String)     
    10 ./profile.jl                  16 macro expansion;                       
     1 ./simdloop.jl                 71 macro expansion                        
     9 ./simdloop.jl                 73 macro expansion                        
    11 ./task.jl                    360 (::IJulia.##13#19)()                   
    11 ...IJulia/src/eventloop.jl     8 eventloop(::ZMQ.Socket)                
    11 .../src/execute_request.jl   156 execute_request(::ZMQ.Socket, ::IJu... 
    10 ...pute/src/haplotyping.jl    30 haplopair!(::Tuple{Array{Int64,1},A... 
     1 ...pute/src/haplotyping.jl    31 macro expansion                        
     3 ...pute/src/haplotyping.jl    32 macro expansion                        
     1 ...pute/src/haplotyping.jl    34 

In [89]:
#@code_warntype haplopair!(X, H, M, N, happair, hapscore)

In [90]:
# Six-argument method that also receives X and H.
# NOTE(review): presumably it recomputes M and N from X and H before the search — confirm
# against the package source.
@time haplopair!(X, H, M, N, happair, hapscore)

  0.031918 seconds (4 allocations: 160 bytes)


In [91]:
# Repeated-sample timing (BenchmarkTools) of the six-argument method that takes X and H.
@benchmark haplopair!(X, H, M, N, happair, hapscore)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     20.709 ms (0.00% GC)
  median time:      21.214 ms (0.00% GC)
  mean time:        21.348 ms (0.00% GC)
  maximum time:     24.684 ms (0.00% GC)
  --------------
  samples:          234
  evals/sample:     1

In [92]:
# Profile a single call of the six-argument method: drop previously collected samples,
# sample the call, then print the report in flat (non-tree) format.
Profile.clear()
@profile haplopair!(X, H, M, N, happair, hapscore)
Profile.print(format=:flat)

 Count File                        Line Function                               
    19 ./<missing>                   -1 anonymous                              
     4 ./linalg/blas.jl             969 gemm!(::Char, ::Char, ::Float32, ::... 
     4 ./linalg/matmul.jl           155 A_mul_Bt!                              
     4 ./linalg/matmul.jl           331 gemm_wrapper!(::Array{Float32,2}, :... 
    19 ./loading.jl                 441 include_string(::String, ::String)     
    19 ./profile.jl                  16 macro expansion;                       
    14 ./simdloop.jl                 73 macro expansion                        
     1 ./simdloop.jl                 74 macro expansion                        
    19 ./task.jl                    360 (::IJulia.##13#19)()                   
    19 ...IJulia/src/eventloop.jl     8 eventloop(::ZMQ.Socket)                
    19 .../src/execute_request.jl   156 execute_request(::ZMQ.Socket, ::IJu... 
    12 ...pute/src/haplotyping.jl    30 

## Haplotyping on incomplete genotype data



In [5]:
using MendelImpute, NullableArrays, BenchmarkTools

# Simulated data; the RNG is consumed in the order H, X, missingness mask.
srand(123)
n, p, d = 10000, 1000, 50
H = convert(Matrix{Float32}, rand(0:1, d, p))  # d × p matrix with entries in {0, 1}
X = convert(Matrix{Float32}, rand(0:2, n, p))  # n × p matrix with entries in {0, 1, 2}

# Wrap X together with a random Bool mask (sprand density `missingprop`), then set every
# masked entry of the stored values to 1.
missingprop = 0.1
Xm = NullableArray(X, full(sprand(Bool, n, p, missingprop)))
Xm.values[Xm.isnull] = 1

# Zero-initialised work arrays and output buffers handed to haploimpute!.
M = zeros(eltype(H), d, d)
N = zeros(eltype(X), n, d)
hapscore = zeros(eltype(N), n)
happair  = (zeros(Int, n), zeros(Int, n))
@time haploimpute!(Xm, H, M, N, happair, hapscore)

  0.055833 seconds (4 allocations: 160 bytes)


In [6]:
# Repeated-sample timing (BenchmarkTools) of haploimpute! on the simulated data with a
# random missingness mask.
@benchmark haploimpute!(Xm, H, M, N, happair, hapscore)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     50.062 ms (0.00% GC)
  median time:      50.423 ms (0.00% GC)
  mean time:        50.782 ms (0.00% GC)
  maximum time:     59.527 ms (0.00% GC)
  --------------
  samples:          99
  evals/sample:     1

In [7]:
# Profile a single haploimpute! call: drop previously collected samples, sample the call,
# then print the report in flat (non-tree) format.
Profile.clear()
@profile haploimpute!(Xm, H, M, N, happair, hapscore)
Profile.print(format=:flat)

 Count File                        Line Function                               
    48 ./<missing>                   -1 anonymous                              
     1 ./abstractarray.jl           485 copy!(::Array{Any,1}, ::Core.Infere... 
     1 ./inference.jl              1101 abstract_call(::Any, ::Array{Any,1}... 
     1 ./inference.jl               893 abstract_call_gf_by_type(::Any, ::A... 
     2 ./inference.jl              1152 abstract_eval(::Any, ::Array{Any,1}... 
     1 ./inference.jl              1105 abstract_eval_call(::Expr, ::Array{... 
     1 ./inference.jl              1131 abstract_eval_call(::Expr, ::Array{... 
     1 ./inference.jl              1577 typeinf_edge(::Method, ::Any, ::Sim... 
     1 ./inference.jl              1597 typeinf_edge(::Method, ::Any, ::Sim... 
     1 ./inference.jl              1603 typeinf_edge(::Method, ::Any, ::Sim... 
     1 ./inference.jl              1621 typeinf_ext(::LambdaInfo)              
     1 ./inference.jl              1786 

## `AFRped.txt` data



In [7]:
#;ls -al

In [8]:
## cut out the first 6 fields in the pedigree file
#;cut -d ',' -f7- AFRped.txt > AFRped_geno.txt

In [9]:
#;ls -al

In [25]:
# Read the comma-separated file into a dense Float32 matrix.
rawdata = readdlm("AFRped_geno.txt", ',', Float32)

1325×36499 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0 

In [26]:
# The first `people` rows of the raw matrix become X; the last column of the raw matrix
# is left out.
people = 664
X = rawdata[1:people, 1:(size(rawdata, 2) - 1)]

664×36498 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  

In [27]:
# The rows after the first `people` rows become H (last column left out), with every entry
# halved: a one-bit right shift for integer element types, a division by 2 otherwise.
H = eltype(rawdata) <: Integer ?
    (rawdata[(people + 1):end, 1:(end - 1)] .>> 1) :
    (rawdata[(people + 1):end, 1:(end - 1)] / 2)

661×36498 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  

In [28]:
# clean up rawdata: rebind the name to an empty array so that the large raw matrix is no
# longer referenced through this variable
rawdata = []

0-element Array{Any,1}

In [30]:
using BenchmarkTools, MendelImpute, SnpArrays

# Restrict X and H to their first 1200 columns.
X1 = X[:, 1:1200]
H1 = H[:, 1:1200]
missingprop = 0.1  # density passed to sprand for the random Bool mask
Xm = NullableArray(X1, full(sprand(Bool, size(X1, 1), size(X1, 2), missingprop)))
missing_true = X1[Xm.isnull]  # original values at the masked positions, kept for later comparison
Xm.values[Xm.isnull] = NaN    # overwrite the masked entries of the stored values
# NOTE(review): the next two lines operate on X1, not on Xm — presumably Xm.values aliases X1
# so the NaNs written above are visible to SnpArray; confirm against the NullableArray constructor.
S = SnpArray(X1)
copy!(X1, S; impute = true) # initialize missing genotypes

664×1200 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  2.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  2.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     2.0  2.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  1.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  1.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     2.0  2.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     1.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0

In [33]:
# Dimensions of the working subset: n rows in X1, d rows in H1.
n, d = size(X1, 1), size(H1, 1)
# Zero-initialised work arrays, typed after the arrays actually passed to haploimpute!.
M = zeros(eltype(H1), d, d)
N = zeros(eltype(X1), n, d)
# NOTE(review): earlier cells pass `happair` as a tuple of two length-n vectors; here it is
# an n × 2 matrix — confirm which form haploimpute! expects.
happair  = zeros(Int, n, 2)
hapscore = zeros(eltype(N), n)
@time haploimpute!(Xm, H1, M, N, happair, hapscore)
# Values now stored at the masked positions, compared with the held-out true values.
missing_impute = Xm.values[Xm.isnull]
# Fraction of masked entries whose stored value differs from the truth. The result is not
# named `error`, because that would shadow `Base.error` for the rest of the session.
imputation_error = countnz(missing_true .≠ missing_impute) / length(missing_true)

  0.203312 seconds (6 allocations: 224 bytes)


0.04190502533412123

In [35]:
# Repeated-sample timing (BenchmarkTools) of haploimpute! on the 1200-column subset X1/H1.
@benchmark haploimpute!(Xm, H1, M, N, happair, hapscore)

BenchmarkTools.Trial: 
  memory estimate:  64 bytes
  allocs estimate:  2
  --------------
  minimum time:     162.402 ms (0.00% GC)
  median time:      179.922 ms (0.00% GC)
  mean time:        180.324 ms (0.00% GC)
  maximum time:     194.203 ms (0.00% GC)
  --------------
  samples:          28
  evals/sample:     1