# Test and Benchmark

In [1]:
# Record the Julia build, OS, CPU and BLAS/LAPACK libraries used for the timings in this notebook.
versioninfo()

Julia Version 0.5.2
Commit f4c6c9d4bb (2017-05-06 16:34 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin13.4.0)
  CPU: Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.7.1 (ORCJIT, haswell)


## Haplotyping on complete genotype data

In [2]:
using NullableArrays, BenchmarkTools, MendelImpute

# Fix the global RNG seed so the simulated matrices are the same on every run.
srand(123)
n, p, d = 10000, 1000, 50

# Simulated inputs. H is drawn before X so the random stream is consumed in
# the same order on every run.
H = convert(Matrix{Float32}, rand(0:1, d, p))  # d × p, entries in {0, 1}
X = convert(Matrix{Float32}, rand(0:2, n, p))  # n × p, entries in {0, 1, 2}

# M starts as the Gram matrix H * H'. With h_i denoting row i of H, the strict
# upper triangle is then overwritten with
#     M[i, j] = h_i'h_i + 2 h_i'h_j + h_j'h_j = ‖h_i + h_j‖²   (i < j).
# The diagonal must still hold the unscaled h_i'h_i while this loop runs.
M = A_mul_Bt(H, H)
for col in 2:d, row in 1:(col - 1)
    M[row, col] = M[row, row] + 2M[row, col] + M[col, col]
end
# Only now rescale the diagonal: M[k, k] = 4 h_k'h_k = ‖h_k + h_k‖².
for k in 1:d
    M[k, k] = 4M[k, k]
end

# N = 2 * X * H', i.e. N[i, k] = 2 x_i'h_k.
N = A_mul_Bt(X, H)
for k in eachindex(N)
    N[k] = 2N[k]
end

# Output buffers: two index vectors and one score vector, one entry per row of X.
happair  = (zeros(Int, n), zeros(Int, n))
hapscore = zeros(eltype(N), n)
@time haplopair!(happair, hapscore, M, N)

  0.020970 seconds (9.22 k allocations: 402.803 KB)


In [3]:
# Display the two index vectors filled in by the in-place `haplopair!` call above.
happair

([1,17,8,37,5,10,1,14,3,3  …  16,23,7,25,3,13,5,3,3,11],[16,48,12,39,21,30,42,18,18,18  …  35,26,47,50,39,35,24,17,18,43])

In [4]:
# Display the score vector written by the in-place `haplopair!` call above (one entry per row of N).
hapscore

10000-element Array{Float32,1}:
 -611.0
 -622.0
 -629.0
 -666.0
 -569.0
 -613.0
 -640.0
 -669.0
 -550.0
 -690.0
 -665.0
 -595.0
 -621.0
    ⋮  
 -578.0
 -595.0
 -680.0
 -546.0
 -680.0
 -528.0
 -687.0
 -587.0
 -603.0
 -646.0
 -590.0
 -613.0

In [5]:
# Non-mutating method: takes X and H directly and returns `(happair, hapscore)` as a tuple.
# NOTE(review): the index pairs match the in-place call above, but the scores shown here are
# positive while `hapscore` from the in-place call is negative — presumably this method adds
# back a per-row ‖x‖² term; confirm against haplotyping.jl.
haplopair(X, H)

(([1,17,8,37,5,10,1,14,3,3  …  16,23,7,25,3,13,5,3,3,11],[16,48,12,39,21,30,42,18,18,18  …  35,26,47,50,39,35,24,17,18,43]),Float32[1043.0,1056.0,1025.0,1020.0,1055.0,1059.0,1070.0,1043.0,1039.0,1029.0  …  1021.0,1087.0,1052.0,1067.0,1058.0,1033.0,1057.0,1024.0,1012.0,1046.0])

In [6]:
# Benchmark the in-place search on the precomputed M and N.
# `$` interpolates the current values of the global variables into the benchmark
# expression, so untyped-global lookup is not part of what gets timed.
@benchmark haplopair!($happair, $hapscore, $M, $N)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     9.319 ms (0.00% GC)
  median time:      9.513 ms (0.00% GC)
  mean time:        9.494 ms (0.00% GC)
  maximum time:     10.793 ms (0.00% GC)
  --------------
  samples:          527
  evals/sample:     1

In [7]:
# Drop samples left over from earlier profiling runs so the report covers only this call.
Profile.clear()
@profile haplopair!(happair, hapscore, M, N)
# Flat report: one row per source line with its sample count, instead of a call tree.
Profile.print(format=:flat)

 Count File                        Line Function                               
     8 ./<missing>                   -1 anonymous                              
     1 ./inference.jl              1536 typeinf_edge(::Method, ::Any, ::Sim... 
     1 ./inference.jl              1621 typeinf_ext(::LambdaInfo)              
     9 ./loading.jl                 441 include_string(::String, ::String)     
     8 ./profile.jl                  16 macro expansion;                       
     9 ./task.jl                    360 (::IJulia.##13#19)()                   
     9 ...IJulia/src/eventloop.jl     8 eventloop(::ZMQ.Socket)                
     9 .../src/execute_request.jl   156 execute_request(::ZMQ.Socket, ::IJu... 
     2 ...pute/src/haplotyping.jl    32 haplopair!(::Tuple{Array{Int64,1},A... 
     4 ...pute/src/haplotyping.jl    33 haplopair!(::Tuple{Array{Int64,1},A... 
     2 ...pute/src/haplotyping.jl    36 haplopair!(::Tuple{Array{Int64,1},A... 


In [9]:
#@code_warntype haplopair!(X, H, M, N, happair, hapscore)

In [10]:
# Six-argument method: takes X and H in addition to the M, N and output buffers.
# NOTE(review): the profile of this call further down shows `A_mul_Bt!`/`gemm!` frames, so it
# presumably recomputes M and N from X and H before searching — confirm in haplotyping.jl.
@time haplopair!(X, H, M, N, happair, hapscore)

  0.023348 seconds (4 allocations: 160 bytes)


In [11]:
# Benchmark the six-argument method that also receives X and H.
# `$` interpolates the current values of the global variables into the benchmark
# expression, so untyped-global lookup is not part of what gets timed.
@benchmark haplopair!($X, $H, $M, $N, $happair, $hapscore)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     20.115 ms (0.00% GC)
  median time:      20.261 ms (0.00% GC)
  mean time:        20.435 ms (0.00% GC)
  maximum time:     29.652 ms (0.00% GC)
  --------------
  samples:          245
  evals/sample:     1

In [12]:
# Drop samples left over from earlier profiling runs so the report covers only this call.
Profile.clear()
@profile haplopair!(X, H, M, N, happair, hapscore)
# Flat report: one row per source line with its sample count, instead of a call tree.
Profile.print(format=:flat)

 Count File                        Line Function                               
    13 ./<missing>                   -1 anonymous                              
     2 ./linalg/blas.jl             969 gemm!(::Char, ::Char, ::Float32, ::... 
     2 ./linalg/matmul.jl           155 A_mul_Bt!                              
     2 ./linalg/matmul.jl           331 gemm_wrapper!(::Array{Float32,2}, :... 
    13 ./loading.jl                 441 include_string(::String, ::String)     
    13 ./profile.jl                  16 macro expansion;                       
     2 ./simdloop.jl                 73 macro expansion                        
    13 ./task.jl                    360 (::IJulia.##13#19)()                   
    13 ...IJulia/src/eventloop.jl     8 eventloop(::ZMQ.Socket)                
    13 .../src/execute_request.jl   156 execute_request(::ZMQ.Socket, ::IJu... 
     4 ...pute/src/haplotyping.jl    32 haplopair!(::Tuple{Array{Int64,1},A... 
     3 ...pute/src/haplotyping.jl    33 

## Haplotyping on incomplete genotype data



In [14]:
using MendelImpute, NullableArrays, BenchmarkTools

# Same seed and dimensions as the complete-data section, so H and X are drawn
# from the same random stream (H first, then X).
srand(123)
n, p, d = 10000, 1000, 50
H = convert(Matrix{Float32}, rand(0:1, d, p))
X = convert(Matrix{Float32}, rand(0:2, n, p))
# Zero-filled d × d and n × d buffers handed to `haploimpute!`
# (presumably overwritten in place as workspace — confirm in haplotyping.jl).
M = zeros(eltype(H), d, d)
N = zeros(eltype(X), n, d)
# Output buffers: two index vectors and one score vector of length n.
happair  = zeros(Int, n), zeros(Int, n)
hapscore = zeros(eltype(N), n)
# Random Bool mask with density `missingprop`, densified with `full` and used
# as the `isnull` flags of the NullableArray wrapped around X.
missingprop = 0.1
Xm = NullableArray(X, full(sprand(Bool, n, p, missingprop)))
@time haploimpute!(Xm, H, M, N, happair, hapscore)

  0.085485 seconds (4 allocations: 160 bytes)


In [15]:
# Benchmark in-place imputation on the simulated data.
# `$` interpolates the current values of the global variables into the benchmark
# expression, so untyped-global lookup is not part of what gets timed.
@benchmark haploimpute!($Xm, $H, $M, $N, $happair, $hapscore)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     85.755 ms (0.00% GC)
  median time:      87.582 ms (0.00% GC)
  mean time:        87.665 ms (0.00% GC)
  maximum time:     90.458 ms (0.00% GC)
  --------------
  samples:          58
  evals/sample:     1

In [16]:
# Drop samples left over from earlier profiling runs so the report covers only this call.
Profile.clear()
@profile haploimpute!(Xm, H, M, N, happair, hapscore)
# Flat report: one row per source line with its sample count, instead of a call tree.
Profile.print(format=:flat)

 Count File                        Line Function                               
    69 ./<missing>                   -1 anonymous                              
     4 ./linalg/blas.jl             969 gemm!(::Char, ::Char, ::Float32, ::... 
     4 ./linalg/matmul.jl           155 A_mul_Bt!                              
     4 ./linalg/matmul.jl           331 gemm_wrapper!(::Array{Float32,2}, :... 
    69 ./loading.jl                 441 include_string(::String, ::String)     
    69 ./profile.jl                  16 macro expansion;                       
     2 ./simdloop.jl                 73 macro expansion                        
    69 ./task.jl                    360 (::IJulia.##13#19)()                   
    69 ...IJulia/src/eventloop.jl     8 eventloop(::ZMQ.Socket)                
    69 .../src/execute_request.jl   156 execute_request(::ZMQ.Socket, ::IJu... 
     3 ...pute/src/haplotyping.jl   146 fillmissing!(::NullableArrays.Nulla... 
     6 ...pute/src/haplotyping.jl   147 

## `AFRped.txt` data



In [17]:
#;ls -al

In [18]:
## cut out the first 6 fields in the pedigree file
#;cut -d ',' -f7- AFRped.txt > AFRped_geno.txt

In [19]:
#;ls -al

In [1]:
# Read the comma-separated genotype file (the output of the `cut` command shown above)
# into a dense Float32 matrix.
rawdata = readcsv("AFRped_geno.txt", Float32)

1325×36499 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0 

In [2]:
# The first `people` rows of the file become X; the remaining rows are used to build H
# in the next cell.
people = 664
# The last column is dropped.
# NOTE(review): presumably a spurious field from a trailing delimiter — confirm against AFRped.txt.
X = rawdata[1:people, 1:(end - 1)]

664×36498 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  

In [3]:
# Build H from the rows after the first `people`, dropping the last column as for X.
# Every entry is halved: a right shift by one bit for integer data, a division by 2 otherwise.
H = if eltype(rawdata) <: Integer
    rawdata[(people + 1):end, 1:(end - 1)] .>> 1
else
    rawdata[(people + 1):end, 1:(end - 1)] / 2
end

661×36498 Array{Float32,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  

In [4]:
# clean up rawdata
# X and H were built by slicing (which copies), so rebinding `rawdata` to an empty
# array drops the only reference to the full matrix and lets it be garbage-collected.
rawdata = []

0-element Array{Any,1}

### Phase + impute a fixed window

In [5]:
using BenchmarkTools, MendelImpute, NullableArrays

# Seed the RNG so the missingness mask, and therefore the error rate computed from it,
# is reproducible, as in the other simulation cells of this notebook.
srand(123)
# Restrict both matrices to their first 1200 columns.
X1 = X[:, 1:1200]
H1 = H[:, 1:1200]
missingprop = 0.1
# Random Bool mask with density `missingprop`, used as the `isnull` flags of Xm.
Xm = NullableArray(X1, full(sprand(Bool, size(X1, 1), size(X1, 2), missingprop)))
# Save the true values at the masked positions before any imputation runs.
missing_true = X1[Xm.isnull]

79742-element Array{Float32,1}:
 0.0
 1.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮  
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [6]:
# Buffers sized for the window: n = number of rows of X1, d = number of rows of H1.
n, d = size(X1, 1), size(H1, 1)
M = zeros(eltype(H), d, d)
N = zeros(eltype(X), n, d)
happair  = zeros(Int, n), zeros(Int, n)
hapscore = zeros(eltype(N), n)
# First call in this session, so the reported time and allocations presumably
# include JIT compilation.
@time haploimpute!(Xm, H1, M, N, happair, hapscore)
# Values now stored at the masked positions, in the same order as `missing_true`.
missing_impute = Xm.values[Xm.isnull]
# Fraction of masked entries whose stored value differs from the truth.
# Named `errorrate` rather than `error` so that `Base.error` is not shadowed in `Main`.
errorrate = countnz(missing_true .≠ missing_impute) / length(missing_true)

  0.569513 seconds (465.36 k allocations: 16.780 MB)


0.0033733791477514987

In [7]:
# Benchmark in-place imputation on the fixed window.
# `$` interpolates the current values of the global variables into the benchmark
# expression, so untyped-global lookup is not part of what gets timed.
@benchmark haploimpute!($Xm, $H1, $M, $N, $happair, $hapscore)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     116.388 ms (0.00% GC)
  median time:      117.693 ms (0.00% GC)
  mean time:        118.753 ms (0.00% GC)
  maximum time:     127.732 ms (0.00% GC)
  --------------
  samples:          43
  evals/sample:     1

In [8]:
# Drop samples left over from earlier profiling runs so the report covers only this call.
Profile.clear()
@profile haploimpute!(Xm, H1, M, N, happair, hapscore)
# Flat report: one row per source line with its sample count, instead of a call tree.
Profile.print(format=:flat)

 Count File                        Line Function                               
    90 ./<missing>                   -1 anonymous                              
     3 ./abstractarray.jl           485 copy!(::Array{Any,1}, ::Core.Infere... 
     2 ./inference.jl              1101 abstract_call(::Any, ::Array{Any,1}... 
     2 ./inference.jl               893 abstract_call_gf_by_type(::Any, ::A... 
     5 ./inference.jl              1152 abstract_eval(::Any, ::Array{Any,1}... 
     3 ./inference.jl              1105 abstract_eval_call(::Expr, ::Array{... 
     2 ./inference.jl              1131 abstract_eval_call(::Expr, ::Array{... 
     3 ./inference.jl              1597 typeinf_edge(::Method, ::Any, ::Sim... 
     2 ./inference.jl              1603 typeinf_edge(::Method, ::Any, ::Sim... 
     1 ./inference.jl              1621 typeinf_ext(::LambdaInfo)              
     3 ./inference.jl              1786 typeinf_frame(::Core.Inference.Infe... 
     2 ./inference.jl              1660 

## Phase + impute by moving window

In [9]:
using BenchmarkTools, MendelImpute, NullableArrays

# Seed the RNG so the missingness mask below is reproducible.
srand(123)
missingprop = 0.1
# Random Bool mask with density `missingprop` over the whole of X, used as the `isnull` flags.
Xm = NullableArray(X, full(sprand(Bool, size(X, 1), size(X, 2), missingprop)))
# Save the true values at the masked positions before imputation runs.
missing_true = X[Xm.isnull]

# Three-argument method; the progress output shows the columns being processed in
# consecutive blocks of 400 (1:400, 401:800, ...), so the last argument is the window width.
@time haploimpute!(Xm, H, 400)

Imputing SNPs 1:400
Imputing SNPs 401:800
Imputing SNPs 801:1200
Imputing SNPs 1201:1600
Imputing SNPs 1601:2000
Imputing SNPs 2001:2400
Imputing SNPs 2401:2800
Imputing SNPs 2801:3200
Imputing SNPs 3201:3600
Imputing SNPs 3601:4000
Imputing SNPs 4001:4400
Imputing SNPs 4401:4800
Imputing SNPs 4801:5200
Imputing SNPs 5201:5600
Imputing SNPs 5601:6000
Imputing SNPs 6001:6400
Imputing SNPs 6401:6800
Imputing SNPs 6801:7200
Imputing SNPs 7201:7600
Imputing SNPs 7601:8000
Imputing SNPs 8001:8400
Imputing SNPs 8401:8800
Imputing SNPs 8801:9200
Imputing SNPs 9201:9600
Imputing SNPs 9601:10000
Imputing SNPs 10001:10400
Imputing SNPs 10401:10800
Imputing SNPs 10801:11200
Imputing SNPs 11201:11600
Imputing SNPs 11601:12000
Imputing SNPs 12001:12400
Imputing SNPs 12401:12800
Imputing SNPs 12801:13200
Imputing SNPs 13201:13600
Imputing SNPs 13601:14000
Imputing SNPs 14001:14400
Imputing SNPs 14401:14800
Imputing SNPs 14801:15200
Imputing SNPs 15201:15600
Imputing SNPs 15601:16000
Imputing SNPs 16

In [10]:
# Values stored at the masked positions after the moving-window run, in the same
# order as `missing_true`.
missing_impute = Xm.values[Xm.isnull]
# Fraction of masked entries whose stored value differs from the truth.
# Named `errorrate` rather than `error` so that `Base.error` is not shadowed in `Main`.
errorrate = countnz(missing_true .≠ missing_impute) / length(missing_true)

0.004465366698388301

In [11]:
# Drop samples left over from earlier profiling runs so the report covers only this call.
Profile.clear()
@profile haploimpute!(Xm, H, 400)
# Flat report: one row per source line with its sample count, instead of a call tree.
Profile.print(format=:flat)

Imputing SNPs 1:400
Imputing SNPs 401:800
Imputing SNPs 801:1200
Imputing SNPs 1201:1600
Imputing SNPs 1601:2000
Imputing SNPs 2001:2400
Imputing SNPs 2401:2800
Imputing SNPs 2801:3200
Imputing SNPs 3201:3600
Imputing SNPs 3601:4000
Imputing SNPs 4001:4400
Imputing SNPs 4401:4800
Imputing SNPs 4801:5200
Imputing SNPs 5201:5600
Imputing SNPs 5601:6000
Imputing SNPs 6001:6400
Imputing SNPs 6401:6800
Imputing SNPs 6801:7200
Imputing SNPs 7201:7600
Imputing SNPs 7601:8000
Imputing SNPs 8001:8400
Imputing SNPs 8401:8800
Imputing SNPs 8801:9200
Imputing SNPs 9201:9600
Imputing SNPs 9601:10000
Imputing SNPs 10001:10400
Imputing SNPs 10401:10800
Imputing SNPs 10801:11200
Imputing SNPs 11201:11600
Imputing SNPs 11601:12000
Imputing SNPs 12001:12400
Imputing SNPs 12401:12800
Imputing SNPs 12801:13200
Imputing SNPs 13201:13600
Imputing SNPs 13601:14000
Imputing SNPs 14001:14400
Imputing SNPs 14401:14800
Imputing SNPs 14801:15200
Imputing SNPs 15201:15600
Imputing SNPs 15601:16000
Imputing SNPs 16