# Test and Benchmark

In [1]:
# Print the Julia build and platform details so the timings below can be put in context.
versioninfo()

Julia Version 0.5.2
Commit f4c6c9d4bb (2017-05-06 16:34 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin13.4.0)
  CPU: Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.7.1 (ORCJIT, broadwell)


## Haplotyping on complete genotype data

In [5]:
using NullableArrays, BenchmarkTools, MendelImpute

# Simulate a d×p matrix H with entries in 0:1 and an n×p matrix X with entries
# in 0:2; the fixed seed makes the draws reproducible.
srand(123)
n, p, d = 10000, 1000, 50
H = rand(0:1, d, p)
X = rand(0:2, n, p)
# To inspect type inference of the convenience method, run:
#   @code_warntype haplopair(X, H)

# Start from the Gram matrix H * H' and rewrite it in place:
#   strict upper triangle: M[i, j] = 2 * H[i, :]'H[j, :] + H[i, :]'H[i, :] + H[j, :]'H[j, :]
#   diagonal:              M[j, j] = 4 * H[j, :]'H[j, :]
# The strict lower triangle is left as plain H * H'.
M = A_mul_Bt(H, H)
for col in 1:d, row in 1:(col - 1)
    # The diagonal is still unscaled here; it is multiplied by 4 only afterwards.
    M[row, col] = 2M[row, col] + M[row, row] + M[col, col]
end
for k in 1:d
    M[k, k] *= 4
end

# N = 2 * X * H'
N = 2 * A_mul_Bt(X, H)

# Output buffers filled by haplopair!.
happair  = zeros(Int, n, 2)
hapscore = zeros(eltype(N), n)
@time haplopair!(happair, hapscore, M, N);

  0.009789 seconds (5 allocations: 192 bytes)


In [6]:
# Display the n×2 Int matrix that was passed to haplopair! as its first argument.
happair

10000×2 Array{Int64,2}:
  1  16
 17  48
  8  12
 37  39
  5  21
 10  30
  1  42
 14  18
  3  18
  3  18
 19  44
 14  18
 36  48
  ⋮    
  9  12
 20  35
 16  35
 23  26
  7  47
 25  50
  3  39
 13  35
  5  24
  3  17
  3  18
 11  43

In [7]:
# Display the length-n vector that was passed to haplopair! as its second argument.
hapscore

10000-element Array{Int64,1}:
 -611
 -622
 -629
 -666
 -569
 -613
 -640
 -669
 -550
 -690
 -665
 -595
 -621
    ⋮
 -578
 -595
 -680
 -546
 -680
 -528
 -687
 -587
 -603
 -646
 -590
 -613

In [8]:
# Call the non-mutating method directly on X and H; its return value is displayed below.
haplopair(X, H)

(
[1 16; 17 48; … ; 3 18; 11 43],

[1043,1056,1025,1020,1055,1059,1070,1043,1039,1029  …  1021,1087,1052,1067,1058,1033,1057,1024,1012,1046])

In [9]:
# Benchmark the in-place kernel. The arguments are non-constant globals, so they are
# interpolated with `$`; otherwise the measurement also includes global-variable access.
@benchmark haplopair!($happair, $hapscore, $M, $N)

BenchmarkTools.Trial: 
  memory estimate:  32 bytes
  allocs estimate:  1
  --------------
  minimum time:     9.048 ms (0.00% GC)
  median time:      9.300 ms (0.00% GC)
  mean time:        9.479 ms (0.00% GC)
  maximum time:     12.843 ms (0.00% GC)
  --------------
  samples:          527
  evals/sample:     1

In [10]:
# Reset the output buffers to zero before the profiled call.
fill!(happair, 0)
fill!(hapscore, 0)

# Discard previously collected samples, profile a single call, and print a flat report.
# NOTE(review): the recorded report below lists mostly inference.jl frames — a single call
# of this duration may be too short to collect meaningful samples; confirm.
Profile.clear()
@profile haplopair!(happair, hapscore, M, N)
Profile.print(format=:flat)

 Count File                        Line Function                               
    11 ./<missing>                   -1 anonymous                              
     1 ./abstractarray.jl           485 copy!(::Array{Any,1}, ::Core.Infere... 
     2 ./inference.jl              1101 abstract_call(::Any, ::Array{Any,1}... 
     1 ./inference.jl               770 abstract_call_gf_by_type(::Any, ::A... 
     1 ./inference.jl               893 abstract_call_gf_by_type(::Any, ::A... 
     3 ./inference.jl              1152 abstract_eval(::Any, ::Array{Any,1}... 
     1 ./inference.jl              1105 abstract_eval_call(::Expr, ::Array{... 
     2 ./inference.jl              1131 abstract_eval_call(::Expr, ::Array{... 
     1 ./inference.jl              1254 abstract_interpret(::Any, ::Array{A... 
     2 ./inference.jl              1597 typeinf_edge(::Method, ::Any, ::Sim... 
     1 ./inference.jl              1603 typeinf_edge(::Method, ::Any, ::Sim... 
     1 ./inference.jl              1621 

## Haplotyping on incomplete genotype data



In [12]:
# Same seed, dimensions and draw order as in the complete-data section, so H and X
# are the same simulated matrices.
srand(123)
n, p, d = 10000, 1000, 50
H = rand(0:1, d, p)
X = rand(0:2, n, p)
# Zero-initialized work arrays and output buffers handed to haploimpute!.
M = zeros(eltype(H), d, d)
N = zeros(eltype(X), n, d)
happair  = zeros(Int, n, 2)
hapscore = zeros(eltype(N), n)
# Flag entries as null using a random sparse Bool mask with density `missingprop`
# (previously the literal 0.1 was repeated here and `missingprop` was unused).
missingprop = 0.1
Xm = NullableArray(X, BitArray(sprand(Bool, n, p, missingprop)))
# Overwrite the stored values at the null positions with 1 as a fixed starting value.
Xm.values[Xm.isnull] = 1
# NOTE(review): the recorded output below is `UndefVarError: haploimpute! not defined`,
# and the other calls in this notebook pass a seventh `missingidx` argument — confirm
# which signature the package currently provides.
@time haploimpute!(Xm, H, M, N, happair, hapscore)

LoadError: UndefVarError: haploimpute! not defined

In [8]:
#@benchmark haploimpute!(X, H, M, N, happair, hapscore, missingidx)

In [10]:
# Discard previously collected samples, profile a single haploimpute! call, and print a flat report.
# NOTE(review): `missingidx` is not assigned anywhere above this cell; it is assigned in
# the `AFRped.txt` section further down, so this cell depends on out-of-order execution — confirm.
Profile.clear()
@profile haploimpute!(X, H, M, N, happair, hapscore, missingidx)
Profile.print(format=:flat)

 Count File                        Line Function                               
   374 ./<missing>                   -1 anonymous                              
     1 ./abstractarray.jl           626 copy!(::Array{Int64,2}, ::UnitRange... 
     2 ./abstractarray.jl           648 copy_transpose!(::Array{Int64,2}, :... 
     1 ./abstractarray.jl           650 copy_transpose!(::Array{Int64,2}, :... 
    52 ./abstractarray.jl           651 copy_transpose!(::Array{Int64,2}, :... 
     1 ./arraymath.jl               111 .*(::Array{Int64,2}, ::Int64)          
    39 ./linalg/matmul.jl           504 _generic_matmatmul!(::Array{Int64,2... 
    18 ./linalg/matmul.jl           505 _generic_matmatmul!(::Array{Int64,2... 
     2 ./linalg/matmul.jl           507 _generic_matmatmul!(::Array{Int64,2... 
    39 ./linalg/matmul.jl           511 _generic_matmatmul!(::Array{Int64,2... 
   199 ./linalg/matmul.jl           512 _generic_matmatmul!(::Array{Int64,2... 
    55 ./linalg/matmul.jl           514 

## `AFRped.txt` data



In [7]:
#;ls -al

In [8]:
## cut out the first 6 fields in the pedigree file
#;cut -d ',' -f7- AFRped.txt > AFRped_geno.txt

In [9]:
#;ls -al

In [11]:
# Read the comma-separated file written by the (commented-out) `cut` command above
# into a Float64 matrix.
rawdata = readcsv("AFRped_geno.txt", Float64)

1325×36499 Array{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0 

In [12]:
# Take the first `people` rows of rawdata as X, dropping the final column.
# NOTE(review): the reason for dropping the last column is not shown here —
# presumably a trailing field that is not a genotype; confirm.
people = 664
X = rawdata[1:people, 1:(end - 1)]

664×36498 Array{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  

In [13]:
# Build H from the rows after the first `people`, again dropping the final column,
# and halve every entry: a one-bit right shift for integer data, ordinary division
# otherwise.
# NOTE(review): presumably these rows are coded 0/2 so that halving yields 0/1 — confirm.
H = if eltype(rawdata) <: Integer
    rawdata[(people + 1):end, 1:(end - 1)] .>> 1
else
    rawdata[(people + 1):end, 1:(end - 1)] / 2
end

661×36498 Array{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  1.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  

In [14]:
# Clean up rawdata: rebind the name to an empty array so the full matrix read above
# is no longer reachable through it (X and H were built from it in the cells above).
rawdata = []

0-element Array{Any,1}

In [31]:
using BenchmarkTools, MendelImpute, SnpArrays

# Restrict both matrices to the first 1200 columns. Range indexing copies, so the
# masking below does not modify X or H.
X1 = X[:, 1:1200]
H1 = H[:, 1:1200]

# Choose a random `missingprop` fraction of the entries of X1 to mask, as linear
# indices in increasing order, and convert them to subscripts for haploimpute!.
missingprop = 0.1
missingind = sort(randperm(length(X1))[1:round(Int, missingprop * length(X1))])
missingidx = ind2sub(X1, missingind)

# Keep the true values for scoring, then mark those entries as missing.
missing_true = X1[missingind]
X1[missingind] = NaN

# Round-trip through a SnpArray to initialize the missing genotypes.
S = SnpArray(X1)
copy!(X1, S; impute = true)

664×1200 Array{Float64,2}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  2.0  2.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     2.0  2.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     1.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     2.0  2.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     1.0  1.0  0.0  0.0  0.0  1.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  1.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     2.0  2.0  0.0  0.0  0.0  0.0  0.0
 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     1.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  1.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  1.0  1.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0

In [33]:
# Allocate work arrays and output buffers sized from the matrices that are actually
# passed to haploimpute! (X1 and H1).
n, d = size(X1, 1), size(H1, 1)
M = zeros(eltype(H1), d, d)
N = zeros(eltype(X1), n, d)
happair  = zeros(Int, n, 2)
hapscore = zeros(eltype(N), n)
@time haploimpute!(X1, H1, M, N, happair, hapscore, missingidx)
# Read back the values now stored at the masked positions and compare them with the truth.
missing_impute = X1[missingind]
# Fraction of masked entries whose imputed value differs from the true one.
# Named `errorrate` rather than `error` so that `Base.error` is not shadowed in Main.
errorrate = countnz(missing_true .≠ missing_impute) / length(missing_true)

  0.201472 seconds (8 allocations: 3.349 MB)


0.049711345381526106

In [30]:
# Side-by-side view of the masked positions: true values (column 1), imputed values (column 2).
[missing_true missing_impute]

53120×2 Array{Float64,2}:
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 ⋮       
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0
 0.0  0.0

In [20]:
# Benchmark the imputation routine. The arguments are non-constant globals, so they are
# interpolated with `$`; otherwise the measurement also includes global-variable access.
@benchmark haploimpute!($X1, $H1, $M, $N, $happair, $hapscore, $missingidx)

BenchmarkTools.Trial: 
  memory estimate:  3.35 MiB
  allocs estimate:  4
  --------------
  minimum time:     157.677 ms (0.00% GC)
  median time:      177.018 ms (0.00% GC)
  mean time:        175.881 ms (0.12% GC)
  maximum time:     189.820 ms (0.00% GC)
  --------------
  samples:          29
  evals/sample:     1