In [1]:
using Revise
using SnpArrays
using LinearAlgebra
using Random
using LoopVectorization
using MendelIHT
using BenchmarkTools

┌ Info: Precompiling SnpArrays [4e780e97-f5bf-4111-9dc4-b70aaf691b06]
└ @ Base loading.jl:1317
┌ Info: Precompiling MendelIHT [921c7187-1484-5754-b919-5d3ed9ac03c4]
└ @ Base loading.jl:1317


# $C = AB$ correctness

If we want to center/scale the SnpArray, we have
$$
C_{ij} = \sum_{k} \left(\frac{A_{ik} - \mu_k}{\sigma_k}\right)B_{kj} = \sum_{k} \frac{A_{ik}B_{kj} - \mu_kB_{kj}}{\sigma_k}
$$
+ $C = m \times p$
+ $A = m \times n$
+ $B = n \times p$
+ SnpArray model = Additive, dominant, recessive

In [4]:
# Configuration: genotype model, preprocessing switches, and problem dimensions
model = ADDITIVE_MODEL
center, scale, impute = true, true, true
m, n, p = 4098, 1025, 1025
x = simulate_random_snparray(undef, m, n)

# With imputation enabled, overwrite roughly 1% of the entries with 0x01 (missing)
if impute
    for j in 1:n
        for i in 1:m
            if rand() < 0.01
                x[i, j] = 0x01
            end
        end
    end
end

# C = A * B evaluated through the SnpLinAlg wrapper
A = SnpLinAlg{Float64}(x; model=model, impute=impute, center=center, scale=scale)
B = fill(1.0, n, p)
C = fill(0.0, m, p)
LinearAlgebra.mul!(C, A, B)

# Reference answer: convert the SnpArray to a dense Float64 matrix, then multiply
Ctrue = convert(Matrix{Float64}, x; impute=impute, model=model, center=center, scale=scale) * B
@show all(C .≈ Ctrue)

all(C .≈ Ctrue) = true


true

## Speed: SnpLinAlg-(matrix) vs multiple SnpLinAlg-vector

In [96]:
"""
    adhoc_mul!(out::AbstractMatrix, st::AbstractSnpLinAlg, v::AbstractMatrix)

Compute `C = AB` as a sequence of matrix-vector products, `C[:, i] = A * B[:, i]`,
writing the result into `out` in place.

Each column is handled by one call to `SnpArrays.mul!` on views of `out` and `v`,
so no column is copied.

# Arguments
- `out`: destination matrix; must have the same number of columns as `v`.
- `st`: the SnpLinAlg operand (left factor).
- `v`: right factor; its columns are multiplied one at a time.

# Returns
`out`, following the convention of `mul!`.

# Throws
- `DimensionMismatch` if `out` and `v` have a different number of columns.
"""
function adhoc_mul!(
    out::AbstractMatrix,
    st::AbstractSnpLinAlg,
    v::AbstractMatrix)
    # Without this check a wider `out` would silently keep stale columns.
    size(out, 2) == size(v, 2) || throw(DimensionMismatch(
        "out has $(size(out, 2)) columns but v has $(size(v, 2))"))
    for i in axes(v, 2)
        SnpArrays.mul!(view(out, :, i), st, view(v, :, i))
    end
    return out
end

adhoc_mul! (generic function with 1 method)

### r = 2

In [106]:
# Problem size: samples, SNPs, traits
n, p, r = 5340, 24523, 2
x = simulate_random_snparray(undef, n, p)

# Sanity check: the SnpLinAlg-matrix product and the column-by-column product must agree
A = SnpLinAlg{Float64}(x; model=ADDITIVE_MODEL, impute=true, center=true, scale=true)
B = fill(1.0, p, r)
C = fill(0.0, n, r)
Ctest = fill(0.0, n, r)
LinearAlgebra.mul!(C, A, B)
adhoc_mul!(Ctest, A, B)
all(isapprox.(Ctest, C))

true

In [107]:
# Time the in-place SnpLinAlg-matrix product; `$` interpolates the global
# variables into the benchmarked expression.
@benchmark LinearAlgebra.mul!($C, $A, $B) # SnpLinAlg-matrix

BenchmarkTools.Trial: 
  memory estimate:  128 bytes
  allocs estimate:  1
  --------------
  minimum time:     79.696 ms (0.00% GC)
  median time:      82.416 ms (0.00% GC)
  mean time:        83.003 ms (0.00% GC)
  maximum time:     96.007 ms (0.00% GC)
  --------------
  samples:          61
  evals/sample:     1

In [109]:
# Time the column-by-column variant on the same operands for comparison.
@benchmark adhoc_mul!($Ctest, $A, $B) # multiple SnpLinAlg-vector

BenchmarkTools.Trial: 
  memory estimate:  192 bytes
  allocs estimate:  2
  --------------
  minimum time:     114.517 ms (0.00% GC)
  median time:      122.829 ms (0.00% GC)
  mean time:        122.406 ms (0.00% GC)
  maximum time:     126.932 ms (0.00% GC)
  --------------
  samples:          41
  evals/sample:     1

In [31]:
# Dense Float64 copy of A, used as the BLAS baseline.
Afloat = convert(Matrix{Float64}, A)
# NOTE: the BLAS thread count is a session-wide setting; it stays at 8 until reset.
BLAS.set_num_threads(8)
@benchmark LinearAlgebra.mul!($C, $Afloat, $B) # BLAS with 8 threads

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     45.535 ms (0.00% GC)
  median time:      46.517 ms (0.00% GC)
  mean time:        46.781 ms (0.00% GC)
  maximum time:     53.563 ms (0.00% GC)
  --------------
  samples:          107
  evals/sample:     1

In [32]:
# Same dense product restricted to a single BLAS thread (reuses `Afloat` from the previous cell).
BLAS.set_num_threads(1)
@benchmark LinearAlgebra.mul!($C, $Afloat, $B) # BLAS with 1 thread

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     132.407 ms (0.00% GC)
  median time:      136.632 ms (0.00% GC)
  mean time:        138.222 ms (0.00% GC)
  maximum time:     153.705 ms (0.00% GC)
  --------------
  samples:          37
  evals/sample:     1

### r = 5

In [34]:
# Same problem size as the r = 2 run, now with five traits (right-hand-side columns)
n, p, r = 5340, 24523, 5
x = simulate_random_snparray(undef, n, p)

# Sanity check: the SnpLinAlg-matrix product and the column-by-column product must agree
A = SnpLinAlg{Float64}(x; model=ADDITIVE_MODEL, impute=true, center=true, scale=true)
B = fill(1.0, p, r)
C = fill(0.0, n, r)
Ctest = fill(0.0, n, r)
LinearAlgebra.mul!(C, A, B)
adhoc_mul!(Ctest, A, B)
all(isapprox.(Ctest, C))

true

In [35]:
# Time the in-place SnpLinAlg-matrix product for r = 5; `$` interpolates the
# global variables into the benchmarked expression.
@benchmark LinearAlgebra.mul!($C, $A, $B) # SnpLinAlg-matrix

BenchmarkTools.Trial: 
  memory estimate:  128 bytes
  allocs estimate:  1
  --------------
  minimum time:     132.495 ms (0.00% GC)
  median time:      135.236 ms (0.00% GC)
  mean time:        135.548 ms (0.00% GC)
  maximum time:     147.035 ms (0.00% GC)
  --------------
  samples:          37
  evals/sample:     1

In [36]:
# Time the column-by-column variant on the same r = 5 operands for comparison.
@benchmark adhoc_mul!($Ctest, $A, $B) # multiple SnpLinAlg-vector

BenchmarkTools.Trial: 
  memory estimate:  480 bytes
  allocs estimate:  5
  --------------
  minimum time:     230.470 ms (0.00% GC)
  median time:      236.705 ms (0.00% GC)
  mean time:        295.087 ms (0.00% GC)
  maximum time:     699.488 ms (0.00% GC)
  --------------
  samples:          19
  evals/sample:     1

In [37]:
# Dense Float64 copy of A, used as the BLAS baseline.
Afloat = convert(Matrix{Float64}, A)
# NOTE: the BLAS thread count is a session-wide setting; it stays at 8 until reset.
BLAS.set_num_threads(8)
@benchmark LinearAlgebra.mul!($C, $Afloat, $B) # BLAS with 8 threads

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     45.511 ms (0.00% GC)
  median time:      49.890 ms (0.00% GC)
  mean time:        51.769 ms (0.00% GC)
  maximum time:     68.548 ms (0.00% GC)
  --------------
  samples:          97
  evals/sample:     1

In [38]:
# Same dense product restricted to a single BLAS thread (reuses `Afloat` from the previous cell).
BLAS.set_num_threads(1)
@benchmark LinearAlgebra.mul!($C, $Afloat, $B) # BLAS with 1 thread

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     148.367 ms (0.00% GC)
  median time:      154.699 ms (0.00% GC)
  mean time:        155.224 ms (0.00% GC)
  maximum time:     166.649 ms (0.00% GC)
  --------------
  samples:          33
  evals/sample:     1

# $C = A^tB$ correctness

If we want to center/scale the SnpArray, we have
$$
C_{ij} = \sum_{k} \left(\frac{A_{ki} - \mu_i}{\sigma_i}\right)B_{kj} = \sum_{k} \frac{A_{ki}B_{kj} - \mu_iB_{kj}}{\sigma_i}
$$
+ $C = n \times p$
+ $A = m \times n$
+ $B = m \times p$
+ SnpArray model = Additive, dominant, recessive

In [6]:
# Configuration for the transposed product: centering, scaling and imputation all off
model = ADDITIVE_MODEL
center, scale, impute = false, false, false
m = 4098 # 4097 will crash
n, p = 1024, 1024
x = simulate_random_snparray(undef, m, n)

# With imputation enabled, overwrite roughly 1% of the entries with 0x01 (missing)
if impute
    for j in 1:n
        for i in 1:m
            if rand() < 0.01
                x[i, j] = 0x01
            end
        end
    end
end

# C = Aᵀ * B evaluated through the SnpLinAlg wrapper
A = SnpLinAlg{Float64}(x; model=model, impute=impute, center=center, scale=scale)
B = fill(1.0, m, p)
C = fill(0.0, n, p)
LinearAlgebra.mul!(C, Transpose(A), B)

# Reference answer: dense Float64 conversion, transposed, then multiplied
Ctrue = adjoint(convert(Matrix{Float64}, x; impute=impute, model=model, center=center, scale=scale)) * B
@show all(C .≈ Ctrue)

M = 1024, Miter = 1, Mrem = 2
N = 1024, Niter = 1, Nrem = 0
P = 1024, Piter = 1, Prem = 0
all(C .≈ Ctrue) = true


true

## $Ax$ bug

In [84]:
# `@test` lives in the Test standard library. Load it here so this cell runs
# when the file is executed top to bottom.
using Test

# before fixing issue 90, any n between 4097 and 4099 doesn't work!
n = 4099
p = 1000
x = simulate_random_snparray(undef, n, p, min_ma=0)

# Compare the SnpLinAlg matrix-vector product against the dense Float64 product
A = SnpLinAlg{Float64}(x, model=ADDITIVE_MODEL, impute=false, center=false, scale=false)
b = ones(p)
c = A * b
ctrue = convert(Matrix{Float64}, A) * b
@test all(c .≈ ctrue)

rows_filled = 4099
M = 1024, Miter = 1, Mrem = 3
N = 1000, Niter = 0, Nrem = 1000


[32m[1mTest Passed[22m[39m

In [17]:
using Revise
using SnpArrays
using LinearAlgebra
using Random
using LoopVectorization
using MendelIHT
using Test
using BenchmarkTools

# after fixing issue 90, if n is 1024, 2048 or 3072, still won't work!
n, p = 4097, 2048
x = simulate_random_snparray(undef, n, p) # defined in my package `MendelIHT.jl`

# Plant two 0x01 (missing) entries so the impute=true path is exercised
x[1, 2048] = 0x01
x[2048, 1] = 0x01

# Compare the SnpLinAlg matrix-vector product against the dense Float64 product
A = SnpLinAlg{Float64}(x; model=ADDITIVE_MODEL, impute=true, center=false, scale=false)
b = fill(1.0, p)
c = A * b
ctrue = convert(Matrix{Float64}, x; impute=true, center=false, scale=false) * b
@test all(c .≈ ctrue)

[32m[1mTest Passed[22m[39m

## $A^tx$ bug

In [61]:
# before fixing issue 90, any n between 8193 and 8195 doesn't work!
n, p = 8193, 1000
x = simulate_random_snparray(undef, n, p; min_ma=0)

# Compare the transposed SnpLinAlg-vector product against the dense Float64 product
A = SnpLinAlg{Float64}(x; model=ADDITIVE_MODEL, impute=false, center=false, scale=false)
b = fill(1.0, n)
c = adjoint(A) * b
ctrue = adjoint(convert(Matrix{Float64}, A)) * b
@test all(c .≈ ctrue)

rows_filled = 8193
M = 2048, Miter = 1, Mrem = 1
N = 1000, Niter = 0, Nrem = 1000


[32m[1mTest Passed[22m[39m

In [89]:
using Revise
using SnpArrays
using LinearAlgebra
using Random
using LoopVectorization
using MendelIHT
using Test
using BenchmarkTools

# after fixing issue 90, if n is 2048, 4096 or 6144, still won't work!
n = 8192+6144
p = 2048
x = simulate_random_snparray(undef, n, p) # defined in my package `MendelIHT.jl`

# Optional missing entries, currently disabled:
# x[1, 8193] = 0x01
# x[8193, 1] = 0x01

# Compare the transposed SnpLinAlg-vector product against the dense Float64 product
A = SnpLinAlg{Float64}(x; model=ADDITIVE_MODEL, impute=true, center=false, scale=false)
b = fill(1.0, n)
c = adjoint(A) * b
ctrue = adjoint(convert(Matrix{Float64}, x; impute=true, center=false, scale=false)) * b
@test all(c .≈ ctrue)

rows_filled = 14336
M = 3584, Miter = 1, Mrem = 6144
N = 2048, Niter = 1, Nrem = 0


[32m[1mTest Passed[22m[39m