# Minimizing allocations in phasing

In [1]:
using Revise
using VCFTools
using MendelImpute
using GeneticVariation
using Random
using SparseArrays
using JLD2, FileIO, JLSO
using ProgressMeter
using GroupSlices
using ThreadPools
using BenchmarkTools
using StatsBase
using StaticArrays
# using Plots
# using ProfileView

In [2]:
Threads.nthreads()

8

# Optimize window by window intersection

# using array of int

Seems like `intersect!` in Base is allocating a lot. Its implementation is confusing.

In [4]:
@btime intersect!(x, y) setup=(x = [1, 2, 3]; y = [1, 4])

  311.573 ns (15 allocations: 1.05 KiB)


1-element Array{Int64,1}:
 1

In [11]:
@which intersect!([1, 2, 3], [1, 4])

## Try writing our own non-allocating intersect

In our application, the 2 integer vectors are sorted and elements are unique. The code below doesn't assume such, so there may be faster implementations.

In [8]:
"""
    intersect!(v::AbstractVector, u::AbstractVector, seen::BitSet=BitSet())

Computes `v ∩ u` in place and stores result in `v`. 

# Arguments
- `v`: An integer vector
- `u`: An integer vector
- `seen`: Preallocated storage container
"""
function Base.intersect!(
    v::AbstractVector{<:Integer}, 
    u::AbstractVector{<:Integer}, 
    seen::AbstractSet
    )
    empty!(seen)
    for i in u
        push!(seen, i)
    end
    for i in Iterators.reverse(eachindex(v))
        @inbounds v[i] ∉ seen && deleteat!(v, i)
    end
    nothing
end

"""
    intersect_size(v::AbstractVector, u::AbstractVector, seen::BitSet=BitSet())

Computes the size of `v ∩ u` in place. Assumes `v` is usually smaller than `u`
and each element in `v` is unique.

# Arguments
- `v`: An integer vector
- `u`: An integer vector
- `seen`: Preallocated storage container
"""
function intersect_size(
    v::AbstractVector{<:Integer}, 
    u::AbstractVector{<:Integer}, 
    seen::AbstractSet=BitSet()
    )
    empty!(seen)
    for i in u
        push!(seen, i)
    end
    s = 0
    for i in eachindex(v)
        @inbounds v[i] ∈ seen && (s += 1)
    end
    return s
end
intersect_size(v::AbstractVector, u::Integer, seen) = u in v

intersect_size (generic function with 3 methods)

In [15]:
# correctness
seen = BitSet()
sizehint!(seen, 10000)
x = [1, 3, 4, 5, 7, 9]
y = [2, 3, 5, 6]
@show intersect_size(x, y, seen)
intersect!(x, y, seen)
@show x
@show y;

intersect_size(x, y, seen) = 2
x = [3, 5]
y = [2, 3, 5, 6]


### Timings

In [25]:
# Julia built in
@btime intersect!(x, y) setup=(x = rand(1:10000, 1000); y = rand(1:10000, 1000));

  69.754 μs (35 allocations: 95.62 KiB)


In [23]:
seen = BitSet()
sizehint!(seen, 10000)
@btime intersect!(x, y, $seen) setup=(x = rand(1:10000, 1000); y = rand(1:10000, 1000));

  3.286 μs (0 allocations: 0 bytes)


In [24]:
seen = BitSet()
sizehint!(seen, 10000)
@btime intersect_size(x, y, $seen) setup=(x = rand(1:10000, 1000); y = rand(1:10000, 1000));

  2.465 μs (0 allocations: 0 bytes)


In [3]:
# first import all data, declare a bunch of (needed or not) variables, and look at 1 window
cd("/Users/biona001/.julia/dev/MendelImpute/simulation")
Random.seed!(2020)
width   = 512
tgtfile = "./compare2/target.typedOnly.maf0.01.masked.vcf.gz"
reffile = "./compare2/ref.excludeTarget.w$width.jlso"
loaded = JLSO.load(reffile)
compressed_Hunique = loaded[:compressed_Hunique]
X, X_sampleID, X_chr, X_pos, X_ids, X_ref, X_alt = VCFTools.convert_gt(UInt8, tgtfile, 
    trans=true, save_snp_info=true, msg = "Importing genotype file...");

# first person's optimal haplotype in each window (complete index)
happair1_original = [9, 9, 30, 218, 31, 31, 86, 30, 86, 218, 163, 163, 45, 45, 163, 687, 
    3, 3, 6, 687, 3, 170, 212, 687, 328, 687, 48, 67, 7, 7, 7, 7, 7, 7, 169, 169, 156, 
    156, 169, 169, 336, 539, 34, 300, 300, 300, 260, 284, 284, 1, 91, 91, 14, 104, 131, 
    131, 548, 8, 8, 8, 8, 8, 8, 183, 8, 23, 6, 117, 754, 190, 16, 16]
happair2_original = [5509, 45, 218, 5509, 218, 173, 218, 218, 218, 687, 218, 218, 163, 163, 
    1837, 709, 32, 687, 128, 1312, 202, 687, 277, 709, 328, 709, 475, 687, 687, 98, 98, 274, 
    169, 169, 709, 601, 709, 709, 384, 709, 709, 687, 171, 687, 426, 426, 284, 300, 539, 
    76, 617, 104, 104, 131, 1837, 140, 687, 687, 144, 687, 687, 233, 70, 233, 23, 1837, 
    23, 899, 2392, 1538, 78, 754];

[32mImporting genotype file...100%|█████████████████████████| Time: 0:00:07[39m


In [9]:
happair1 = copy(happair1_original)
happair2 = copy(happair2_original)
seen = BitSet()
survivors1=Int32[]
survivors2=Int32[]
sizehint!(seen, 60000)
sizehint!(survivors1, 60000)
sizehint!(survivors2, 60000)

@time phase_sample!(happair1, happair2, compressed_Hunique, seen, survivors1, survivors2)

  0.002382 seconds (4 allocations: 160 bytes)


In [7]:
seen = BitSet()
survivors1=Int32[]
survivors2=Int32[]
sizehint!(seen, 60000)
sizehint!(survivors1, 60000)
sizehint!(survivors2, 60000)

@btime phase_sample!(happair1, happair2, $compressed_Hunique, $seen, $survivors1,
    $survivors2) setup=(happair1=copy(happair1_original);happair2 = 
    copy(happair2_original))

  1.521 ms (0 allocations: 0 bytes)


# Try optimize overall phasing

First save intermediate results for quick loading later

In [3]:
Random.seed!(2020)
width   = 512
tgtfile = "./compare2/target.typedOnly.maf0.01.masked.vcf.gz"
reffile = "./compare2/ref.excludeTarget.w$width.jlso"

loaded = JLSO.load(reffile)
compressed_Hunique = loaded[:compressed_Hunique]

X, X_sampleID, X_chr, X_pos, X_ids, X_ref, X_alt = 
    VCFTools.convert_gt(UInt8, tgtfile, trans=true, 
    save_snp_info=true, msg = "Importing genotype file...")

people = size(X, 2)
tgt_snps = size(X, 1)
ref_snps = length(compressed_Hunique.pos)
windows = floor(Int, tgt_snps / width)
num_unique_haps = round(Int, avg_haplotypes_per_window(compressed_Hunique))

# working arrays
ph = [HaplotypeMosaicPair(ref_snps) for i in 1:people]
haplotype1 = [zeros(Int32, windows) for i in 1:people]
haplotype2 = [zeros(Int32, windows) for i in 1:people]

haptimers = MendelImpute.compute_optimal_haplotypes!(haplotype1, haplotype2, 
    compressed_Hunique, X, X_pos, nothing, nothing,
    false, 1000, false)

JLSO.save("./compare2/haplotypes.w512", :haplotype1 => haplotype1, :haplotype2 => haplotype2);

[32mImporting genotype file...100%|█████████████████████████| Time: 0:00:07[39m


Load intermediate results

In [13]:
Random.seed!(2020)
width   = 512
tgtfile = "./compare2/target.typedOnly.maf0.01.masked.vcf.gz"
reffile = "./compare2/ref.excludeTarget.w$width.jlso"

refloaded = JLSO.load(reffile)
compressed_Hunique = deepcopy(refloaded[:compressed_Hunique])

X, X_sampleID, X_chr, X_pos, X_ids, X_ref, X_alt = 
    VCFTools.convert_gt(UInt8, tgtfile, trans=true, 
    save_snp_info=true, msg = "Importing genotype file...")

haploaded = JLSO.load("./compare2/haplotypes.w512")
haplotype1 = copy(haploaded[:haplotype1])
haplotype2 = copy(haploaded[:haplotype2])
ref_snps = length(compressed_Hunique.pos)
people = size(X, 2)
ph = [HaplotypeMosaicPair(ref_snps) for i in 1:people];

[32mImporting genotype file...100%|█████████████████████████| Time: 0:00:06[39m


In [15]:
compressed_Hunique = deepcopy(refloaded[:compressed_Hunique])
haplotype1 = copy(haploaded[:haplotype1])
haplotype2 = copy(haploaded[:haplotype2])
ph = [HaplotypeMosaicPair(ref_snps) for i in 1:people];
@time phasetimer = phase_fast!(ph, X, compressed_Hunique, haplotype1, haplotype2);

  0.036440 seconds (235.35 k allocations: 28.268 MiB)


In [16]:
@btime phasetimer = phase_fast!(ph, $X, compressed_Hunique, haplotype1, haplotype2
    ) setup=(ph = [HaplotypeMosaicPair(ref_snps) for i in 1:people];haplotype1 = 
    copy(haploaded[:haplotype1]); haplotype2 = 
    copy(haploaded[:haplotype2]); compressed_Hunique = deepcopy(
    refloaded[:compressed_Hunique]));

  35.100 ms (233599 allocations: 28.24 MiB)
