# Shared Code

In [507]:
from __future__ import print_function
from __future__ import division

In [508]:
from collections import defaultdict

In [509]:
MARGIN = 200
MAX_ID = 100

In [510]:
import requests
def get_seqs(chrom, pos, ref, alt, margin):
    """Obtain reference and alternate sequences 
    from Ensembl.
    
    Returns (ref_seq, alt_seq) tuple
    """
    # Calculate start and end positions
    start = pos - margin
    end = pos + margin
    # Construct the URL for the REST query
    server = "http://grch37.rest.ensembl.org/"
    ext = "/sequence/region/human/{}:{}..{}:1?".format(chrom, start, end)
    # Send the HTTP request
    r = requests.get(server+ext, headers={ "Content-Type" : "text/plain"})
    # Extract reference sequence
    ref_seq = str(r.text)
    # Strip away any gaps when calculating length
    ref_len = len(ref.strip("-"))
    alt_len = len(alt.strip("-"))
    # Categorize the variant
    if ref_len < alt_len:  # Insertion
        prefix = ref_seq[:margin+1]
        suffix = ref_seq[margin+1:]
        alt_seq = prefix + alt + suffix
    elif ref_len > alt_len:  # Deletion
        prefix = ref_seq[:margin]
        suffix = ref_seq[margin+len(ref):]
        alt_seq = prefix + suffix
    else:  # SNP
        prefix = ref_seq[:margin]
        suffix = ref_seq[margin+1:]
        alt_seq = prefix + alt + suffix
    return ref_seq, alt_seq

In [511]:
def rev_comp(seq):
    """Return reverse complement"""
    cbases = {"A": "T",
              "T": "A",
              "G": "C",
              "C": "G",
              "N": "N"}
    comp = ""
    for base in seq[::-1]:
        comp += cbases[base]
    return comp

In [512]:
from tabulate import tabulate
def print_results(results, headers):
    """Return results in pretty format"""
    s_results = sorted(results.items(), key=lambda x: int(x[0]))
    table = [[int(i)] + [(method.get("ref_count", 0), method.get("alt_count", 0), 
                     method.get("amb_count", 0), method.get("vaf", 0)) for method in result] for i, result in s_results]
    return tabulate(table, headers)

In [513]:
# Parse indels file
indels = {}
headers = ["id", "chrom", "start", "end", "ref", "alt", "ref_count", "alt_count", "vaf"]
with open("indels.txt") as infile:
    for line in infile:
        # Parse line
        indel = dict(zip(headers, line.rstrip("\n").split("\t")))
        id_num = int(indel["id"])
        # Obtain sequences
        ref_seq, alt_seq = get_seqs(indel["chrom"], int(indel["start"]), indel["ref"], indel["alt"], margin=MARGIN)
        indel["ref_seq"], indel["alt_seq"] = ref_seq, alt_seq
        # Create SeqIndexSet objects for ref and alt sequences
        ref_idxs = SeqIndexSet(ref_seq)
        alt_idxs = SeqIndexSet(alt_seq)
        indel["ref_idxs"], indel["alt_idxs"] = ref_idxs, alt_idxs
        # Store them for later
        indels[id_num] = indel
        # Limit number of indels for now
        if id_num >= MAX_ID:
            break

In [514]:
# Build dictionary of indels holding the values predicted by various methods
# key: id
# value: list of dict(ref_count, alt_count, amb_count, vaf) for each method
HEADERS = ["id", "original", "kmer", "aln", "seeded_aln", "hybrid"]
results = defaultdict(lambda: [{}] * (len(HEADERS) - 1))

In [515]:
# Add original results from MAF file to results dict
for i in range(1, MAX_ID+1):
    # Iterate over reads
    indel = indels[i]
    results[indel["id"]][0] = {
            "ref_count": int(indel["ref_count"]),
            "alt_count": int(indel["alt_count"]),
            "amb_count": 0,
            "vaf": round(float(indel["vaf"]), 2)
        }

# K-mer Approach

In [516]:
# Some constants
K = 10
IVAL = 2

In [517]:
from random import shuffle
def kmer_iter(seq, k, step, ival):
    """Iterate over k-mers using the same 
    subsequence pattern.
    
    Yields (offset, kmer).
    """
    num_kmers = (len(seq) - k * ival)//step + 1
    kmer_ids = range(num_kmers)
    shuffle(kmer_ids)
    for i in kmer_ids:
        start = i*step
        end = i*step+k*ival
        kmer = seq[start:end:ival]
        yield start, kmer

In [518]:
class SeqIndexSet(object):
    
    def __init__(self, seq):
        self.seq = seq
        self.kmer_idxs = {}
    
    def get_idx(self, k, step, ival):
        """Return k-mer index. Create it if not 
        precomputed.
        """
        # Create param key
        key = (k, step, ival)
        # Check if precomputed
        if key in self.kmer_idxs:
            idx = self.kmer_idxs[key]
        else:
            # Create a new index
            idx = defaultdict(set)
            for offset, kmer in kmer_iter(self.seq, k, step, ival):
                idx[kmer].add(offset)
            # Store it for later
            self.kmer_idxs[key] = idx
        return idx

In [519]:
def kmer_count(seq, kmer_idx, k, step, ival):
    """Returns score for k-mers present
    in the given k-mer index.
    
    Returns the count/score.
    """
    kmer_count = 0
    num_kmers = (len(seq) - k)//step + 1
    for offset, kmer in kmer_iter(seq, k, step, ival):
        if kmer in kmer_idx:
            kmer_count += 1
    return kmer_count

In [520]:
def calc_kmer_delta(read_seq, ref_idxs, alt_idxs, min_delta=1, max_ival=3):
    """Determines whether read has more k-mers
    in common with reference sequence or alternate
    sequence. 
    
    abs(difference) >= min_delta
    Attempts with interval lengths <= max_ival
    
    Returns delta in score between the two.
    If positive, aligns better to reference.
    If negative, aligns better to alternate.
    If zero, abs(difference) < min_delta
    """
    ival = 1
    ref_score = 0
    alt_score = 0
    while (abs(ref_score - alt_score) < min_delta) and ival <= max_ival:
        # Generate k-mer indexes for this ival
        ref_idx = ref_idxs.get_idx(k=K, step=1, ival=ival)
        alt_idx = alt_idxs.get_idx(k=K, step=1, ival=ival)
        # Find ref scores for forward and reverse and take max
        ref_score += kmer_count(read_seq, ref_idx, k=K, step=1, ival=ival)
        # Find alt scores for forward and reverse and take max
        alt_score += kmer_count(read_seq, alt_idx, k=K, step=1, ival=ival)
        # Increment ival
        ival += 1
    if abs(ref_score - alt_score) < min_delta:
        delta = 0
    else:
        delta = ref_score - alt_score
    return delta

In [521]:
def is_forward(read_seq, ref_idxs):
    """Returns whether read is forward."""
    fread = read_seq
    rread = rev_comp(read_seq)
    ref_idx = ref_idxs.get_idx(k=K, step=1, ival=2)
    fscore = kmer_count(fread, ref_idx, k=K, step=1, ival=2)
    rscore = kmer_count(rread, ref_idx, k=K, step=1, ival=2)
    return fscore > rscore

In [522]:
%%time
for i in range(1, MAX_ID+1):
    # Iterate over reads
    temp = "reads/reads_{}.txt"
    with open(temp.format(i)) as reads:
        ref_count = 0
        alt_count = 0
        amb_count = 0
        indel = indels[i]
        ref_idxs = indel["ref_idxs"]
        alt_idxs = indel["alt_idxs"]
        # Iterate over reads
        for read in reads:
            read = read.rstrip("\n").replace("N", "A")
            # Reverse read if applicable
            if not is_forward(read, ref_idxs):
                read = rev_comp(read)
            # Calculate score delta
            kmer_delta = calc_kmer_delta(read, ref_idxs, alt_idxs, min_delta=3, max_ival=5)
            if kmer_delta > 0:
                ref_count += 1
            elif kmer_delta < 0:
                alt_count += 1
            else:
                amb_count += 1
        if alt_count + ref_count == 0:
            vaf = 0
        else:
            vaf = round(alt_count/(alt_count + ref_count), 2)
        results[indel["id"]][1] = {
            "ref_count": ref_count,
            "alt_count": alt_count,
            "amb_count": amb_count,
            "vaf": vaf
        }

CPU times: user 6.78 s, sys: 118 ms, total: 6.9 s
Wall time: 6.95 s


In [523]:
print(print_results(results, headers=HEADERS))

  id  original              kmer                   aln           seeded_aln    hybrid
----  --------------------  ---------------------  ------------  ------------  ------------
   1  (13, 7, 0, 0.35)      (16, 20, 7, 0.56)      (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   2  (36, 0, 0, 0.0)       (33, 0, 9, 0.0)        (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   3  (30, 0, 0, 0.0)       (30, 0, 4, 0.0)        (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   4  (79, 0, 0, 0.0)       (71, 0, 19, 0.0)       (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   5  (12, 0, 0, 0.0)       (8, 0, 12, 0.0)        (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   6  (11, 0, 0, 0.0)       (11, 0, 5, 0.0)        (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   7  (8, 0, 0, 0.0)        (8, 0, 7, 0.0)         (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   8  (7, 4, 0, 0.36)       (7, 4, 0, 0.36)        (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 0, 0)
   9  (11, 0, 0, 0.0)       (10, 0, 4, 0.0)        (0, 0, 0, 0)  (0, 0, 0, 0)  (0, 0, 

# Alignment Approach

In [524]:
alphabet = ['A', 'C', 'G', 'T']
score = [[0, 4, 2, 4, 8],
         [4, 0, 4, 2, 8],
         [2, 4, 0, 4, 8],
         [4, 2, 4, 0, 8],
         [8, 8, 8, 8, 8]]

In [525]:
import numpy as np
def aln_score(read, ref, offset=None, margin=5):
    
    # Edit ref if offset is given
    if offset:
        ref = ref[offset-margin:offset+len(read)+margin]

    # Create distance matrix
    D = np.zeros((len(read)+1,len(ref)+1), dtype=np.int)
    
    # Initialize first row
    for i in range(1, len(ref)+1):
        D[0,i] = 0
    
    # Initialize first column
    for i in range(1, len(read)+1):
        D[i,0] = D[i-1,0] + score[alphabet.index(read[i-1])][-1]
        
    # Fill rest of the matrix
    for i in range(1, len(read)+1):
        for j in range(1, len(ref)+1):
            distHor = D[i,j-1] + score[-1][alphabet.index(ref[j-1])]
            distVer = D[i-1,j] + score[alphabet.index(read[i-1])][-1]
            distDiag = D[i-1,j-1] + score[alphabet.index(read[i-1])][alphabet.index(ref[j-1])]
            D[i][j] = min(distHor, distVer, distDiag)
    
    # Return min of bottom row
    return min(D[-1])

In [526]:
def calc_aln_delta(read_seq, ref_seq, alt_seq, min_delta=8, offset=None, margin=None):
    """Calculate difference in score between
    a local alignment to the reference sequence
    and one to the alternate sequence.
    
    Returns the difference in score.
    If positive, alignment to reference is better
    If negative, alignment to alternate is better
    If zero, abs(difference) < min_delta
    """
    ref_score = aln_score(read_seq, ref_seq, offset=offset, margin=margin)
    alt_score = aln_score(read_seq, alt_seq, offset=offset, margin=margin)
    if abs(ref_score - alt_score) < min_delta:
        delta = 0
    else:
        delta = -(ref_score - alt_score)
    return delta

In [527]:
%%time
for i in range(1, MAX_ID+1):
    # Iterate over reads
    temp = "reads/reads_{}.txt"
    with open(temp.format(i)) as reads:
        ref_count = 0
        alt_count = 0
        amb_count = 0
        indel = indels[i]
        ref_seq, alt_seq = indel["ref_seq"], indel["alt_seq"]
        ref_idxs = indel["ref_idxs"]
        for read in reads:
            read = read.rstrip("\n").replace("N", "A")
            # Reverse read if applicable
            if not is_forward(read, ref_idxs):
                read = rev_comp(read)
            # Find offset
            offset = find_offset(read, ref_idxs, k=K, step=1, ival=IVAL)
            # Determine if overlaps with mutation position
            if offset and not is_overlap(read, ref_seq, offset, min_olap=2):
                continue
            # Lower score is better
            aln_delta = calc_aln_delta(read, ref_seq, alt_seq)
            if aln_delta > 0:
                ref_count += 1
            elif aln_delta < 0:
                alt_count += 1
            else:
                amb_count += 1
        if alt_count + ref_count == 0:
            vaf = 0
        else:
            vaf = round(alt_count/(alt_count + ref_count), 2)
        results[indel["id"]][2] = {
            "ref_count": ref_count,
            "alt_count": alt_count,
            "amb_count": amb_count,
            "vaf": vaf
        }

CPU times: user 54min 26s, sys: 37.7 s, total: 55min 4s
Wall time: 54min 35s


In [528]:
print(print_results(results, headers=HEADERS))

  id  original              kmer                   aln                 seeded_aln    hybrid
----  --------------------  ---------------------  ------------------  ------------  ------------
   1  (13, 7, 0, 0.35)      (16, 20, 7, 0.56)      (12, 14, 13, 0.54)  (0, 0, 0, 0)  (0, 0, 0, 0)
   2  (36, 0, 0, 0.0)       (33, 0, 9, 0.0)        (30, 0, 3, 0.0)     (0, 0, 0, 0)  (0, 0, 0, 0)
   3  (30, 0, 0, 0.0)       (30, 0, 4, 0.0)        (27, 0, 3, 0.0)     (0, 0, 0, 0)  (0, 0, 0, 0)
   4  (79, 0, 0, 0.0)       (71, 0, 19, 0.0)       (64, 0, 6, 0.0)     (0, 0, 0, 0)  (0, 0, 0, 0)
   5  (12, 0, 0, 0.0)       (8, 0, 12, 0.0)        (8, 0, 0, 0.0)      (0, 0, 0, 0)  (0, 0, 0, 0)
   6  (11, 0, 0, 0.0)       (11, 0, 5, 0.0)        (10, 0, 0, 0.0)     (0, 0, 0, 0)  (0, 0, 0, 0)
   7  (8, 0, 0, 0.0)        (8, 0, 7, 0.0)         (6, 0, 2, 0.0)      (0, 0, 0, 0)  (0, 0, 0, 0)
   8  (7, 4, 0, 0.36)       (7, 4, 0, 0.36)        (7, 4, 0, 0.36)     (0, 0, 0, 0)  (0, 0, 0, 0)
   9  (11, 0, 0, 0.0)     

# Seeded Alignment Approach

In [529]:
def find_offset(read, ref_idxs, k, step, ival, min_support=3):
    """Find offset of pattern p in k-mer index.
    
    Returns offset as int.
    """
    offset_support = defaultdict(int)
    ref_idx = ref_idxs.get_idx(k, step, ival)
    for pos, kmer in kmer_iter(read, k, step, ival):
        offsets = ref_idx[kmer]
        for offset in offsets:
            offset_support[offset - pos] += 1
        vals = offset_support.values()
        if any(map(lambda x: x >= min_support, vals)):
            max_support = max(vals)
            best_offsets = [offset for offset, support in offset_support.items() if support == max_support]
            if len(best_offsets) > 1:
                continue
            else:
                return best_offsets[0]
    return None

In [530]:
def is_overlap(read, ref_seq, offset, min_olap=2):
    """Returns whether read overlaps with
    mutation position.
    """
    mid = len(ref_seq) / 2
    return (offset + min_olap <= mid) and (offset + len(read) - min_olap >= mid)

In [531]:
%%time
for i in range(1, MAX_ID+1):
    # Iterate over reads
    temp = "reads/reads_{}.txt"
    with open(temp.format(i)) as reads:
        ref_count = 0
        alt_count = 0
        amb_count = 0
        indel = indels[i]
        ref_seq, alt_seq = indel["ref_seq"], indel["alt_seq"]
        ref_idxs = indel["ref_idxs"]
        for read in reads:
            read = read.rstrip("\n").replace("N", "A")
            # Reverse read if applicable
            if not is_forward(read, ref_idxs):
                read = rev_comp(read)
            # Find offset
            offset = find_offset(read, ref_idxs, k=K, step=1, ival=IVAL)
            # Determine if overlaps with mutation position
            if offset and not is_overlap(read, ref_seq, offset, min_olap=2):
                continue
            # Estimate appropriate margin (esp. if insertion)
            if len(alt_seq) - len(ref_seq) > 0:
                margin = len(alt_seq) - len(ref_seq) + 5
            else:
                margin = 5
            aln_delta = calc_aln_delta(read, ref_seq, alt_seq, min_delta=2, offset=offset, margin=margin)
            if aln_delta > 0:
                ref_count += 1
            elif aln_delta < 0:
                alt_count += 1
            else:
                amb_count += 1
        if alt_count + ref_count == 0:
            vaf = 0
        else:
            vaf = round(alt_count/(alt_count + ref_count), 2)
        results[indel["id"]][3] = {
            "ref_count": ref_count,
            "alt_count": alt_count,
            "amb_count": amb_count,
            "vaf": vaf
        }

CPU times: user 15min 15s, sys: 5.56 s, total: 15min 21s
Wall time: 15min 17s


In [532]:
print(print_results(results, headers=HEADERS))

  id  original              kmer                   aln                 seeded_aln            hybrid
----  --------------------  ---------------------  ------------------  --------------------  ------------
   1  (13, 7, 0, 0.35)      (16, 20, 7, 0.56)      (12, 14, 13, 0.54)  (14, 24, 2, 0.63)     (0, 0, 0, 0)
   2  (36, 0, 0, 0.0)       (33, 0, 9, 0.0)        (30, 0, 3, 0.0)     (33, 0, 0, 0.0)       (0, 0, 0, 0)
   3  (30, 0, 0, 0.0)       (30, 0, 4, 0.0)        (27, 0, 3, 0.0)     (30, 0, 0, 0.0)       (0, 0, 0, 0)
   4  (79, 0, 0, 0.0)       (71, 0, 19, 0.0)       (64, 0, 6, 0.0)     (70, 0, 0, 0.0)       (0, 0, 0, 0)
   5  (12, 0, 0, 0.0)       (8, 0, 12, 0.0)        (8, 0, 0, 0.0)      (8, 0, 0, 0.0)        (0, 0, 0, 0)
   6  (11, 0, 0, 0.0)       (11, 0, 5, 0.0)        (10, 0, 0, 0.0)     (10, 0, 0, 0.0)       (0, 0, 0, 0)
   7  (8, 0, 0, 0.0)        (8, 0, 7, 0.0)         (6, 0, 2, 0.0)      (8, 0, 0, 0.0)        (0, 0, 0, 0)
   8  (7, 4, 0, 0.36)       (7, 4, 0, 0.36)        (

# Hybrid Approach

In [533]:
%%time
for i in range(1, MAX_ID+1):
    # Iterate over reads
    temp = "reads/reads_{}.txt"
    with open(temp.format(i)) as reads:
        ref_count = 0
        alt_count = 0
        amb_count = 0
        indel = indels[i]
        ref_idxs = indel["ref_idxs"]
        alt_idxs = indel["alt_idxs"]
        # Iterate over reads
        for read in reads:
            read = read.rstrip("\n").replace("N", "A")
            # Reverse read if applicable
            if not is_forward(read, ref_idxs):
                read = rev_comp(read)
            # Find offset
            offset = find_offset(read, ref_idxs, k=K, step=1, ival=IVAL)
            # Determine if overlaps with mutation position
            if offset and not is_overlap(read, ref_seq, offset, min_olap=0):
                continue
            # Calculate score delta
            kmer_delta = calc_kmer_delta(read, ref_idxs, alt_idxs, min_delta=3, max_ival=5)
            if kmer_delta > 0:
                ref_count += 1
            elif kmer_delta < 0:
                alt_count += 1
            else:
                # If k-mer method can't discriminate between ref and alt, use alignment method
                # Estimate appropriate margin (esp. if insertion)
                if len(alt_seq) - len(ref_seq) > 0:
                    margin = len(alt_seq) - len(ref_seq) + 5
                else:
                    margin = 5
                aln_delta = calc_aln_delta(read, ref_seq, alt_seq, min_delta=2, offset=offset, margin=margin)
                if aln_delta > 0:
                    ref_count += 1
                elif aln_delta < 0:
                    alt_count += 1
                else:
                    amb_count += 1
        vaf = round(alt_count/(alt_count + ref_count), 2)
        results[indel["id"]][4] = {
            "ref_count": ref_count,
            "alt_count": alt_count,
            "amb_count": amb_count,
            "vaf": vaf
        }

CPU times: user 25.1 s, sys: 193 ms, total: 25.3 s
Wall time: 25.2 s


In [534]:
print(print_results(results, headers=HEADERS))

  id  original              kmer                   aln                 seeded_aln            hybrid
----  --------------------  ---------------------  ------------------  --------------------  --------------------
   1  (13, 7, 0, 0.35)      (16, 20, 7, 0.56)      (12, 14, 13, 0.54)  (14, 24, 2, 0.63)     (27, 13, 1, 0.33)
   2  (36, 0, 0, 0.0)       (33, 0, 9, 0.0)        (30, 0, 3, 0.0)     (33, 0, 0, 0.0)       (33, 0, 5, 0.0)
   3  (30, 0, 0, 0.0)       (30, 0, 4, 0.0)        (27, 0, 3, 0.0)     (30, 0, 0, 0.0)       (30, 0, 0, 0.0)
   4  (79, 0, 0, 0.0)       (71, 0, 19, 0.0)       (64, 0, 6, 0.0)     (70, 0, 0, 0.0)       (73, 0, 7, 0.0)
   5  (12, 0, 0, 0.0)       (8, 0, 12, 0.0)        (8, 0, 0, 0.0)      (8, 0, 0, 0.0)        (8, 0, 0, 0.0)
   6  (11, 0, 0, 0.0)       (11, 0, 5, 0.0)        (10, 0, 0, 0.0)     (10, 0, 0, 0.0)       (11, 0, 0, 0.0)
   7  (8, 0, 0, 0.0)        (8, 0, 7, 0.0)         (6, 0, 2, 0.0)      (8, 0, 0, 0.0)        (8, 0, 2, 0.0)
   8  (7, 4, 0, 0.36)  

# BWA Approach

**Note:** This method requires `bwa` and `samtools` to be installed in PATH.

In [535]:
import os
import subprocess
def bwa_index(name, ref_seq, alt_seq):
    # Create directory and FASTA file
    try:
        os.mkdir("bams/{}".format(name))
    except OSError:
        pass
    with open("bams/{}/genome.fa".format(name), "w") as fasta:
        fasta.write(">ref_allele\n{}\n>alt_allele\n{}\n".format(ref_seq, alt_seq))
    # Index FASTA file
    output = subprocess.check_output(["bwa", "index", "bams/{}/genome.fa".format(name)])
    return output

In [536]:
def bwa_mem(name, reads):
    # Create FASTQ file
    with open("bams/{}/reads.fa".format(name), "w") as fasta:
        for i, read in enumerate(reads, 1):
            fasta.write(">read_{}\n{}\n".format(i, read))
    # Call BWA MEM
    output = subprocess.check_output("bwa mem bams/{0}/genome.fa bams/{0}/reads.fa | "
                                     "samtools view -bS - > bams/{0}/{0}.bam".format(name), shell=True)
    return output

In [537]:
def sort_and_index(name):
    # Sort BAM file
    output = subprocess.check_output("samtools sort bams/{0}/{0}.bam bams/{0}/{0}.sort".format(name), shell=True)
    # Index sorted BAM file
    output = subprocess.check_output("samtools index bams/{0}/{0}.sort.bam".format(name), shell=True)

In [538]:
%%time
for i in range(1, MAX_ID+1):
    # Iterate over reads
    temp = "reads/reads_{}.txt"
    with open(temp.format(i)) as reads:
        ref_count = 0
        alt_count = 0
        amb_count = 0
        indel = indels[i]
        ref_seq = indel["ref_seq"]
        alt_seq = indel["alt_seq"]
        ref_idxs = indel["ref_idxs"]
        alt_idxs = indel["alt_idxs"]
        name = "indel_{}".format(str(i))
        try:
            os.mkdir("bams")
        except OSError:
            pass
        # Index reference
        bwa_index(name, ref_seq, alt_seq)
        # Align reads
        reads = [read.rstrip("\n") for read in reads]
        bwa_mem(name, reads)
        # Sort and index BAM file
        sort_and_index(name)

CPU times: user 148 ms, sys: 670 ms, total: 818 ms
Wall time: 4.49 s
