In [1]:
# load the sequence and probabilities into memory
sequence_filename = "chr22.maf.ancestors.42000000.complete.boreo.fa.txt"
probabilities_filename = "chr22.maf.ancestors.42000000.complete.boreo.conf.txt"

# only the first line of each file is read; `sequence` keeps whatever
# line terminator readline() returns
with open(sequence_filename) as f:
    sequence = f.readline()

# the first line of the confidence file is split on whitespace
with open(probabilities_filename) as f:
    probabilities = f.readline().split()

# make all probabilities float
probabilities = list(map(float, probabilities))
n = len(probabilities)

In [2]:
# set random seed
import random

# fixed seed: every draw made through the `random` module after this call
# follows the same pseudo-random stream on each fresh run
random.seed(1)

In [3]:
# generate test sequence

In [4]:
def get_test_sequence(length,prob_insertion,prob_deletion):
    """Sample a noisy query covering `length` consecutive reference positions.

    Reads the module-level `sequence` (reference string) and `probabilities`
    (per-position confidence values, indexed like `sequence`).

    For every reference position exactly one of the following happens, decided
    by a single uniform draw:
      * with probability `prob_insertion` a uniformly random nucleotide is
        emitted in place of the reference character (the query is not
        lengthened by this branch);
      * with probability `prob_deletion` nothing is emitted;
      * otherwise the reference character is emitted when
        `probabilities[i]` exceeds a second uniform draw, else a random
        nucleotide different from the reference character is emitted.

    Parameters:
        length: number of reference positions to walk over.
        prob_insertion: probability of the "insertion" branch per position.
        prob_deletion: probability of the deletion branch per position.

    Returns:
        A two-element list `[query_string, start_index]`.

    Raises:
        ValueError: if `length` leaves no valid start position in `sequence`.
    """
    nucleotides = ['A', 'G', 'C', 'T']

    # get random starting position
    # (len of probabilities should be the same as len of sequence)
    start_limit = len(sequence) - length - 1
    if start_limit <= 0:
        # random.randrange would raise ValueError here as well; fail with a
        # message that names the actual problem
        raise ValueError(
            "length " + str(length) + " is too long for a reference of "
            + str(len(sequence)) + " characters"
        )
    start_index = random.randrange(start_limit)

    query = []

    for i in range(start_index, start_index + length):

        insertion_deletion_prob = random.random()

        if insertion_deletion_prob < prob_insertion:
            # if we are within probability of getting an insertion
            nucleotide = nucleotides[random.randrange(4)]

        elif insertion_deletion_prob < prob_insertion + prob_deletion:
            # if we are within probability of getting a deletion
            nucleotide = ""

        elif probabilities[i] > random.random():
            # no indel and the reference character is kept
            nucleotide = sequence[i]

        else:
            # no indel, substitute: pick among the nucleotides that differ from
            # the reference character. Filtering (instead of list.remove) keeps
            # the same candidates and order for A/G/C/T, and no longer raises
            # when the reference holds another character (e.g. a gap or 'N').
            alternatives = [base for base in nucleotides if base != sequence[i]]
            nucleotide = alternatives[random.randrange(len(alternatives))]

        # add to our list representing the sequence
        query.append(nucleotide)

    return ["".join(query),start_index]

In [5]:
# test_name = "test_sequence"
# new_filename = test_name + ".fa"

# length = 300
# prob_insertion = 0.01
# prob_deletion = 0.01

# test_sequence_list = get_test_sequence(length,prob_insertion,prob_deletion)

# with open(new_filename, 'w') as f:
#     f.write(">" + test_name + " " + str(test_sequence_list[1]) + " " + str(length) + "\n")
#     f.write(test_sequence_list[0])

In [6]:
import blast
import os

In [13]:
def test_probabilistic_blast(test_name,iterations):
    
    pblast = blast.PBlast()
    
    exact_index_matches = 0
    contains_match = 0
    
    cur_dir = os.getcwd()
    os.mkdir(test_name)
    
    for i in range(iterations):
        
        new_filename = test_name +  str(i) + ".fa"
        new_path_to_file = os.path.join(cur_dir, test_name, new_filename)

        length = 1000
        prob_insertion = 0.005
        prob_deletion = 0.005

        test_sequence_list = get_test_sequence(length,prob_insertion,prob_deletion)

        with open(new_path_to_file, 'w') as f:
            f.write(">" + test_name + " " + str(test_sequence_list[1]) + " " + str(length) + "\n")
            f.write(test_sequence_list[0])
        
        # call to probabilistic blast
        pblast.load_query_file(new_path_to_file)
        
        # compare with start and end index locations
        result = pblast.probabilistic_blast()
        
        if test_sequence_list[1] == result[-1][0] and result[-1][1] - result[-1][0] == length:
            exact_index_matches += 1
        elif test_sequence_list[1] <= result[-1][0] and result[-1][1] - result[-1][0] >= length:
            contains_match += 1
        
        # check top ten results as well
        topten = 0
        for j in range(-10,-1,-1):
            if test_sequence_list[1] <= result[j][0] and result[j][1] - result[j][0] >= length:
                topten += 1
                break
    
    pblast.exit()
    
    return [exact_index_matches,contains_match,topten,iterations]
        

In [None]:
# run the generating benchmark (the (test_name, iterations) definition above)
# with 100 queries; that definition creates and fills the "fourth_test" directory
results = test_probabilistic_blast("fourth_test",100)

total_seeds:  941
stopped before ungapped extension:  1
stopped before gapped extension:  431
HSPs:  509
BEST SCORING ALIGNMENT: 
140891
141906

query: 	
HSP: 	GCTACCTATATTTTTACCTTTTAGGGAAGAGAGCGTAGAAAATG
gap: 	-AGATTGGGTCTTTGACCAACAGCAAACAGAGGAAATTGATTTTACACATTTTTCTAGTGTATCAATGAATGAGTCTTCTGGCAAGTAGGAGACTTGGATTTTGTTTACCTGTTA-CTGTTTATCACCCCTATTTTGAAAGAAAATTTCATTTTTTTTGTCTACAAATCTGAAAGGTAACTAATATACCTTTACTTAAGATAACCTTCCAGAAAGATAACCTTCCAACAATGCTCTGGTTGTTTGTCATTTGTTTTCCTTAAAGTTTGTTTTGGTGCTAGGA-TATTATAAGAAGTAGGAATAATAGATAATATTAATTGAGTGTCTCTGTGTGTCAGGCTCGGGGCTAAGATCTTTACACCTATTATCTCATTTAGCCCTC-CTGCAAGCCTG-GTATTCACAGACTTTCATTATCCCCATTTTAATGAGGAGGAAACTGAGGTTCAGAGATATCGAGTAACTTGTCCCGAGTAGCACAGCTAATC-GCGGGAGAGCCAGCATCTGAACGCCGCAACTCCCACACCTGAGCCAGGCTCTCACTCCAGGAGCTCTAGGGCTCTGCTCACTTCTCTGTGCAGCCCGGACTGGTGTCACTGTGTTTTCACCCGTCTTCCTTCCCAGATGCACAAGTCTGGCGTTCTGAGGAAGGCCATTGAATACATCAAATACTTGCGGCAGGTCAATCATAAACTGCGCCAGGAGAACATGGTGCTGAAGCTGGCAAATCAGAAAAACAGTGAATGTGCTAGT-GGTTGGAATTCGCCGGTA-GGCTCCTTGAGAAAAG

total_seeds:  1257
stopped before ungapped extension:  9
stopped before gapped extension:  716
HSPs:  532
BEST SCORING ALIGNMENT: 
342780
343793

query: 	
HSP: 	TTGATTTCTACCCAATAAGCTTAATTACAGGAACGCATATTTACTAAAACAACACGTGCGGACACCATTCTAGGGCTGAAGCCAGCCCCGAGTAATTATTTCCTTTAAAACCACCTCAACTCAAAAGCCT
gap: 	-TAGGAATAGTTTAGTGCCCACCAAGGAAGCTACCTGATCTCCAGCGAAGTTAATTTTAGCCCTAAGAGTCCTCGGTCATCATCCTATTTATAGATGAGAAAACTTGATGACAAACTTTCTAAGAGCTCTACTAATGCCTCATTTAATGGCACTCCTGAAGAAAGG-GTTCAATACATCTGAATGTTCCAGATAACTGAAGTTATTCAGACTAGGAAAAATATGCACAGAAAAATGACAACAAATTGAGAAGAAAAAAA-TCCAAATTAATTATCAAGGGACATAAAACATCAAAATTAACAGGGAACAATATTAATATCACTAATA-AAGAAATGAAATTAAAAAGTAGAAACTAACAATTTAAAAACAATATCCATTGCCACTGAGGTTGTAGCAAATGGGCTCACAAGTGCACGTGGATGATCATACCTTGGTATGACCCTTTTTTGGTTAAACCACAAGATAATGCCTATCAAAAACCATAACAATG-TCGCACTTTTTGAGTTAGCAGTTCAACATCCTGAAACTTTCCCCCAAATCCAGGAATAAAAATGTTTACTGCTGCATAAAAAGTAGTAAA-TAACTGGAAACAACCTCAATGTCCAACAGTGCAGGAATACTTGGATAAATTACATTAGACCCAAATCAAATTAAAAAAAAAAAAACACAGATTAATATGAAAATTAAAATTACAAAATT

total_seeds:  647
stopped before ungapped extension:  5
stopped before gapped extension:  324
HSPs:  318
BEST SCORING ALIGNMENT: 
150959
151978

query: 	
HSP: 	GGCACTAGCCCTGTCCTCTGGGAGCATACTGTCTGGCAGGGAATCCAGACAGGGACCAGGACTTCCCGGCCAGATGCGTGATCGTCGCGTTGCAAGGGAGAAACGTGAACTGGGAGGGCACAGACACTGAACACGCTCCGGTTTTACGTACTTAAAGAGGACGCTGAGCCCCTGACCCCGTCTTAGGGGAAGTCAAGGTGGGCTTTCTGGGGAACATGAATCTAGGCTGAGAGCAGGACGGATGGGCCAGGCCTCTCCCCTATTCGGTTTTTTTGTCGTAGTGCTCTGGGCCGATGTCCTCACACTTGTAACTTCTGTGCCCCCCAAAAGTGGTTTGAAGACATTTATTTTATTGAAATACAACATACATACAGAAAAATGCACGCATTTTAACTTACTGCTCGAAGTGAACTCATCCGTGTAACAACCACCCTGATCAAGAAGTAGAACTTTCCCAGCTCCCCCAGAAGCCTTCGACAGGCCCTCCCCCCTCGCCCTGCCCCAACGAAGGTTACCGCTATTCTGAATTCTGTCACCGGAGTTTAATCTGACTGATGTTTGTTGTTTCTGTAAACTTTTTATAGAAGTACAGACTTTAAGTCTTGGGCATCTTTCTTTGTAGCCTCTTCATTCAGCAGAGGTACTTGGTACTGCTGACCGACGTTCGGCTTGTTTCCAGCATTTGCAGTTATAAACACTGCAGCAGGGAGCGTCGTCGAACAAGGTCTTCACCCACGTATACAAGAGCATCCTAAGGCTACATCTCTAGAGGTGGAGAGGCTGGGTCAAAGGGTATAAATATTTGAAACTTAATTGTATTGCCAAATTT
gap: 	-CAT

total_seeds:  1024
stopped before ungapped extension:  14
stopped before gapped extension:  639
HSPs:  371
BEST SCORING ALIGNMENT: 
584717
585732

query: 	----------TCT---T---GCAAGGAAACAGAACAGCCGCAGGCCAAGTCAAGGTGGCAGTTGCGGTAGCCCCCAGGCTGCAGAGGAACCCCCTCTGAGAAAGCAAAGGCACGCTCCCCCTCCCCTACCCCA-GTGGAAGAAAGGAGTCTTGAGTGCTCTGCCCGGTGGTCCCTGTCAGGTGGTAAAGGGCCTCCCCCCCGAGAGTGCCAC-GAACTCAGATGGCGACTCTGGTCACC-AGCGGCCCAGGTCAGAGGGTGGCTGTCGAGGGGGATGCCCATCGTCCAGAGCGGGCAGGAGCAGGAGGGGAGGCGCTGGGGAAGCAGCAGAGAAGGGGGGCTCGGGGCCCCCCGCAGAGGTGCGCCTGGGCCAGCCTCTCCCGGTCCCACAGGGAAGCCGTCCTGCAAGCAGGAAAACCATGGGCCGGGGCAGCGAGCTCAGCAGCAGCGCCCAGAGTGGGCCACAGCGTTCGGGTCCTCAGCCACTGGAATGCCAGGC-GGAAGGACACACCAGCCCGCACACACTCTTCCGGCAGCGGCCTCTCTTCACCCACGGTGTGTCCAGACGGCACGCTCTGC-
HSP: 	CCCCGGCCTGCAGCCGACGGGTTCCACCACGAACTGCTCAGGGGTCCCGAGGCGGGTCCACACCGCACAGGGGCCCCAACCTAGGGGCAACCGCAGCCGCCCCTGGGGAGCAACGGGGGACCGCCACCCGGGGTTCTGCCGGGCTGGGCCTCACTCCCCTGAGCCAGGTGGGAGGCCTGTGCCAAAGGACGCCCTCACTCCTAGCCCAGAATGTCCGGGGGGGTTGGCCGCAGGCGGCCCCAGGGCCGGGCGCCAAG

In [9]:
# test 1: 0 10 100
# test 2: 0 4 100
# test 3: 0 45 100

# test 3 with epsilon 0 : 0 0 100
# test 3 with epsilon 10: 0 46 100

# higher epsilon is better

# test 3 with top ten: 0 45 100

# results

In [10]:
def test_probabilistic_blast(test_name,pblast):
    
    exact_index_matches = 0
    contains_match = 0
    
    cur_dir = os.getcwd()
    test_dir = os.path.join(cur_dir, test_name) 
    
    files = []
    # r=root, d=directories, f = files
    for r, d, f in os.walk(test_dir):
        for file in f:
            if '.fa' in file:
                files.append(os.path.join(r, file))
    
    for file in files:
        
        #read first line of fasta file
        with open(file, 'r') as f:
            info = f.readline().split()
            start_index = int(info[1])
            length = int(info[2])
        
        # call to probabilistic blast
        pblast.load_query_file(file)
        
        # compare with start and end index locations
        result = pblast.probabilistic_blast()
        
        if start_index == result[-1][0] and result[-1][1] - result[-1][0] == length:
            exact_index_matches += 1
        elif start_index <= result[-1][0] and result[-1][1] - result[-1][0] >= length:
            contains_match += 1
        
        # check top ten results as well
        topten = 0
        for j in range(-10,-1,-1):
            if test_sequence_list[1] <= result[j][0] and result[j][1] - result[j][0] >= length:
                topten += 1
                break
    
    iterations = len(files)
    return [exact_index_matches,contains_match,topten,iterations]

In [12]:
# re-run the stored "third_test" queries with a custom gapped-extension setting
pblast = blast.PBlast()

try:
    pblast.set_blast_params(gapped_stop = 15)

    results = test_probabilistic_blast("third_test",pblast)
finally:
    # call exit() even when parameter setup or the benchmark raises, so the
    # PBlast instance is never left open in the kernel
    pblast.exit()

print(results)


total_seeds:  267
stopped before ungapped extension:  4
stopped before gapped extension:  153
HSPs:  110
BEST SCORING ALIGNMENT: 
578319
578732

query: 	--------------CAGTTTATAAAGTACCAAATGAGGCAGGGCTCAAAAGCCCAAGTGCGTGT-
HSP: 	GGCGGGGCTGTGCCAGTTGTGGAATTAATCCTGCTGGCCGGGAGGGAGGCAGTAGCGAGCGGTAGGGACTGCGGGACTGGAGCCAGCTGGAAGGCAGCCCGGCCAGTGGTCATCATGTGGGAACTCTGTCTAGTGTCGCCAGAATTTCCAGTTGCTCAAGTGCGGCAGGAGGTCTGCTGTGGTGTATTCTTTTAGCTGGTCCCAGCGATGGTCAAAGGCACTGTACCGGACTGTGGGATGTTTTGGCCACGGGCTGCCAGTTTGTGACCTGTTGGTGAGGTATGTCTTGGTAAACCTCTCTGATCCTTTGTCTGCCCCTCCCTGCAAACCCATCATGGAGAGGGGAGGT

db: 	TGGGAATGTGACTTCAGTTTATAAAGTACCAAACGAGGCAGGGGTCAAAAGCCCAAGTGCCTGTG
HSP: 	GGAGGGGCTGGGCAAGTTGTGGAGTTAATCCTGCTGGCTGGGAGGGAGGCAGTAGGGAGTGGTAGGGACTGTGGGACTGGAGCCAGCTGGCGGGCAGCCGGGCCAGTGGTTATCATGTGGGAACTCGGTCTAGTGTGGCCAGAATTTCCAGTTGCTCAAGAGAAGCAGGAGGTCTGCTGTGTTGTATTTTTTTAGGTGGTCCCAGCAAGGGTCAAAGGGACTGTACAGGACTGGGGGCTGTTTTGGCCTCGGGCTGCCAGTTTGTGACCTGTTGGGGAGGATTGTCTTTGTTATCATCTCTGAACCTTTGTCTGCCCCTCCCTGCAAACCCATCATGGAGAGGGGAG

total_seeds:  327
stopped before ungapped extension:  0
stopped before gapped extension:  134
HSPs:  193
BEST SCORING ALIGNMENT: 
245340
245754

query: 	
HSP: 	AGTTGCCTTAGCTATTCTTCCTCTGGCCTGTCTTTTTGTCTTCTCTCCTATCTGGCCTTGCCTTTGCTTCTCTAAGGTTTAGTCCCCGTCTTCC
gap: 	-TGAGAGGCGGGTAGACGTACTGTCAAGCCCTACAGCACTGATCTAATGGATGTACCAGAGAACTCTTTCAAACTTTGACGACAAATAATCCTTTTGTGAGTCTCTGCCATCTTTAACACATGGCTCCCACGGCCTCTTTGGTCAGCCAGCAGAGAGGGGAAGAGAATAAAGATCTCACCTGGGAATTTTTCCGGGGCCAGGCCTGGAAGCCGTGTACATTACTTCTTGTCACATGCCATAGGCCAGAATTGAGTGTGGTAGGCTGATAATTGCTCCCAAAGATATATCCATGTCATAATCCTC---A---G--------
db: 	
HSP: 	AGTTGCCTTAGCTCTTCTTCCTCTGGCATGGCTTTTTGTCTTCTCTCCTCTCTGGCCTTGCCTTTGCTTCTCTTAGGTTTAGTCCCCGTCTTCC
gap: 	ATGAGAGGGGGGTAGAGGTACTGTCAAGCCCTACAGCACTGATCTAATGGATGTACCAGAGAACTCTTTCAAACTTTGAATACAAATAATCCTTTTGTGTGTCTCTGCCATCTTTAACACATGGCTTCCACGGCCTCTTTGGATAGCCAGCAGAGAGGGGAAGAGAATAAAGATCTCACCTGGGAAGTTTTCATGGGCCAGGCCTGGAAGCTGTGTACATTACTTCTGGTCACATGCCATAGGCCAGAACTCAGTGTGGTAGGCTGATAATGGATCCCAAAGATATATCCATGTCCTAATCCTCA

total_seeds:  308
stopped before ungapped extension:  1
stopped before gapped extension:  156
HSPs:  151
BEST SCORING ALIGNMENT: 
270329
270743

query: 	
HSP: 	CCCTTAAGGTTAAGAAGAGACAACTGTGCGATATCCAAACGAGAGGCCGGCAGTGGCCCAGGGGGGTCAGGGAAACCCTGGAGTTTTGGC
gap: 	-TCAGGGTTGGAGCTGAGGGGCCCCCAGATTAGGGGATGGTTTAAGCACAATGGTTTCTATGGGCTTGGGGTTCATTTAAGCTCAGCTGGGGCCTGTTTCTTTGGATTTTAAAAAGCCGGGCCAATGACCTGCCATGCCTTCTGGGCGATACAATCCCATTATCCTTTTCTCGGAGTCCACCAATGTCCAGTTTTTTGCATGATCCTCACCACAACCCTTTGAGAGAAAAGGCAGTAGAGACTAGCCCATTTCACAGATGAAGACACTCAGAGCGTCACGGATACAGCAAGTGAGTAGGGGCCACA--------G----GG--T
db: 	
HSP: 	CCCTTTAGGTTAATAAGATTCAACTGTGCCATATGCAAATGAGAGGCCGGCAGGGGCCCAGGAGGGTCACGGAAACCCTGGAGGTTTGGC
gap: 	CTCAGGGTTGGAGCTGAGGGGCCCCCAGATTAGGGGATGGTTTAAGCACAATGGTTTCTATGGGTTTGGGGTCCATTTCAGCCCAGCTGGGTCCTGTTTCTTTGGATTTTAAAAAGCTGTGTCAAGGCCCTGCCATGCCTCCTGGTCGATACAATCACATTATCCTTTTCTCCGAGTCCTCCGATGTCCAGTTTTTTGCATGATCCTCACAACAACCCTTTGAGAGAAAAGGCAGGAGAGACTAGCCCATTTCACAGATGAAGACACTAAGAGCCTCAGGGGCACAGCAAGTGAGTAGGGGCCACAGGG

total_seeds:  266
stopped before ungapped extension:  0
stopped before gapped extension:  106
HSPs:  160
BEST SCORING ALIGNMENT: 
380524
380932

query: 	
HSP: 	GCAGGGTGTGGAGGCGTGATGGCCTGTCATCCCAGATAGGCGGGAGTGGATGCCAGACCAAGGCAGCTAGATCGATTGACGTATGCCGCCTGGCCCGGGAATGGTGTCACCACACTCTAAAATTCTCCAGGAGAAATTCCCAGCGTTCCTCACAGGGCAGG
gap: 	-AGTTCTACGGTTTCTTCACCTAATGCACTAATACTCCA-CC-ACCTCCTCTGCTGTTGTTTTCCTGCTTAGACTTCCTCTGAGA-TTTTTCCCCCATTCTTTAACCATTTTAGTT-CTCTTAGGAACGCTTTGCAAGTTTTCTACAACGAATCCAAATTCAACATTACTTTAATA-CT-CTATGTG-AGGGGACAAAAAGACA-AAGAGATTAACAAGGCATGGTGGCCATCTAAGAAT-----A---
db: 	
HSP: 	GCAGGGTGTGGTGGCGTGTTGGCCTGTAGTCCCAGATACTCAGGGTTTGATTCCAGGCCGAGGCAGGAAGATCGTTTGAAGGCTCCTCCCTGGCCCAGGCATCGTTTCACCACACTCTAAAATTCTCCAGGAGAAATTCCCAGCGTTCCTCACAGGGCAGG
gap: 	GAGTTCTCCGGTTTCTTCACCTAAACCACTAATACTCCATCCGACCTCATCTGCTGTTGTTTTCCTGCTTATATTTCCTCTG-GATTTTTTCCCCCATTCTTTAACCATTTTAGTTACTCTTAGGAACGCTTTGCAAGTTTTCTACAACGAATCCAAATTCAACATTACTTTAATACCTACTATGTGAAGGGGACATAAAGACATAA-AGATTAACAAGGCATGGTGGCCATCTAAGAATAC

total_seeds:  635
stopped before ungapped extension:  8
stopped before gapped extension:  294
HSPs:  333
BEST SCORING ALIGNMENT: 
563507
563920

query: 	
HSP: 	ATCCAAGGCGGGCGCCGAGGCCGGGGGGCCCGACGTCCGGGCTCGGTGTGCCTTACGCTCCATCTTGGTCTCGCGGTGCTGCCAGGCTT
gap: 	-GAAGTTTAGCAGCACCTTGCGGGCGCGCTTCCGCCTCTCCCTCTCCAGCACCCGCAGGCTGGCTGCCGCCGTCCGGGGGAGCACAGGCCGCCGGCCCCGGCGGGTCACCTTCACCTAGCCCTCCTCGTCAGCGACCCCGTCCTCCTCCTTGGCCTTAGCCTCCTCCTGCAAGGAACGGGACCCCGTTGTAGGCTGGGA-CTCAGGAGGGCGACCCCCTCACCCCCCTCCCGAACCCAGAACCGCCCTG-GTCGCCCGCCGGCGCCTGCTGCTCCCCCCCCCCCGGCACAGTGTGGGAGGCTCAGCAT--T------G----AG-
db: 	
HSP: 	ATCCACGGCGGGGGCCAAGGCCGGGGGGCCCGAGGGCCGGGCTCGGTGTGCCTTACGCTCCATCTTGGTCTCGCGGTGCTGCCAGGCGT
gap: 	AGAAGTTGAGCAGCTCCTTGCGGGCGCGCTTCCGCCTCTCCCTCTCCAGCACCCGCAGGCTGGCTGCCTCCGTCCGGGGGAGCACAGGCCGCCGGCCCCGGCGGGTCACCTTCACCCAGCCCTCCTCGTCAGGGACCCCCTCCTCCTCCTTGGCCTTAGCCTCCTCCTGCAAGGAACGGGACCCTGTCGTCGGCTGGGACCTCAGGAGCGCGCCCCCCGCACCCCCCTCCCGCACTCAGAACCGCCCTGAG-CGCCCACTGGCGCCTGCTGCTCCCCCCCCCCGGGCACAGTGTGGGTGGCTAAGCATTG

total_seeds:  266
stopped before ungapped extension:  1
stopped before gapped extension:  114
HSPs:  151
BEST SCORING ALIGNMENT: 
131637
132048

query: 	
HSP: 	AAAACCATGTAATTTCACTACCCATTATTAAAATGTGTATATAGTCAGTTCTGCTTTAAGGCAACATGTGCATTCCTAAAAATCACACTATGCAAAATCATGCAATAAAAACCACAGGGTGGGCATGTTGG
gap: 	GGCTT-CAAGTAATGGGAAATGGGGTTGAGGATC-CAACAATCAAAATCTTGATAGGTGAGCAAAAAAATCTATAGAAGATTT-TTTTCTAAAAAGTTAGGGCCACAACCTAATAAAACAGGCACACAGATTTACAGATAAGTGCCTAAGAAATATATAAATACCACAATAAATACGGCACTTTGCCTTGAAAAAGACCTGAGGTTTGCTTGTGGAAGTGTGCGTCGCTAGGTTACAGCTTGTGAGTTATAGTGAA-TGCTGG-A--A--G-TA---G--
db: 	
HSP: 	AAAACCATGTAATTTCACTACGCAGTATTAAAATTTGTATATGGTCAGTTCTGCTATAATGCAACATATGCATTCCTAAAAATCACACTATGCAAAATCATGCAATAAAAACCACAGGGTGGGCATGTTGG
gap: 	CTCATGCTCGTAATGGGAAATGGGGTTAAGGATCACAACAATAAAAAACTTCATAGGTGACCAAAAAAAGGGAAAAAAGATTTATTTTTTAAAAAGATAGGAACCGAACCTAATAAAACAGTCACGCAGTTTTACAGATAAGTGACTAAGAAATATATAAATACCACAATAAATACGGCACTTTGCCTTGAAAAAGACCTGAGGTTTGCTTGTGGAAGTGGGCGTCGTAAGGTTACAGCTTGTGAGTTATTGTGAAGTGGTGGAAGGAGGG

total_seeds:  344
stopped before ungapped extension:  0
stopped before gapped extension:  242
HSPs:  102
BEST SCORING ALIGNMENT: 
575332
575744

query: 	
HSP: 	GCTCAACCGCGCACGGCAGCCCTGAGTGTGGACT
gap: 	-ATGGGACCAGGACTGACCCCTC-GGCCCACCACGACCGACGACAACAACCAGTTCTGTCCCTGCTTCTGAGTCCTGGCTAAGCCGTATT-CCTGCTGTGTGACCCTGGGCAAGTCACTTGGCCTCTCTGGGCCTCCGTTTCCTCACCTGTAAGAA-GAGGAGGTTGGACTATACGA-TTCTGACATCCTGTCATTGTGTTCCCCAGGGCTCAGCCTCTCTGG-GGTGACACA-CTTTCATCAATTTCATTTTTCTCGTCCATAAAATAACCAATAAAAGGACCACACATGCCATAAGGGCAAGAACCATGTCAGTCTTTTCATCACCACCA-CCCCAATCCGGGCACATAGTAAGAGCTCAGTAAA-------TG--T--G-
db: 	
HSP: 	GCACAACCGCACACAGCAGCCCTGAGTGTGGACT
gap: 	GATGGGACAAGTACTGACCCCTCTGG-CCACCACTACCAATGACAACAACCAGTGCTGTCCCTGCATCTGAGTCCTGGCTCTGCCGT-TTACCTGCTGTGTGACCCTGGGCAAGTCACTTCGCCTCTCTGGGCCTCAGTTTCCTCACCTGTAA-AATGAGGAGGTTGGACTAGACGACTTGTGACATCCTGTCATTCTGTTCCCGAGGGCTCAGCCTCTC-GGTTGTGACACAGCTTTCATCAGTTTCAGTTTTCTCGTCCGTAAAATGACCAATAAAATGACCACAGATTCCATAAGGGCAAGAACCATGTCTGTCTTTTTATCACCA-CATCCCCAATCCCGGCACATAGTAAGTGCTCA

total_seeds:  275
stopped before ungapped extension:  4
stopped before gapped extension:  130
HSPs:  141
BEST SCORING ALIGNMENT: 
569001
569410

query: 	-----------TGGGGCCCGGACAGTTTACAGGA-TTCCTCCAGGCCACAGCAAGTAGGTGAAAAAAGGAAGACGCCCGAACCCCG-CTCCACTTCTATTTCCAATCGACCACCCTGCCTGAGCTAAGCCCTCAGGGCAACCCAAGTCTTGGAAGGACCTCCTACCAGCCGGTCTTGTCAGAAGAGAGCGGCACTACACATAGTGCTTGTAATAGCCAAGGATCCCAAACTCCTGCTAGCTCAAGTCCTACAAAATGTGCCAGGTACACCGTGCTAGGCAGGGCCCTGCCGGTGGGATGTGGGATATTATAGCCCCT--
HSP: 	GGGGGGACGGAAAGACGAGGAGAAGGATTTACACGATCTTAAAATCACGTGGCACCCCTTCTAACACTCAACCGTCTTTGCCTCCGCAGGT

db: 	AAGGAGGTAGTTGGGGCCCTGAAAGTTTACAGGACTTCCTCAAGGCCACAGCAAGTATGTGATAAAAGGAAGATGCCCGAAGCCTGCCTCCACTTCTCTTTCCAATCTACCACCCTGCCTGAGCTAAGCCCTCAGGGCCACCCACGTCTTGGAAGGACTTCCTACCAGCCGGTCTTGTAAGATGAGAGCAGCCCTACACACTGTGCTTGTAATAGCCAAGGATCCCAAACTCCAGCTAGCTCAAGTCCTAGAAAATGTGCCAGGTACACTGTGCTACGCAGGGCGCGGCCGAAGGGATGTGGGATATTATCGCCCCTGG
HSP: 	GGTGAGAAGGAAAGATGAGGGGAAGAATTTACCAGATTTTAAAATCACGTTGCACCCCTTCAAACACTCTACCGTCTTTGCCTCTGTAGTT
307.6

total_seeds:  394
stopped before ungapped extension:  3
stopped before gapped extension:  280
HSPs:  111
BEST SCORING ALIGNMENT: 
445551
445964

query: 	
HSP: 	GGCGTCAGATCCGGAAGGGGAAAGGAAATAATGCGTATTTAAAGATTACTGTGTGCCAGGCACGTTCCCAAGTGAC
gap: 	-CTAACCCTCGGGACGGCAGCATGAGGCCAGCGGTGCTATCCCAACTTGGAGGGGGGGGGGCCGAGGCACGGAGTCAG-CACAGAGCCGAGCCGGGGTTCACCCCAGACTGACTCCGGAGGTGGGCACTGCTGCCGTGGGGCCGCGGGGAA-CCCTGAGCACAGCGTCGGAGAACACTGCTGGATGCAGCCCATCCTGACTACTTCTCCAGGGGGGGAGAGGCCTTGTGCTGAAGTAACAGAAGGAAGCGGTGGGCCCCCGCCGCGGCCCTCAGAAGACCCTGCGGAGGGAAGGCACCTCTGACCTTGGCCATGGACT---C-T----C--CCT--C-
db: 	
HSP: 	GGCATCAGATCAGGAAGGGGAAAGGAATGAACACGTATTCAATGACCACTGTGTGCCAGGCACTTCCCCAAGAGAC
gap: 	TCTAACCCTCGCGACGGCTGCATGAGGCGAGCGGTGCTATCCCAACTTGGAGGGGGAGGGGCGGAGGCACAGAG-CGGCCACAGAGCCGAGCCGGGGTTCACCCCAGTCTGACTCCGGAGGTCGGCACTGCTGCGGTGGGGCCGCGGGGAAGCCCTGGGCACAGCGTCTGAGAACACTGCTGGATGCAGCCCTTCCTGAAGACGTCTCCAGGTGGGGAGAGGCCTTGTGCTGAAGGAACAGAAGGGAGCGGTGGGCACCTGCCACGGCCCTCAGAAGACCCTGCCGAGGGAAGGCACCTCTGACCTTGGCCATGAACTCGCCC

total_seeds:  331
stopped before ungapped extension:  0
stopped before gapped extension:  134
HSPs:  197
BEST SCORING ALIGNMENT: 
249976
250388

query: 	----T--A-T-A----AAACAGGGTTAGGCTCTAGGCTCCAATGATTATAATCAGGTGTTTCACCTACATCAGTGGCCAGAGATCCATTTGAAAGTCATGAGGTATAGGACTATTCAGGAACTGCCCTGTGCCTTTCAGGATCTCTAGCTTCCTTGACCCCTCACAC-TTAAAGGCCA-TCAGGGGCCTCCAGTTGCGATAACCAAAAATGCCCCCACTTATTTCCTAATAGCCCCTAGGGGGGCAATGCCA-CCCTGCTGAGAACCACTATTCTAGACTTTTCCAAGGTTTTCAGGCCCTTCAGAGCTCACCGTTCTCCCACTCTGACCAGGACCAAGTTCCTAATTTCATTGGCCTTTCCTGACTTTCTTGAAGGCTCATC-
HSP: 	AACCACCTAAAAGTGAGGCCTTAAAAATA

db: 	ATATTAAAATCATATAAAACAGGGTTAGGCTCTAGGCTCAAATGATTATAAGCACGTGTTTCACCTACATCAGTGGCCAGAGATCCATTTGAAAGTCATGAGGTATAGGACAATTCAGGAACTGCCCTGTGCCTTTCAGGATGTCTAGCATCCTTGACCCCTCACCCATTAAATGCCAAT-AGTGGCCTCCAGTTGTGATAACCAAAAATGCCCCCACTTATTTCCTAATTGCCCCCCGGGGGGCAGTGCCATCCCTGCTGAGAACCACTATTCTAGACTATTCCAAGGTTTTCAGCCCCTTCAGAGCTCACCGTTCTCCCACTCTGACCAGGGCCAAGTTCCTAATTTCATTGGCCTTTCCTTACTTTCTTGCAGGCTCATCA
HSP: 	AGCCACCTAAAAGTGAGGCCTTAAAAATA

total_seeds:  420
stopped before ungapped extension:  2
stopped before gapped extension:  186
HSPs:  232
BEST SCORING ALIGNMENT: 
59541
59953

query: 	--------------CCTGAAGATGAAC-
HSP: 	ACACGGAGCTGCACAAGCACGGCCTGCTGCTCTTTGCTGAGATCCTGACTCGGTGAGCAAAGCGGTGGAACCTCAGGCCTGTAGGGGCCAGACTGTAGAAGGAAATTGAAGCTTTTATTCCTGGTGGGGTCTGTGATACGAGTTCACTCACTCGGTCCTCATTTACCATTTCTCAGGCAGCCGGAGGAGATCAAGCTGTTCACAAGCTCAGCCATGTGCGGAGATGCTGGCCGTCCCCTCCAAGAGGCAGTGAGCAGCCCTGTGCTGGAGGTGGCTGCTGAGGCAGTGAAGGCCTCTTCTGCTTTTCTGAGGTGAGAGACCCAGGCAGGCTGAACTTACCACCCCAGCATGTACCTGAGGGCCTGTCAAGGCCAGGGCTGCATTC

db: 	AGCCTACAGGGAAGCCTGAAGATGAACA
HSP: 	ACACAGAGCTGCACAAGCAGGGCCTGCTGCTCTTTGCTGAGATCCTGACTCGGTGAGCAAAGCGGTGGAACGTCAGGCCTGTAGGGGCCAGACTGGAGAAGGAAAGTGAAGCATTTATTCCTGGTGGGGTCAGTGATAGGAGTTCACTCACTCGTTCCTCATTTACCATTTCTCAGGCAGCCAGAGGAGATCAAGCTGTTCACAAGCTCAGCCATGTGCGGAGATGCTGGCCGTGCCCTCCAAGAGGCAGTGAGCAGCCCTGTGCTGGAGGTGGCTGCTGAGGCAGTGAAGGCCACTTCTGCTTTTCTGAGGTGAGAGACCCAGGCAGGCTGAACTTTCCACCCCTGCATGTACCTGAGGGCCTGTCAAGGCCAGGGCAGCATCC
3

total_seeds:  408
stopped before ungapped extension:  2
stopped before gapped extension:  182
HSPs:  224
BEST SCORING ALIGNMENT: 
274861
275274

query: 	--------------TCACTTTATCTCTTTTTTTTAGAACGGA-CCTCAGTGCTCCTGGGAGATT-TGTC-
HSP: 	AGTGAATCTCCTGTCTAAACCTCCCACGTTTCTCGGAATCCAGCCCTGCCCCAGCTGCTTCGGTAATTTAGTCCTTTCCTCTCTCCTCTTCTAATTATTTTTCTTGCTCTGAATTTGTCCAAGGACTTTAAAATCAAGTTGCTTATGATGGGTCTGGGGTCCACAGGAGGGAATGCCCCTGAGAGGGCCAGGCCTTCTGTGGTCCTTCCAGAGTGGGCTCTGGAGCTCACAGCCACCACTCTGACCCTCTGCAGATGTCCTTCGGCCTTCTCCGAGTGTTCTCCATTGTGATCCCCTTTCTCTATGACGGGACGCTCATTAGCAAGAACTTTGCTGCTCTACTTGA

db: 	GATGGCCATAACAATCACTTTATCTCTTTTTTTTAG-ACAGAGCCTCACTCCTCCTGGGAGGTTCT-TCA
HSP: 	AGCAATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGCCTGCGCCAGCTGGGCCAATTTTTTTGTATTTTCCCCTCTCCTCTTCTAATTATTTTTCTTGCTCTGAATTTTTCCAAGGACTTTAAAATCAAGTTGCTTATGATGGGTCTGGGGTCCACAGGAGGGAATGCCCCTGAGAGGGCCAGGCCTTCTGTGGTCCTGCCAGAGTGGGCTCTGGAGCTCACAGCCACCACTCTGACCCTCTGCAGATGTCCTTCGGCCTTCTCCGTGTGTTCTCCATTGTGATCCCCTTTCTCTATGTCGGGACGCTCATTAGCAAGAACTTTGCTGCTCT