# Current GEMstone bloom filter implementation

In [84]:
"""Bloom filter for genetic sequences. Stored as a signed char array (typecode 'b')."""

import sys

from array import array
from collections import defaultdict

#BLOOM FILTER DEFAULTS
# Default k-mer length: how many consecutive characters are hashed at once.
K = 16
# Default hash function: Python's built-in hash().
# NOTE(review): hash() of a str is salted per interpreter process unless
# PYTHONHASHSEED is fixed, so filters built with H in different sessions are
# presumably not comparable -- confirm before persisting/reloading filters.
H = hash
# Offset added to every hash value in encode() before reducing modulo size.
HASH_MAX = sys.maxsize + 1
# Default number of entries in a bloom filter.
SIZE = 12000

def encode(gene, size=SIZE, k=K, h=H):
    """Encode a genetic sequence as a bloom filter of its k-mers.
    Args:
        gene: A string holding all or part of a DNA sequence. It is
            upper-cased before hashing, so the encoding ignores case.
        size: The number of entries in the bloom filter. Set to the default
            size if no size is given.
        k: The length of each k-mer (how many consecutive nucleotides
            [characters] are hashed together). Set to the default k if no
            k is given.
        h: The hash applied to each k-mer. Set to the default hash if no
            hash is given.
    Returns:
        The bloom filter created by initialize_bloom_filter(size), with a
        one at every entry some k-mer hashes to and a zero everywhere else.
        A gene shorter than k has no k-mers, so its filter stays all zeros.
    """
    bf = initialize_bloom_filter(size)
    sequence = gene.upper()

    # One window per start position; the last full window starts at
    # len(sequence) - k.
    window_count = len(sequence) - k + 1
    for start in range(window_count):
        window = sequence[start:start + k]      # TODO: ignore 'N's?

        # Shift the hash by HASH_MAX and reduce it modulo size so that, for
        # a positive size, the index falls inside the filter.
        slot = (h(window) + HASH_MAX) % size
        bf[slot] = 1

    return bf

def initialize_bloom_filter(size=SIZE):
    """ Builds an empty (all-zero) bloom filter.
    Args:
        size: The number of entries in the bloom filter. Set to the default
            size if no size is given.
    Returns:
        An array with typecode 'b' (signed char) holding `size` zeros.
    """
    # A one-element array repeated `size` times gives the zeroed filter.
    zero_entry = array('b', [0])
    return zero_entry * size

def tostring(bf):
    """ Writes the bloom filter's entries to standard output on one line.

    Despite the name, nothing is returned: the entries are printed back to
    back with no separator, followed by a single newline.
    Args:
        bf: The bloom filter (any iterable of printable entries).
    """
    # Unpack every entry into one print call; sep='' keeps them adjacent.
    print(*bf, sep='')

In [26]:
query = 'CATGCATTAGTTATTAATAGTAATCAATTACGGGGTCATTAGTTCATAGCCCATATATGGAGTTCCGCGTTACATAACTTACGGTAAATGGCCCGCCTGGCTGACCGCCCAACGACCCCCGCCCATTGACGTCAATAATGACGTATGTTCCCATAGTAACGCCAATAGGGACTTTCCATTGACGTCAATGGGTGGAGTATTTACGGTAAACTGCCCACTTGGCAGTACATCAAGTGTATCATATGCCAAGTACGCCCCCTATTGACGTCAATGACGGTAAATGGCCCGCCTGGCATTATGCCCAGTACATGACCTTATGGGACTTTCCTACTTGGCAGTACATCTACGTATTAGTCATCGCTATTACCATGGTGATGCGGTTTTGGCAGTACATCAATGGGCGTGGATAGCGGTTTGACTCACGGGGATTTCCAAGTCTCCACCCCATTGACGTCAATGGGAGTTTGTTTTGGCACCAAAATCAACGGGACTTTCCAAAATGTCGTAACAACTCCGCCCCATTGACGCAAATGGGCGGTAGGCGTGTACGGTGGGAGGTCTATATAAGCAGAGCTGGTTTAGTGAACCGTCAGATCCGCTAGCGCTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTCCGGACTCAGATCTCGAGCTCAAGCTTCGAATTCTGCAGTCGACTCATTCGGGAGCTGGATGGCTTGGGACATGTGCAGCCAAGACTCTGTATGGAGTGACATAGAGTGTGCTGCTCTGGTTGGTGAGGACCAGCCTCTTTGCCCAGATCTTCCTGAACTTGACCTTTCTGAACTTGATGTGAATGACTTGGATACAGACAGCTTTCTGGGTGGATTGAAGTGGTGTAGCGACCAATCGGAAATCATATCCAACCAGTACAACAATGAGCCTGCGAACATATTTGAGAAGATAGATGAAGAGAATGAGGCAAACTTGCTAGCGGTCCTCACAGAGACACTGGACAGTCTCCCCGTGGATGAAGACGGATTGCCCTCATTTGATGCACTGACAGATGGAGCCGTGACCACTGACAACGAGGCCAGTCCTTCCTCCATGCCTGACGGCACCCCTCCCCCTCAGGAGGCAGAAGAGCCGTCTCTACTTAAGAAGCTCTTACTGGCACCAGCCAACACTCAGCTCAGCTACAATGAATGCAGCGGTCTTAGCACTCAGAACCATGCAGCAAACCACACCCACAGGATCAGAACAAACCCTGCCATTGTTAAGACCGAGAATTCATGGAGCAATAAAGCGAAGAGCATTTGTCAACAGC
AAAAGCCACAAAGACGTCCCTGCTCAGAGCTTCTCAAGTATCTGACCACAAACGATGACCCTCCTCACACCAAACCCACAGAAAACAGGAACAGCAGCAGAGACAAATGTGCTTCCAAAAAGAAGTCCCATACACAACCGCAGTCGCAACATGCTCAAGCCAAACCAACAACTTTATCTCTTCCTCTGACCCCAGAGTCACCAAATGACCCCAAGGGTTCCCCATTTGAGAACAAGACTATTGAGCGAACCTTAAGTGTGGAACTCTCTGGAACTGCAGGCCTAACTCCTCCCACAACTCCTCCTCATAAAGCCAACCAAGATAACCCTTTCAAGGCTTCGCCAAAGCTGAAGCCCTCTTGCAAGACCGTGGTGCCACCGCCAACCAAGAGGGCCCGGTACAGTGAGTGTTCTGGTACCCAAGGCAGCCACTCCACCAAGAAAGGGCCCGAGCAATCTGAGTTGTACGCACAACTCAGCAAGTCCTCAGGGCTCAGCCGAGGACACGAGGAAAGGAAGACTAAACGGCCCAGTCTCCGGCTGTTTGGTGACCATGACTACTGTCAGTCACTCAATTCCAAAACGGATATACTCATTAACATATCACAGGAGCTCCAAGACTCTAGACAACTAGACTTCAAAGATGCCTCCTGTGACTGGCAGGGGCACATCTGTTCTTCCACAGATTCAGGCCAGTGCTACCTGAGAGAGACTTTGGAGGCCAGCAAGCAGGTCTCTCCTTGCAGCACCAGAAAACAGCTCCAAGACCAGGAAATCCGAGCGGAGCTGAACAAGCACTTCGGTCATCCCTGTCAAGCTGTGTTTGACGACAAATCAGACAAGACCAGTGAACTAAGGGATGGCGACTTCAGTAATGAACAATTCTCCAAACTACCTGTGTTTATAAATTCAGGACTAGCCATGGATGGCCTATTTGATGACAGTGAAGATGAAAGTGATAAACTGAGCTACCCTTGGGATGGCACGCAGCCCTATTCATTGTTCGATGTGTCGCCTTCTTGCTCTTCCTTTAACTCTCCGTGTCGAGACTCAGTGTCACCACCGAAATCCTTATTTTCTCAAAGACCCCAAAGGATGCGCTCTCGTTCAAGATCCTTTTCTCGACACAGGTCGTGTTCCCGATCACCATATTCCAGGTCAAGATCAAGGTCCCCAGGCAGTAGATCCTCTTCAAGATCCTGTTACTACTATGAATCAAGCCACTACAGACACCGCACACACCGCAATTCTCCCTTGTATGTGAGATCACGTTCAAGGTCACCCTACAGCCGTAGGCCCAGGTACGACAGCTATGAAGCCTATGAGCACGAAAGGCTCAAGAGGGATGAATACCGCAAAGAGCACGAGAAGCGGGAGTCTGAAAGGGCCAAACAGAGAGAGAGGCAGAAGCAGAAAGCAATTGAAGAGCGCCGTGTGATTTACGTTGGTAAAATCAGACCTGACACAACGCGGACAGAATTGAGAGACCGCTTTGAAGTTTTTGGTGAAATTGAGGAATGCACCGTAAATCTGCGGGATGATGGAGACAGCTATGGTTTCATCACCTACCGTTACACCTGTGACGCTTTCGCTGCTCTTGAGAATGGATATACTTTACGCAGGTCGAACGAAACTGACTTCGAGCTGTACTTTTGTGGACGGAAGCAATTTTTCAAGTCTAACTATGCAGACCTAGATACCAACTCAGACGATTTTGACCCTGCTTCCACCAAGAGCAAGTATGACTCTCTGGATTTTGATAGTTTACTGAAGGAAGCTCAGAGAAGCTTGCGCAGGTAACGTGTTCCCAGGCTGAGGGATGACAGGGATCCACCGGATCTAGATAACTGATCATAATCAGCCATACCACATTTGTAGAGGTTTTACTTGCTTTAAAAAACCTCCCACACCTCCCCCTGAACCTGAAACATAAAATGAATGCAATTGTTGTTGTTAACTTGTTTATTGCAGCTTATAATGGTTACAAATAAAGCAATAGCA
TCACAAATTTCACAAATAAAGCATTTTTTTCACTGCATTCTAGTTGTGGTTTGTCCAAACTCATCAATGTATCTTAACGCGTAAATTGTAAGCGTTAATATTTTGTTAAAATTCGCGTTAAATTTTTGTTAAATCAGCTCATTTTTTAACCAATAGGCCGAAATCGGCAAAATCCCTTATAAATCAAAAGAATAGACCGAGATAGGGTTGAGTGTTGTTCCAGTTTGGAACAAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTCCTGAGGCGGAAAGAACCAGCTGTGGAATGTGTGTCAGTTAGGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCAGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCATAGTCCCGCCCCTAACTCCGCCCATCCCGCCCCTAACTCCGCCCAGTTCCGCCCATTCTCCGCCCCATGGCTGACTAATTTTTTTTATTTATGCAGAGGCCGAGGCCGCCTCGGCCTCTGAGCTATTCCAGAAGTAGTGAGGAGGCTTTTTTGGAGGCCTAGGCTTTTGCAAAGATCGATCAAGAGACAGGATGAGGATCGTTTCGCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAAGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGAGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGAGCGGGACTCTGGGGTTCGAAATGACCGACCAAGCGACGCCCAACCTGCCATCACGAGATTTCGATTCCACCGCCGCCTTCTATGAAAGGTTGGGCTTCGGAATCGTTTTCCGGGACGCCGGCTGGATGATCCTCCAGCGCGGG
GATCTCATGCTGGAGTTCTTCGCCCACCCTAGGGGGAGGCTAACTGAAACACGGAAGGAGACAATACCGGAAGGAACCCGCGCTATGACGGCAATAAAAAGACAGAATAAAACGCACGGTGTTGGGTCGTTTGTTCATAAACGCGGGGTTCGGTCCCAGGGCTGGCACTCTGTCGATACCCCACCGAGACCCCATTGGGGCCAATACGCCCGCGTTTCTTCCTTTTCCCCACCCCACCCCCCAAGTTCGGGTGAAGGCCCAGGGCTCGCAGCCAACGTCGGGGCGGCAGGCCCTGCCATAGCCTCAGGTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGC'
ref = 'catgcattagttattaatagtaatcaattacggggtcattagttcatagcccATATATGGAGTTCCGCGTTACATAACTTACGGTAAATGGCCCGCCTGGCTGACCGCCCAACGACCCCCGCCCATTGACGTCAATAATGACGTATGTTCCCATAGTAACGCCAATAGGGACTTTCCATTGACGTCAATGGGTGGAGTATTTACGGTAAACTGCCCACTTGGCAGTACATCAAGTGTATCATATGCCAAGTACGCCCCCTATTGACGTCAATGACGGTAAATGGCCCGCCTGGCATTATGCCCAGTACATGACCTTATGGGACTTTCCTACTTGGCAGTACATCTACGTATTAGTCATCGCTATTACCATGGTGATGCGGTTTTGGCAGTACATCAATGGGCGTGGATAGCGGTTTGACTCACGGGGATTTCCAAGTCTCCACCCCATTGACGTCAATGGGAGTTTGTTTTGGCACCAAAATCAACGGGACTTTCCAAAATGTCGTAACAACTCCGCCCCATTGACGCAAATGGGCGGTAGGCGTGTACGGTGGGAGGTCTATATAAGCAGAGCTGGTTTAGTGAACCGTCAGATCCGCTAGCGCTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTCCGGACTCAGATCTCGAGCTCAAGCTTCGAATTCTGCAGTCGACTCATTCGGGAGCTGGATGGCTTGGGACATGTGCAGCCAAGACTCTGTATGGAGTGACATAGAGTGTGCTGCTCTGGTTGGTGAGGACCAGCCTCTTTGCCCAGATCTTCCTGAACTTGACCTTTCTGAACTTGATGTGAATGACTTGGATACAGACAGCTTTCTGGGTGGATTGAAGTGGTGTAGCGACCAATCGGAAATCATATCCAACCAGTACAACAATGAGCCTGCGAACATATTTGAGAAGATAGATGAAGAGAATGAGGCAAACTTGCTAGCGGTCCTCACAGAGACACTGGACAGTCTCCCCGTGGATGAAGACGGATTGCCCTCATTTGATGCACTGACAGATGGAGCCGTGACCACTGACAACGAGGCCAGTCCTTCCTCCATGCCTGACGGCACCCCTCCCCCTCAGGAGGCAGAAGAGCCGTCTCTACTTAAGAAGCTCTTACTGGCACCAGCCAACACTCAGCTCAGCTACAATGAATGCAGCGGTCTTAGCACTCAGAACCATGCAGCAAACCACACCCACAGGATCAGAACAAACCCTGCCATTGTTAAGACCGAGAATTCATGGAGCAATAAAGCGAAGAGCATTTGTCAACAGCAA
AAGCCACAAAGACGTCCCTGCTCAGAGCTTCTCAAGTATCTGACCACAAACGATGACCCTCCTCACACCAAACCCACAGAAAACAGGAACAGCAGCAGAGACAAATGTGCTTCCAAAAAGAAGTCCCATACACAACCGCAGTCGCAACATGCTCAAGCCAAACCAACAACTTTATCTCTTCCTCTGACCCCAGAGTCACCAAATGACCCCAAGGGTTCCCCATTTGAGAACAAGACTATTGAGCGAACCTTAAGTGTGGAACTCTCTGGAACTGCAGGCCTAACTCCTCCCACAACTCCTCCTCATAAAGCCAACCAAGATAACCCTTTCAAGGCTTCGCCAAAGCTGAAGCCCTCTTGCAAGACCGTGGTGCCACCGCCAACCAAGAGGGCCCGGTACAGTGAGTGTTCTGGTACCCAAGGCAGCCACTCCACCAAGAAAGGGCCCGAGCAATCTGAGTTGTACGCACAACTCAGCAAGTCCTCAGGGCTCAGCCGAGGACACGAGGAAAGGAAGACTAAACGGCCCAGTCTCCGGCTGTTTGGTGACCATGACTACTGTCAGTCACTCAATTCCAAAACGGATATACTCATTAACATATCACAGGAGCTCCAAGACTCTAGACAACTAGACTTCAAAGATGCCTCCTGTGACTGGCAGGGGCACATCTGTTCTTCCACAGATTCAGGCCAGTGCTACCTGAGAGAGACTTTGGAGGCCAGCAAGCAGGTCTCTCCTTGCAGCACCAGAAAACAGCTCCAAGACCAGGAAATCCGAGCGGAGCTGAACAAGCACTTCGGTCATCCCTGTCAAGCTGTGTTTGACGACAAATCAGACAAGACCAGTGAACTAAGGGATGGCGACTTCAGTAATGAACAATTCTCCAAACTACCTGTGTTTATAAATTCAGGACTAGCCATGGATGGCCTATTTGATGACAGTGAAGATGAAAGTGATAAACTGAGCTACCCTTGGGATGGCACGCAGCCCTATTCATTGTTCGATGTGTCGCCTTCTTGCTCTTCCTTTAACTCTCCGTGTCGAGACTCAGTGTCACCACCGAAATCCTTATTTTCTCAAAGACCCCAAAGGATGCGCTCTCGTTCAAGATCCTTTTCTCGACACAGGTCGTGTTCCCGATCACCATATTCCAGGTCAAGATCAAGGTCCCCAGGCAGTAGATCCTCTTCAAGATCCTGTTACTACTATGAATCAAGCCACTACAGACACCGCACACACCGCAATTCTCCCTTGTATGTGAGATCACGTTCAAGGTCACCCTACAGCCGTAGGCCCAGGTACGACAGCTATGAAGCCTATGAGCACGAAAGGCTCAAGAGGGATGAATACCGCAAAGAGCACGAGAAGCGGGAGTCTGAAAGGGCCAAACAGAGAGAGAGGCAGAAGCAGAAAGCAATTGAAGAGCGCCGTGTGATTTACGTTGGTAAAATCAGACCTGACACAACGCGGACAGAATTGAGAGACCGCTTTGAAGTTTTTGGTGAAATTGAGGAATGCACCGTAAATCTGCGGGATGATGGAGACAGCTATGGTTTCATCACCTACCGTTACACCTGTGACGCTTTCGCTGCTCTTGAGAATGGATATACTTTACGCAGGTCGAACGAAACTGACTTCGAGCTGTACTTTTGTGGACGGAAGCAATTTTTCAAGTCTAACTATGCAGACCTAGATACCAACTCAGACGATTTTGACCCTGCTTCCACCAAGAGCAAGTATGACTCTCTGGATTTTGATAGTTTACTGAAGGAAGCTCAGAGAAGCTTGCGCAGGTAACGTGTTCCCAGGCTGAGGGATGACAGGGATCCACCGGATCTAGATAACTGATCATAATCAGCCATACCACATTTGTAGAGGTTTTACTTGCTTTAAAAAACCTCCCACACCTCCCCCTGAACCTGAAACATAAAATGAATGCAATTGTTGTTGTTAACTTGTTTATTGCAGCTTATAATGGTTACAAATAAAGCAATAGCATC
ACAAATTTCACAAATAAAGCATTTTTTTCACTGCATTCTAGTTGTGGTTTGTCCAAACTCATCAATGTATCTTAACGCGTAAATTGTAAGCGTTAATATTTTGTTAAAATTCGCGTTAAATTTTTGTTAAATCAGCTCATTTTTTAACCAATAGGCCGAAATCGGCAAAATCCCTTATAAATCAAAAGAATAGACCGAGATAGGGTTGAGTGTTGTTCCAGTTTGGAACAAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTCCTGAGGCGGAAAGAACCAGCTGTGGAATGTGTGTCAGTTAGGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCAGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCATAGTCCCGCCCCTAACTCCGCCCATCCCGCCCCTAACTCCGCCCAGTTCCGCCCATTCTCCGCCCCATGGCTGACTAATTTTTTTTATTTATGCAGAGGCCGAGGCCGCCTCGGCCTCTGAGCTATTCCAGAAGTAGTGAGGAGGCTTTTTTGGAGGCCTAGGCTTTTGCAAAGATCGATCAAGAGACAGGATGAGGATCGTTTCGCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAAGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGAGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGAGCGGGACTCTGGGGTTCGAAATGACCGACCAAGCGACGCCCAACCTGCCATCACGAGATTTCGATTCCACCGCCGCCTTCTATGAAAGGTTGGGCTTCGGAATCGTTTTCCGGGACGCCGGCTGGATGATCCTCCAGCGCGGGGA
TCTCATGCTGGAGTTCTTCGCCCACCCTAGGGGGAGGCTAACTGAAACACGGAAGGAGACAATACCGGAAGGAACCCGCGCTATGACGGCAATAAAAAGACAGAATAAAACGCACGGTGTTGGGTCGTTTGTTCATAAACGCGGGGTTCGGTCCCAGGGCTGGCACTCTGTCGATACCCCACCGAGACCCCATTGGGGCCAATACGCCCGCGTTTCTTCCTTTTCCCCACCCCACCCCCCAAGTTCGGGTGAAGGCCCAGGGCTCGCAGCCAACGTCGGGGCGGCAGGCCCTGCCATAGCCTCAGGTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGC'

# Encode both sequences with the default filter settings, then show the
# length of the first filter. (The original asked for len(bf), a name that
# this cell never defines.)
bf1 = encode(query)
bf2 = encode(ref)
len(bf1)

12000

# New bloom filter functions

In [3]:
from pybloomfilter import BloomFilter

In [12]:
query = 'CATGCATTAGTTATTAATAGTAATCAATTACGGGGTCATTAGTTCATAGCCCATATATGGAGTTCCGCGTTACATAACTTACGGTAAATGGCCCGCCTGGCTGACCGCCCAACGACCCCCGCCCATTGACGTCAATAATGACGTATGTTCCCATAGTAACGCCAATAGGGACTTTCCATTGACGTCAATGGGTGGAGTATTTACGGTAAACTGCCCACTTGGCAGTACATCAAGTGTATCATATGCCAAGTACGCCCCCTATTGACGTCAATGACGGTAAATGGCCCGCCTGGCATTATGCCCAGTACATGACCTTATGGGACTTTCCTACTTGGCAGTACATCTACGTATTAGTCATCGCTATTACCATGGTGATGCGGTTTTGGCAGTACATCAATGGGCGTGGATAGCGGTTTGACTCACGGGGATTTCCAAGTCTCCACCCCATTGACGTCAATGGGAGTTTGTTTTGGCACCAAAATCAACGGGACTTTCCAAAATGTCGTAACAACTCCGCCCCATTGACGCAAATGGGCGGTAGGCGTGTACGGTGGGAGGTCTATATAAGCAGAGCTGGTTTAGTGAACCGTCAGATCCGCTAGCGCTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTCCGGACTCAGATCTCGAGCTCAAGCTTCGAATTCTGCAGTCGACTCATTCGGGAGCTGGATGGCTTGGGACATGTGCAGCCAAGACTCTGTATGGAGTGACATAGAGTGTGCTGCTCTGGTTGGTGAGGACCAGCCTCTTTGCCCAGATCTTCCTGAACTTGACCTTTCTGAACTTGATGTGAATGACTTGGATACAGACAGCTTTCTGGGTGGATTGAAGTGGTGTAGCGACCAATCGGAAATCATATCCAACCAGTACAACAATGAGCCTGCGAACATATTTGAGAAGATAGATGAAGAGAATGAGGCAAACTTGCTAGCGGTCCTCACAGAGACACTGGACAGTCTCCCCGTGGATGAAGACGGATTGCCCTCATTTGATGCACTGACAGATGGAGCCGTGACCACTGACAACGAGGCCAGTCCTTCCTCCATGCCTGACGGCACCCCTCCCCCTCAGGAGGCAGAAGAGCCGTCTCTACTTAAGAAGCTCTTACTGGCACCAGCCAACACTCAGCTCAGCTACAATGAATGCAGCGGTCTTAGCACTCAGAACCATGCAGCAAACCACACCCACAGGATCAGAACAAACCCTGCCATTGTTAAGACCGAGAATTCATGGAGCAATAAAGCGAAGAGCATTTGTCAACAGC
AAAAGCCACAAAGACGTCCCTGCTCAGAGCTTCTCAAGTATCTGACCACAAACGATGACCCTCCTCACACCAAACCCACAGAAAACAGGAACAGCAGCAGAGACAAATGTGCTTCCAAAAAGAAGTCCCATACACAACCGCAGTCGCAACATGCTCAAGCCAAACCAACAACTTTATCTCTTCCTCTGACCCCAGAGTCACCAAATGACCCCAAGGGTTCCCCATTTGAGAACAAGACTATTGAGCGAACCTTAAGTGTGGAACTCTCTGGAACTGCAGGCCTAACTCCTCCCACAACTCCTCCTCATAAAGCCAACCAAGATAACCCTTTCAAGGCTTCGCCAAAGCTGAAGCCCTCTTGCAAGACCGTGGTGCCACCGCCAACCAAGAGGGCCCGGTACAGTGAGTGTTCTGGTACCCAAGGCAGCCACTCCACCAAGAAAGGGCCCGAGCAATCTGAGTTGTACGCACAACTCAGCAAGTCCTCAGGGCTCAGCCGAGGACACGAGGAAAGGAAGACTAAACGGCCCAGTCTCCGGCTGTTTGGTGACCATGACTACTGTCAGTCACTCAATTCCAAAACGGATATACTCATTAACATATCACAGGAGCTCCAAGACTCTAGACAACTAGACTTCAAAGATGCCTCCTGTGACTGGCAGGGGCACATCTGTTCTTCCACAGATTCAGGCCAGTGCTACCTGAGAGAGACTTTGGAGGCCAGCAAGCAGGTCTCTCCTTGCAGCACCAGAAAACAGCTCCAAGACCAGGAAATCCGAGCGGAGCTGAACAAGCACTTCGGTCATCCCTGTCAAGCTGTGTTTGACGACAAATCAGACAAGACCAGTGAACTAAGGGATGGCGACTTCAGTAATGAACAATTCTCCAAACTACCTGTGTTTATAAATTCAGGACTAGCCATGGATGGCCTATTTGATGACAGTGAAGATGAAAGTGATAAACTGAGCTACCCTTGGGATGGCACGCAGCCCTATTCATTGTTCGATGTGTCGCCTTCTTGCTCTTCCTTTAACTCTCCGTGTCGAGACTCAGTGTCACCACCGAAATCCTTATTTTCTCAAAGACCCCAAAGGATGCGCTCTCGTTCAAGATCCTTTTCTCGACACAGGTCGTGTTCCCGATCACCATATTCCAGGTCAAGATCAAGGTCCCCAGGCAGTAGATCCTCTTCAAGATCCTGTTACTACTATGAATCAAGCCACTACAGACACCGCACACACCGCAATTCTCCCTTGTATGTGAGATCACGTTCAAGGTCACCCTACAGCCGTAGGCCCAGGTACGACAGCTATGAAGCCTATGAGCACGAAAGGCTCAAGAGGGATGAATACCGCAAAGAGCACGAGAAGCGGGAGTCTGAAAGGGCCAAACAGAGAGAGAGGCAGAAGCAGAAAGCAATTGAAGAGCGCCGTGTGATTTACGTTGGTAAAATCAGACCTGACACAACGCGGACAGAATTGAGAGACCGCTTTGAAGTTTTTGGTGAAATTGAGGAATGCACCGTAAATCTGCGGGATGATGGAGACAGCTATGGTTTCATCACCTACCGTTACACCTGTGACGCTTTCGCTGCTCTTGAGAATGGATATACTTTACGCAGGTCGAACGAAACTGACTTCGAGCTGTACTTTTGTGGACGGAAGCAATTTTTCAAGTCTAACTATGCAGACCTAGATACCAACTCAGACGATTTTGACCCTGCTTCCACCAAGAGCAAGTATGACTCTCTGGATTTTGATAGTTTACTGAAGGAAGCTCAGAGAAGCTTGCGCAGGTAACGTGTTCCCAGGCTGAGGGATGACAGGGATCCACCGGATCTAGATAACTGATCATAATCAGCCATACCACATTTGTAGAGGTTTTACTTGCTTTAAAAAACCTCCCACACCTCCCCCTGAACCTGAAACATAAAATGAATGCAATTGTTGTTGTTAACTTGTTTATTGCAGCTTATAATGGTTACAAATAAAGCAATAGCA
TCACAAATTTCACAAATAAAGCATTTTTTTCACTGCATTCTAGTTGTGGTTTGTCCAAACTCATCAATGTATCTTAACGCGTAAATTGTAAGCGTTAATATTTTGTTAAAATTCGCGTTAAATTTTTGTTAAATCAGCTCATTTTTTAACCAATAGGCCGAAATCGGCAAAATCCCTTATAAATCAAAAGAATAGACCGAGATAGGGTTGAGTGTTGTTCCAGTTTGGAACAAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTCCTGAGGCGGAAAGAACCAGCTGTGGAATGTGTGTCAGTTAGGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCAGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCATAGTCCCGCCCCTAACTCCGCCCATCCCGCCCCTAACTCCGCCCAGTTCCGCCCATTCTCCGCCCCATGGCTGACTAATTTTTTTTATTTATGCAGAGGCCGAGGCCGCCTCGGCCTCTGAGCTATTCCAGAAGTAGTGAGGAGGCTTTTTTGGAGGCCTAGGCTTTTGCAAAGATCGATCAAGAGACAGGATGAGGATCGTTTCGCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAAGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGAGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGAGCGGGACTCTGGGGTTCGAAATGACCGACCAAGCGACGCCCAACCTGCCATCACGAGATTTCGATTCCACCGCCGCCTTCTATGAAAGGTTGGGCTTCGGAATCGTTTTCCGGGACGCCGGCTGGATGATCCTCCAGCGCGGG
GATCTCATGCTGGAGTTCTTCGCCCACCCTAGGGGGAGGCTAACTGAAACACGGAAGGAGACAATACCGGAAGGAACCCGCGCTATGACGGCAATAAAAAGACAGAATAAAACGCACGGTGTTGGGTCGTTTGTTCATAAACGCGGGGTTCGGTCCCAGGGCTGGCACTCTGTCGATACCCCACCGAGACCCCATTGGGGCCAATACGCCCGCGTTTCTTCCTTTTCCCCACCCCACCCCCCAAGTTCGGGTGAAGGCCCAGGGCTCGCAGCCAACGTCGGGGCGGCAGGCCCTGCCATAGCCTCAGGTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGC'
ref = 'catgcattagttattaatagtaatcaattacggggtcattagttcatagcccATATATGGAGTTCCGCGTTACATAACTTACGGTAAATGGCCCGCCTGGCTGACCGCCCAACGACCCCCGCCCATTGACGTCAATAATGACGTATGTTCCCATAGTAACGCCAATAGGGACTTTCCATTGACGTCAATGGGTGGAGTATTTACGGTAAACTGCCCACTTGGCAGTACATCAAGTGTATCATATGCCAAGTACGCCCCCTATTGACGTCAATGACGGTAAATGGCCCGCCTGGCATTATGCCCAGTACATGACCTTATGGGACTTTCCTACTTGGCAGTACATCTACGTATTAGTCATCGCTATTACCATGGTGATGCGGTTTTGGCAGTACATCAATGGGCGTGGATAGCGGTTTGACTCACGGGGATTTCCAAGTCTCCACCCCATTGACGTCAATGGGAGTTTGTTTTGGCACCAAAATCAACGGGACTTTCCAAAATGTCGTAACAACTCCGCCCCATTGACGCAAATGGGCGGTAGGCGTGTACGGTGGGAGGTCTATATAAGCAGAGCTGGTTTAGTGAACCGTCAGATCCGCTAGCGCTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTCCGGACTCAGATCTCGAGCTCAAGCTTCGAATTCTGCAGTCGACTCATTCGGGAGCTGGATGGCTTGGGACATGTGCAGCCAAGACTCTGTATGGAGTGACATAGAGTGTGCTGCTCTGGTTGGTGAGGACCAGCCTCTTTGCCCAGATCTTCCTGAACTTGACCTTTCTGAACTTGATGTGAATGACTTGGATACAGACAGCTTTCTGGGTGGATTGAAGTGGTGTAGCGACCAATCGGAAATCATATCCAACCAGTACAACAATGAGCCTGCGAACATATTTGAGAAGATAGATGAAGAGAATGAGGCAAACTTGCTAGCGGTCCTCACAGAGACACTGGACAGTCTCCCCGTGGATGAAGACGGATTGCCCTCATTTGATGCACTGACAGATGGAGCCGTGACCACTGACAACGAGGCCAGTCCTTCCTCCATGCCTGACGGCACCCCTCCCCCTCAGGAGGCAGAAGAGCCGTCTCTACTTAAGAAGCTCTTACTGGCACCAGCCAACACTCAGCTCAGCTACAATGAATGCAGCGGTCTTAGCACTCAGAACCATGCAGCAAACCACACCCACAGGATCAGAACAAACCCTGCCATTGTTAAGACCGAGAATTCATGGAGCAATAAAGCGAAGAGCATTTGTCAACAGCAA
AAGCCACAAAGACGTCCCTGCTCAGAGCTTCTCAAGTATCTGACCACAAACGATGACCCTCCTCACACCAAACCCACAGAAAACAGGAACAGCAGCAGAGACAAATGTGCTTCCAAAAAGAAGTCCCATACACAACCGCAGTCGCAACATGCTCAAGCCAAACCAACAACTTTATCTCTTCCTCTGACCCCAGAGTCACCAAATGACCCCAAGGGTTCCCCATTTGAGAACAAGACTATTGAGCGAACCTTAAGTGTGGAACTCTCTGGAACTGCAGGCCTAACTCCTCCCACAACTCCTCCTCATAAAGCCAACCAAGATAACCCTTTCAAGGCTTCGCCAAAGCTGAAGCCCTCTTGCAAGACCGTGGTGCCACCGCCAACCAAGAGGGCCCGGTACAGTGAGTGTTCTGGTACCCAAGGCAGCCACTCCACCAAGAAAGGGCCCGAGCAATCTGAGTTGTACGCACAACTCAGCAAGTCCTCAGGGCTCAGCCGAGGACACGAGGAAAGGAAGACTAAACGGCCCAGTCTCCGGCTGTTTGGTGACCATGACTACTGTCAGTCACTCAATTCCAAAACGGATATACTCATTAACATATCACAGGAGCTCCAAGACTCTAGACAACTAGACTTCAAAGATGCCTCCTGTGACTGGCAGGGGCACATCTGTTCTTCCACAGATTCAGGCCAGTGCTACCTGAGAGAGACTTTGGAGGCCAGCAAGCAGGTCTCTCCTTGCAGCACCAGAAAACAGCTCCAAGACCAGGAAATCCGAGCGGAGCTGAACAAGCACTTCGGTCATCCCTGTCAAGCTGTGTTTGACGACAAATCAGACAAGACCAGTGAACTAAGGGATGGCGACTTCAGTAATGAACAATTCTCCAAACTACCTGTGTTTATAAATTCAGGACTAGCCATGGATGGCCTATTTGATGACAGTGAAGATGAAAGTGATAAACTGAGCTACCCTTGGGATGGCACGCAGCCCTATTCATTGTTCGATGTGTCGCCTTCTTGCTCTTCCTTTAACTCTCCGTGTCGAGACTCAGTGTCACCACCGAAATCCTTATTTTCTCAAAGACCCCAAAGGATGCGCTCTCGTTCAAGATCCTTTTCTCGACACAGGTCGTGTTCCCGATCACCATATTCCAGGTCAAGATCAAGGTCCCCAGGCAGTAGATCCTCTTCAAGATCCTGTTACTACTATGAATCAAGCCACTACAGACACCGCACACACCGCAATTCTCCCTTGTATGTGAGATCACGTTCAAGGTCACCCTACAGCCGTAGGCCCAGGTACGACAGCTATGAAGCCTATGAGCACGAAAGGCTCAAGAGGGATGAATACCGCAAAGAGCACGAGAAGCGGGAGTCTGAAAGGGCCAAACAGAGAGAGAGGCAGAAGCAGAAAGCAATTGAAGAGCGCCGTGTGATTTACGTTGGTAAAATCAGACCTGACACAACGCGGACAGAATTGAGAGACCGCTTTGAAGTTTTTGGTGAAATTGAGGAATGCACCGTAAATCTGCGGGATGATGGAGACAGCTATGGTTTCATCACCTACCGTTACACCTGTGACGCTTTCGCTGCTCTTGAGAATGGATATACTTTACGCAGGTCGAACGAAACTGACTTCGAGCTGTACTTTTGTGGACGGAAGCAATTTTTCAAGTCTAACTATGCAGACCTAGATACCAACTCAGACGATTTTGACCCTGCTTCCACCAAGAGCAAGTATGACTCTCTGGATTTTGATAGTTTACTGAAGGAAGCTCAGAGAAGCTTGCGCAGGTAACGTGTTCCCAGGCTGAGGGATGACAGGGATCCACCGGATCTAGATAACTGATCATAATCAGCCATACCACATTTGTAGAGGTTTTACTTGCTTTAAAAAACCTCCCACACCTCCCCCTGAACCTGAAACATAAAATGAATGCAATTGTTGTTGTTAACTTGTTTATTGCAGCTTATAATGGTTACAAATAAAGCAATAGCATC
ACAAATTTCACAAATAAAGCATTTTTTTCACTGCATTCTAGTTGTGGTTTGTCCAAACTCATCAATGTATCTTAACGCGTAAATTGTAAGCGTTAATATTTTGTTAAAATTCGCGTTAAATTTTTGTTAAATCAGCTCATTTTTTAACCAATAGGCCGAAATCGGCAAAATCCCTTATAAATCAAAAGAATAGACCGAGATAGGGTTGAGTGTTGTTCCAGTTTGGAACAAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTCCTGAGGCGGAAAGAACCAGCTGTGGAATGTGTGTCAGTTAGGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCAGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCATAGTCCCGCCCCTAACTCCGCCCATCCCGCCCCTAACTCCGCCCAGTTCCGCCCATTCTCCGCCCCATGGCTGACTAATTTTTTTTATTTATGCAGAGGCCGAGGCCGCCTCGGCCTCTGAGCTATTCCAGAAGTAGTGAGGAGGCTTTTTTGGAGGCCTAGGCTTTTGCAAAGATCGATCAAGAGACAGGATGAGGATCGTTTCGCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAAGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGAGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGAGCGGGACTCTGGGGTTCGAAATGACCGACCAAGCGACGCCCAACCTGCCATCACGAGATTTCGATTCCACCGCCGCCTTCTATGAAAGGTTGGGCTTCGGAATCGTTTTCCGGGACGCCGGCTGGATGATCCTCCAGCGCGGGGA
TCTCATGCTGGAGTTCTTCGCCCACCCTAGGGGGAGGCTAACTGAAACACGGAAGGAGACAATACCGGAAGGAACCCGCGCTATGACGGCAATAAAAAGACAGAATAAAACGCACGGTGTTGGGTCGTTTGTTCATAAACGCGGGGTTCGGTCCCAGGGCTGGCACTCTGTCGATACCCCACCGAGACCCCATTGGGGCCAATACGCCCGCGTTTCTTCCTTTTCCCCACCCCACCCCCCAAGTTCGGGTGAAGGCCCAGGGCTCGCAGCCAACGTCGGGGCGGCAGGCCCTGCCATAGCCTCAGGTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGC'

# Build a pybloomfilter BloomFilter with capacity SIZE and a 0.01 error rate.
# NOTE(review): the third argument is presumably the backing file path --
# confirm against the pybloomfilter documentation.
bf1 = BloomFilter(SIZE, 0.01, '/tmp/bf1')
# The whole sequence is added as a single element here; unlike encode(),
# this cell does not split it into k-mers.
bf1.add(query)

# The target path is converted to bytes before being handed to copy_template.
loc = str.encode('/tmp/bf2')
# NOTE(review): copy_template presumably creates a new filter at `loc` with
# the same parameters as bf1 so the two can be combined -- confirm.
bf2 = bf1.copy_template(loc)
# Last expression of the cell; its return value is what the notebook echoes.
bf2.add(ref)

False

In [10]:
# Echo both filters as a tuple so the notebook displays their reprs.
bf1, bf2

(<BloomFilter capacity: 12000, error: 0.010, num_hashes: 6>,
 <BloomFilter capacity: 12000, error: 0.010, num_hashes: 6>)

In [21]:
# The result is not assigned.
# NOTE(review): intersection presumably updates bf1 in place with bf2 --
# confirm against the pybloomfilter documentation.
bf1.intersection(bf2)
# Serialise bf1; the notebook echoes the result as a bytes value.
bf1.to_base64()

b'eJw1yk1vgjAAgOEf5KGRDpoedmiBIZ+lhC+5IVZSUegKMuXXbzHZe3uSVyR62SM35LVbPMEB1GaU+cG4Ery9CAtaZV/xDHtv+hmirLtNnLYzlfccX54iT855zRCcTci6VT0a7Khr4/NhPPCXfhTpcWVWpgBFYjK+mTbwVukytNI0X8qE4biC3qKAUkfI2NBmJ7E68oxaS/jE9DVMyH90V34Urqi+Iq//o22fDMnJzpkoieUY98P7AsHebiQySSjv/tZ9/gKGCEcu'

In [39]:
import pickle as p
import json

In [77]:
# Load the raw Addgene plasmid dump. The `with` statement closes the file on
# exit, so no explicit close() is needed.
try:
    with open("../phe/addgene-plasmids-sequences.json") as data_file:
        raw_data = json.load(data_file)
except FileNotFoundError:
    # Report the path that was actually opened, then abort with status 2.
    print("\nFileNotFoundError: [Errno 2] No such file or directory: ",
          "'../phe/addgene-plasmids-sequences.json'\n")
    print("File should be in the ../phe folder. Encode failed.")
    sys.exit(2)

plasmids = raw_data['plasmids']

# Read in and organize important fields of data from the raw data.
# NOTE(review): `data` and `id_` are not initialized in this cell; they must
# already exist from an earlier cell -- confirm before running in isolation.
for k in plasmids:
    name = k['name']

    # Skip plasmids that list no principal investigator.
    if not k['pi']:
        continue
    pi = k['pi'][0]

    # Skip plasmids that have no public full sequence.
    full_sequences = k['sequences']["public_addgene_full_sequences"]
    if not full_sequences:
        continue

    # Only the first full sequence is kept and encoded (default filter
    # size, k-mer length and hash).
    sequence = full_sequences[0]
    bf = encode(sequence)
    gene = Gene(name=name, pi=pi, sequence=sequence, bloom=bf)
    data[id_] = gene
    id_ += 1

In [78]:
# Compare the number of plasmid records read with the number of entries that
# `data` now holds.
len(plasmids), len(data)

(55753, 11862)

In [70]:
import json
import pickle as p
from collections import defaultdict, namedtuple
import sys

# Gene data structure holding the name, principal investigator, a partial
# sequence and the corresponding bloom filter of a gene.
Gene = namedtuple("Gene", "name, pi, sequence, bloom")

# Load the previously encoded genes from disk. The context manager closes the
# file handle as soon as unpickling finishes.
# NOTE(review): presumably the pickle stores Gene records, which is why Gene
# is defined before loading -- confirm.
# WARNING: unpickling can execute arbitrary code; only load files that this
# project produced itself.
with open('../phe/encoded_addgene.p', 'rb') as pickle_file:
    data = p.load(pickle_file)

In [60]:
# Report how many entries `data` currently holds.
len(data)

5931

In [68]:
reset_selective Gene

Once deleted, variables cannot be recovered. Proceed (y/[n])?  y


In [81]:
q1 = 'CATGCATTAGTTATTAATAGTAATCAATTACGGGGTCATTAGTTCATAGCCCATATATGGAGTTCCGCGTTACATAACTTACGGTAAATGGCCCGCCTGGCTGACCGCCCAACGACCCCCGCCCATTGACGTCAATAATGACGTATGTTCCCATAGTAACGCCAATAGGGACTTTCCATTGACGTCAATGGGTGGAGTATTTACGGTAAACTGCCCACTTGGCAGTACATCAAGTGTATCATATGCCAAGTACGCCCCCTATTGACGTCAATGACGGTAAATGGCCCGCCTGGCATTATGCCCAGTACATGACCTTATGGGACTTTCCTACTTGGCAGTACATCTACGTATTAGTCATCGCTATTACCATGGTGATGCGGTTTTGGCAGTACATCAATGGGCGTGGATAGCGGTTTGACTCACGGGGATTTCCAAGTCTCCACCCCATTGACGTCAATGGGAGTTTGTTTTGGCACCAAAATCAACGGGACTTTCCAAAATGTCGTAACAACTCCGCCCCATTGACGCAAATGGGCGGTAGGCGTGTACGGTGGGAGGTCTATATAAGCAGAGCTGGTTTAGTGAACCGTCAGATCCGCTAGCGCTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGC'
q2 = 'TACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACCTGACGTCGACGGATCGGGAGATCTCCCGATCCCCTATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGTTAAGCCAGTATCTGCT'
q3 = 'CATGCATTAGTTATTAATAGTAATCAATTACGGGGTCATTAGTTCATAGCCCATATATGGAGTTCCGCGTTACATAACTTACGGTAAATGGCCCGCCTGGCTGACCGCCCAACGACCCCCGCCCATTGACGTCAATAATGACGTATGTTCCCATAGTAACGCCAATAGGGACTTTCCATTGACGTCAATGGGTGGAGTATTTACGGTAAACTGCCCACTTGGCAGTACATCAAGTGTATCATATGCCAAGTACGCCCCCTATTGACGTCAATGACGGTAAATGGCCCGCCTGGCATTATGCCCAGTACATGACCTTATGGGACTTTCCTACTTGGCAGTACATCTACGTATTAGTCATCGCTATTACCATGGTGATGCGGTTTTGGCAGTACATCAATGGGCGTGGATAGCGGTTTGACTCACGGGGATTTCCAAGTCTCCACCCCATTGACGTCAATGGGAGTTTGTTTTGGCACCAAAATCAACGGGACTTTCCAAAATGTCGTAACAACTCCGCCCCATTGACGCAAATGGGCGGTAGGCGTGTACGGTGGGAGGTCTATATAAGCAGAGCTGGTTTAGTGAACCGTCAGATCCGCTAGCGCTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTCCGGACTCAGATCTCGAGCTCAAGCTTCGAATTCTGCAGTCGACTCATTCGGGAGCTGGATGGCTTGGGACATGTGCAGCCAAGACTCTGTATGGAGTGACATAGAGTGTGCTGCTCTGGTTGGTGAGGACCAGCCTCTTTGCCCAGATCTTCCTGAACTTGACCTTTCTGAACTTGATGTGAATGACTTGGATACAGACAGCTTTCTGGGTGGATTGAAGTGGTGTAGCGACCAATCGGAAATCATATCCAACCAGTACAACAATGAGCCTGCGAACATATTTGAGAAGATAGATGAAGAGAATGAGGCAAACTTGCTAGCGGTCCTCACAGAGACACTGGACAGTCTCCCCGTGGATGAAGACGGATTGCCCTCATTTGATGCACTGACAGATGGAGCCGTGACCACTGACAACGAGGCCAGTCCTTCCTCCATGCCTGACGGCACCCCTCCCCCTCAGGAGGCAGAAGAGCCGTCTCTACTTAAGAAGCTCTTACTGGCACCAGCCAACACTCAGCTCAGCTACAATGAATGCAGCGGTCTTAGCACTCAGAACCATGCAGCAAACCACACCCACAGGATCAGAACAAACCCTGCCATTGTTAAGACCGAGAATTCATGGAGCAATAAAGCGAAGAGCATTTGTCAACAGCAAA
AGCCACAAAGACGTCCCTGCTCAGAGCTTCTCAAGTATCTGACCACAAACGATGACCCTCCTCACACCAAACCCACAGAAAACAGGAACAGCAGCAGAGACAAATGTGCTTCCAAAAAGAAGTCCCATACACAACCGCAGTCGCAACATGCTCAAGCCAAACCAACAACTTTATCTCTTCCTCTGACCCCAGAGTCACCAAATGACCCCAAGGGTTCCCCATTTGAGAACAAGACTATTGAGCGAACCTTAAGTGTGGAACTCTCTGGAACTGCAGGCCTAACTCCTCCCACAACTCCTCCTCATAAAGCCAACCAAGATAACCCTTTCAAGGCTTCGCCAAAGCTGAAGCCCTCTTGCAAGACCGTGGTGCCACCGCCAACCAAGAGGGCCCGGTACAGTGAGTGTTCTGGTACCCAAGGCAGCCACTCCACCAAGAAAGGGCCCGAGCAATCTGAGTTGTACGCACAACTCAGCAAGTCCTCAGGGCTCAGCCGAGGACACGAGGAAAGGAAGACTAAACGGCCCAGTCTCCGGCTGTTTGGTGACCATGACTACTGTCAGTCACTCAATTCCAAAACGGATATACTCATTAACATATCACAGGAGCTCCAAGACTCTAGACAACTAGACTTCAAAGATGCCTCCTGTGACTGGCAGGGGCACATCTGTTCTTCCACAGATTCAGGCCAGTGCTACCTGAGAGAGACTTTGGAGGCCAGCAAGCAGGTCTCTCCTTGCAGCACCAGAAAACAGCTCCAAGACCAGGAAATCCGAGCGGAGCTGAACAAGCACTTCGGTCATCCCTGTCAAGCTGTGTTTGACGACAAATCAGACAAGACCAGTGAACTAAGGGATGGCGACTTCAGTAATGAACAATTCTCCAAACTACCTGTGTTTATAAATTCAGGACTAGCCATGGATGGCCTATTTGATGACAGTGAAGATGAAAGTGATAAACTGAGCTACCCTTGGGATGGCACGCAGCCCTATTCATTGTTCGATGTGTCGCCTTCTTGCTCTTCCTTTAACTCTCCGTGTCGAGACTCAGTGTCACCACCGAAATCCTTATTTTCTCAAAGACCCCAAAGGATGCGCTCTCGTTCAAGATCCTTTTCTCGACACAGGTCGTGTTCCCGATCACCATATTCCAGGTCAAGATCAAGGTCCCCAGGCAGTAGATCCTCTTCAAGATCCTGTTACTACTATGAATCAAGCCACTACAGACACCGCACACACCGCAATTCTCCCTTGTATGTGAGATCACGTTCAAGGTCACCCTACAGCCGTAGGCCCAGGTACGACAGCTATGAAGCCTATGAGCACGAAAGGCTCAAGAGGGATGAATACCGCAAAGAGCACGAGAAGCGGGAGTCTGAAAGGGCCAAACAGAGAGAGAGGCAGAAGCAGAAAGCAATTGAAGAGCGCCGTGTGATTTACGTTGGTAAAATCAGACCTGACACAACGCGGACAGAATTGAGAGACCGCTTTGAAGTTTTTGGTGAAATTGAGGAATGCACCGTAAATCTGCGGGATGATGGAGACAGCTATGGTTTCATCACCTACCGTTACACCTGTGACGCTTTCGCTGCTCTTGAGAATGGATATACTTTACGCAGGTCGAACGAAACTGACTTCGAGCTGTACTTTTGTGGACGGAAGCAATTTTTCAAGTCTAACTATGCAGACCTAGATACCAACTCAGACGATTTTGACCCTGCTTCCACCAAGAGCAAGTATGACTCTCTGGATTTTGATAGTTTACTGAAGGAAGCTCAGAGAAGCTTGCGCAGGTAACGTGTTCCCAGGCTGAGGGATGACAGGGATCCACCGGATCTAGATAACTGATCATAATCAGCCATACCACATTTGTAGAGGTTTTACTTGCTTTAAAAAACCTCCCACACCTCCCCCTGAACCTGAAACATAAAATGAATGCAATTGTTGTTGTTAACTTGTTTATTGCAGCTTATAATGGTTACAAATAAAGCAATAGCATCA
CAAATTTCACAAATAAAGCATTTTTTTCACTGCATTCTAGTTGTGGTTTGTCCAAACTCATCAATGTATCTTAACGCGTAAATTGTAAGCGTTAATATTTTGTTAAAATTCGCGTTAAATTTTTGTTAAATCAGCTCATTTTTTAACCAATAGGCCGAAATCGGCAAAATCCCTTATAAATCAAAAGAATAGACCGAGATAGGGTTGAGTGTTGTTCCAGTTTGGAACAAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTCCTGAGGCGGAAAGAACCAGCTGTGGAATGTGTGTCAGTTAGGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCAGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCATAGTCCCGCCCCTAACTCCGCCCATCCCGCCCCTAACTCCGCCCAGTTCCGCCCATTCTCCGCCCCATGGCTGACTAATTTTTTTTATTTATGCAGAGGCCGAGGCCGCCTCGGCCTCTGAGCTATTCCAGAAGTAGTGAGGAGGCTTTTTTGGAGGCCTAGGCTTTTGCAAAGATCGATCAAGAGACAGGATGAGGATCGTTTCGCATGATTGAACAAGATGGATTGCACGCAGGTTCTCCGGCCGCTTGGGTGGAGAGGCTATTCGGCTATGACTGGGCACAACAGACAATCGGCTGCTCTGATGCCGCCGTGTTCCGGCTGTCAGCGCAGGGGCGCCCGGTTCTTTTTGTCAAGACCGACCTGTCCGGTGCCCTGAATGAACTGCAAGACGAGGCAGCGCGGCTATCGTGGCTGGCCACGACGGGCGTTCCTTGCGCAGCTGTGCTCGACGTTGTCACTGAAGCGGGAAGGGACTGGCTGCTATTGGGCGAAGTGCCGGGGCAGGATCTCCTGTCATCTCACCTTGCTCCTGCCGAGAAAGTATCCATCATGGCTGATGCAATGCGGCGGCTGCATACGCTTGATCCGGCTACCTGCCCATTCGACCACCAAGCGAAACATCGCATCGAGCGAGCACGTACTCGGATGGAAGCCGGTCTTGTCGATCAGGATGATCTGGACGAAGAGCATCAGGGGCTCGCGCCAGCCGAACTGTTCGCCAGGCTCAAGGCGAGCATGCCCGACGGCGAGGATCTCGTCGTGACCCATGGCGATGCCTGCTTGCCGAATATCATGGTGGAAAATGGCCGCTTTTCTGGATTCATCGACTGTGGCCGGCTGGGTGTGGCGGACCGCTATCAGGACATAGCGTTGGCTACCCGTGATATTGCTGAAGAGCTTGGCGGCGAATGGGCTGACCGCTTCCTCGTGCTTTACGGTATCGCCGCTCCCGATTCGCAGCGCATCGCCTTCTATCGCCTTCTTGACGAGTTCTTCTGAGCGGGACTCTGGGGTTCGAAATGACCGACCAAGCGACGCCCAACCTGCCATCACGAGATTTCGATTCCACCGCCGCCTTCTATGAAAGGTTGGGCTTCGGAATCGTTTTCCGGGACGCCGGCTGGATGATCCTCCAGCGCGGGGAT
CTCATGCTGGAGTTCTTCGCCCACCCTAGGGGGAGGCTAACTGAAACACGGAAGGAGACAATACCGGAAGGAACCCGCGCTATGACGGCAATAAAAAGACAGAATAAAACGCACGGTGTTGGGTCGTTTGTTCATAAACGCGGGGTTCGGTCCCAGGGCTGGCACTCTGTCGATACCCCACCGAGACCCCATTGGGGCCAATACGCCCGCGTTTCTTCCTTTTCCCCACCCCACCCCCCAAGTTCGGGTGAAGGCCCAGGGCTCGCAGCCAACGTCGGGGCGGCAGGCCCTGCCATAGCCTCAGGTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGC'
# Lengths (in characters) of the three query sequences defined above.
len(q1), len(q2), len(q3)

(838, 975, 7151)