In [1]:
import random
def generate_reads(seq, k, min_overlap, max_overlap,seed=None):
    """Cut one read out of ``seq`` for every possible start position.

    Args:
        seq: the sequence to sample reads from.
        k: base length of every read.
        min_overlap: smallest extra length appended to a read (inclusive).
        max_overlap: largest extra length appended to a read (inclusive).
        seed: value passed to ``random.seed`` before any read is drawn.

    Returns:
        A list with one read per start index ``0 .. len(seq) - k``, in start
        order. Each read spans ``k`` plus a random extra length; slicing
        shortens reads that would run past the end of ``seq``.
    """
    random.seed(seed)
    last_start = len(seq) - k
    # One randint call per start position, in start order.
    return [
        seq[start:start + k + random.randint(min_overlap, max_overlap)]
        for start in range(last_start + 1)
    ]

def generate_genome_sequence(n,seed=None):
    """Return a pseudo-random string of ``n`` nucleotides.

    Args:
        n: number of characters to generate.
        seed: value passed to ``random.seed`` before drawing.

    Returns:
        A string over the alphabet A, C, G, T; each character comes from one
        ``random.randint(1, 4)`` draw (1 -> A, 2 -> C, 3 -> G, 4 -> T).
    """
    random.seed(seed)
    alphabet = 'ACGT'
    # randint is 1-based, string indexing is 0-based.
    return ''.join(alphabet[random.randint(1, 4) - 1] for _ in range(n))

def remove_containments(reads):
    """Drop duplicate reads and reads that are substrings of another read.

    Args:
        reads: an iterable of strings.

    Returns:
        A list of the surviving reads in the order they first appear in
        ``reads``. An empty input gives an empty list.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # does not depend on set iteration order.
    unique = list(dict.fromkeys(reads))
    kept = []
    # Longest first: a read can only be contained in a read at least as long,
    # so every possible container is already in `kept` when a read is tested.
    for read in sorted(unique, key=len, reverse=True):
        if not any(read in longer for longer in kept):
            kept.append(read)
    survivors = set(kept)
    return [read for read in unique if read in survivors]

In [2]:
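'''
DESCRIPTION
    a wrapper around a string that is used as a key in the trie. an empty string is stored
    as '$'. two stalks are equal, and hash alike, when their first characters match.
INPUT
    stalk | the string to wrap
'''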
class Stalk:
    '''
    DESCRIPTION
        a wrapper around a string that is used as a key in the trie. an empty string is
        stored as '$'. equality and hashing only look at the first character, so a dict
        keyed on stalks finds an entry from the first character alone.
    INPUT
        stalk | the string to wrap
    '''
    def __init__(self,stalk):
        self.stalk = stalk if len(stalk) != 0 else '$'

    def __repr__(self):
        return self.stalk

    def __eq__(self,other):
        # only the first characters are compared; other may be a Stalk or a str
        return self[0] == other[0]

    def __hash__(self):
        return hash(self[0])

    def __getitem__(self,index):
        return self.stalk[index]

    def __len__(self):
        # the markers '$' and '^' count as empty
        is_marker = self.stalk == '$' or self.stalk == '^'
        return 0 if is_marker else len(self.stalk)

    def __str__(self):
        return '' if self.stalk == '$' else self.stalk

    def common_substring(self,other):
        '''
        DESCRIPTION
            splits this stalk and another one at the end of their shared prefix
        INPUT
            other | a Stalk, or a str which is wrapped in a Stalk first
        OUTPUT
            a 3-tuple of Stalks: the shared prefix, the rest of self, the rest of other
        '''
        if type(other) is str: other = Stalk(other)
        limit = min(len(self),len(other))
        cut = 0
        while cut < limit and self[cut] == other[cut]:
            cut += 1
        return Stalk(self[:cut]), Stalk(self[cut:]), Stalk(other[cut:])

In [3]:
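'''
DESCRIPTION
    a terminal node of the trie
INPUT
    left  | the label stored on the leaf
    right | used to build a nested Leaf when left is not empty; when left is empty the
          | leaf stores the integer 1 instead
'''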
class Leaf:
    '''
    DESCRIPTION
        a terminal node of the trie
    INPUT
        left  | the label stored on the leaf
        right | used to build a nested Leaf when left is not empty; when left is empty the
              | leaf stores the integer 1 instead
    '''
    def __init__(self,left,right=''):
        self.left = left
        # an empty label ends the chain with the integer 1
        self.right = 1 if len(left) == 0 else Leaf(right)

    def __repr__(self):
        return str(self.right)

    def __eq__(self,other):
        # only the first elements are compared
        return self[0] == other[0]

    def __hash__(self):
        return hash(self.left)

    def __getitem__(self,index):
        return self.left[index]

    def __len__(self):
        return 0 if self.left == '$' else len(self.left)

    def __is_shallow__(self):
        return True

    def reads(self):
        return set()

In [4]:
class Sequence:
    '''
    DESCRIPTION
        a string under construction together with bookkeeping about the reads behind it
    INPUT
        seq  | the initial string. defaults to an empty string
        seen | the reads already used. defaults to (seq,) when seq is given, otherwise ()
    ATTRIBUTES
        extensions | a list, empty at construction
        contains   | a set, empty at construction
    '''
    def __init__(self,seq=None,seen=None):
        self.extensions = []
        self.contains = set()
        if seq is None:
            self.seq, self.seen = '', tuple()
        else:
            self.seq = seq
            self.seen = (seq,) if seen is None else seen

    def __repr__(self):
        return self.seq

    def copy(self):
        '''
        DESCRIPTION
            returns a new Sequence holding the same four attributes. the extensions list and
            the contains set are shared with the original, not duplicated.
        '''
        clone = Sequence()
        clone.reset(self)
        return clone

    def reset(self, other):
        '''
        DESCRIPTION
            points every attribute of this Sequence at the corresponding attribute of other
        '''
        self.extensions = other.extensions
        self.contains = other.contains
        self.seq = other.seq
        self.seen = other.seen

    def add(self,item):
        '''
        DESCRIPTION
            appends item to seen unless it is already there
        '''
        if item in self.seen: return
        self.seen = (*self.seen, item)

    def __add__(self,other):
        '''
        DESCRIPTION
            appends other to this Sequence in place: the strings are concatenated, the seen
            tuples are joined, entries of either extensions list that also occur in the
            joined seen tuple are removed, and other's extensions are appended to this one's.
            both operands are modified.
        OUTPUT
            self
        '''
        self.seq += other.seq
        self.seen = (*self.seen, *other.seen)
        for pending in (self.extensions, other.extensions):
            for used in set(pending).intersection(self.seen):
                pending.remove(used)
        self.extensions.extend(other.extensions)
        return self

In [5]:
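'''
DESCRIPTION
    an internal node of the suffix trie
ATTRIBUTES
    b | a dict which maps a Stalk() to the child below it, a Branch() or a Leaf()
    s | a dict which maps the same Stalk() to a tuple of the Stalk() and a set of reads
'''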
class Branch:
    '''
    DESCRIPTION
        an internal node of the suffix trie
    ATTRIBUTES
        b | a dict which maps a Stalk() to the child below it, a Branch() or a Leaf()
        s | a dict which maps the same Stalk() to a tuple of the Stalk() and a set of reads
    '''
    def __init__(self):
        self.b = {}
        self.s = {}

    def __repr__(self): return repr(self.b)

    def __str__(self):
        '''
        DESCRIPTION
            one child per line. an empty branch gives an empty string.
        '''
        return '\n'.join(str(child) for child in self.b.values())

    def __getitem__(self,index):
        '''
        DESCRIPTION
            returns the child stored under index. a str index is wrapped in a Stalk() first.
        '''
        if isinstance(index,str): return self.b[Stalk(index)]
        return self.b[index]

    def reads(self):
        '''
        OUTPUT
            the union of the read sets stored in s
        '''
        r = set()
        for s in self.s.values():
            r.update(s[1])
        return r

    def __is_shallow__(self):
        '''
        OUTPUT
            True when none of the direct children is a Branch()
        '''
        return not any(isinstance(child,Branch) for child in self.b.values())

    def __traverse__(self,context):
        '''
        DESCRIPTION
            walks down the trie along context. at every step the child keyed on the first
            remaining character is taken and as many characters as that child's stalk is long
            are dropped from context. the dropped characters are not compared with the stalk.
            the walk stops when context is used up or when len() of the current node is at
            most 1 (number of children for a Branch(), label length for a Leaf()).
        INPUT
            context | a non-empty string
        OUTPUT
            the node at which the walk stopped
        '''
        b = self[context[0]]
        s = self.s[context[0]]
        context = context[len(s[0]):]
        while len(context) > 0 and len(b) > 1:
            s = b.s[context[0]]
            b = b[context[0]]
            context = context[len(s[0]):]
        return b

    def __setitem__(self,index,value):
        if isinstance(index,str): self.b[Stalk(index)] = value
        else: self.b[index] = value

    def __contains__(self,other):
        if isinstance(other,str): return Stalk(other) in self.b
        return other in self.b

    def __len__(self): return len(self.b)

    def pop(self,index):
        '''
        DESCRIPTION
            removes and returns the child stored under index. a str index is wrapped in a
            Stalk() first, the same way __getitem__ and __setitem__ treat it.
        '''
        if isinstance(index,str): index = Stalk(index)
        return self.b.pop(index)

    def add(self,stalk,reads):
        '''
        DESCRIPTION
            adds a suffix to the trie
        INPUT
            stalk | a Stalk() (or a str, which is wrapped) holding the suffix below this node
            reads | a set of reads which the suffix was taken from
        '''
        # wrap up front so that the lookups in s below work for str input too
        if isinstance(stalk,str): stalk = Stalk(stalk)
        if stalk not in self:
            # no child starts with this character yet: hang a new leaf here
            self.s[stalk] = (stalk,reads)
            self[stalk] = Leaf(stalk)
            return
        if not len(stalk):
            # the empty suffix is already present: count it again and merge the reads
            self[stalk].right+=1
            self.s[stalk][1].update(reads)
            return
        if isinstance(self[stalk],Leaf):
            # split the existing leaf at the end of the prefix it shares with the new suffix
            branch = Branch()
            self.pop(stalk)
            entry = list(self.s.pop(stalk))
            entry[0],old_rest,new_rest = entry[0].common_substring(stalk)
            entry[1].update(reads)
            branch.add(old_rest,entry[1].copy())
            branch.add(new_rest,reads)
            entry = tuple(entry)
            self[entry[0]] = branch
            self.s[entry[0]] = entry
        else:
            entry = list(self.s.pop(stalk))
            branch = self.pop(stalk)
            entry[0],old_rest,new_rest = entry[0].common_substring(stalk)
            if len(old_rest):
                # the new suffix leaves the existing edge part way: insert a node there
                # NOTE(review): this copy is taken before the new reads are merged, whereas
                # the leaf case above copies after merging - confirm which is intended
                split = Branch()
                split[old_rest] = branch
                split.s[old_rest] = (old_rest,entry[1].copy())
                split.add(new_rest,reads)
                self[entry[0]] = split
            else:
                # the whole edge is shared: continue below the existing branch
                branch.add(new_rest,reads)
            entry[1].update(reads)
            entry = tuple(entry)
            if not len(old_rest): self[entry[0]] = branch
            self.s[entry[0]] = entry

    def _read_extensions(self,reads,exclude,context,sequence):
        '''
        DESCRIPTION
            helper of extensions() which turns the reads of one edge into extension tuples
        INPUT
            reads    | the reads to examine
            exclude  | a set of reads to skip. every read that was expanded is added to it
            context  | the string the extensions are anchored on
            sequence | a Sequence(). reads which are substrings of its seq are added to its
                     | contains set instead of being expanded
        OUTPUT
            a list with one 3-tuple per occurrence of context in each expanded read
        '''
        found = []
        for read in reads:
            if read in exclude: continue
            if read in sequence.seq:
                sequence.contains.add(read)
                continue
            tail = read
            for _ in range(read.count(context)):
                # step past the next occurrence of context
                tail = tail.partition(context)[2]
                found.append((context,Sequence(read),Sequence(tail,(read,))))
            exclude.add(read)
        return found

    def extensions(self,exclude,context,sequence):
        '''
        DESCRIPTION
            a method which returns all reads beyond a certain point on a branch
        INPUT
            exclude    | a set of all reads which should not be added as an extension. it is
                       | updated with every read that is returned
            context    | the path from the root up to that point in the branch
            sequence   | the Sequence() being extended. reads which are substrings of its seq
                       | are recorded in its contains set and not returned
        OUTPUT
            extensions | a list of 3-tuples holding the context, a Sequence() of the read and
                       | a Sequence() of the read after the context
        '''
        found = []
        for stalk in self.s:
            child = self[stalk]
            if isinstance(child,Branch):
                for grandchild in child.b.values():
                    if isinstance(grandchild,Branch):
                        found += grandchild.extensions(exclude,context,sequence)
                    else:
                        found += self._read_extensions(child.s[grandchild.left][1],exclude,context,sequence)
            else:
                found += self._read_extensions(self.s[stalk][1],exclude,context,sequence)
        return found

In [6]:
'''
DESCRIPTION
    an object which constructs a suffix trie out of fragments of a sequence and can traverse 
    the trie to reconstruct some target sequence
INPUT
    reads | a list of strings which overlap and are fragments of a longer sequence
'''
class Sequitur:
    '''
    DESCRIPTION
        an object which constructs a suffix trie out of fragments of a sequence and can
        traverse the trie to reconstruct some target sequence
    INPUT
        reads            | a list of strings which overlap and are fragments of a longer sequence
        correct_sequence | an optional reference sequence. it is only stored
        k                | the shortest context (number of trailing characters) used to look up
                         | extensions. defaults to 3
    '''
    def __init__(self,reads,correct_sequence=None,k=3):
        # always set the attribute so that reading it never raises
        self.correct_sequence = correct_sequence
        self.branch = Branch()
        self.reads = reads
        # every suffix of every read goes into the trie, tagged with the read it came from
        for read in reads:
            for i in range(len(read)):
                self.branch.add(Stalk(read[i:]),{read})
        self.sequence = Sequence()
        self.k = k

    def context_depth_sort(self,e):
        '''
        DESCRIPTION
            sort key for extension tuples
        INPUT
            e | a 3-tuple (context, Sequence() of the read, Sequence() of the tail)
        OUTPUT
            the index in the read at which context followed by the tail starts
        '''
        return e[1].seq.find(e[0]+e[2].seq)

    def _choose_unvisited(self,*sequences):
        '''
        DESCRIPTION
            picks a random read which none of the given sequences has seen or contains. the
            candidates are sorted first so that the pick depends on the state of the random
            module and not on set iteration order.
        INPUT
            sequences | any number of Sequence() objects
        OUTPUT
            a read. raises IndexError when no read is left, as random.choice does
        '''
        import random
        visited = set()
        for s in sequences:
            visited.update(s.contains)
            visited.update(s.seen)
        return random.choice(sorted(set(self.reads).difference(visited)))

    def extend(self,sequence,proto,prime=None):
        '''
        DESCRIPTION
            method that attempts to extend a sequence or join a prefix to a suffix
        INPUT
            sequence | a Sequence() with the extension read
            proto    | a Sequence() that has been previously extended with the prefix read
            prime    | a Sequence() that has reached the terminus. defaults to an empty
                     | Sequence()
        OUTPUT
            whether or not sequence was successfully extended
        '''
        if prime is None: prime = Sequence()
        if len(proto.seq) == 0: proto = sequence  # set an empty prefix to be the first extension read
        extensions = sequence.extensions
        extensions.sort(key=self.context_depth_sort,reverse=True)
        # snapshots used to rewind after a failed attempt. Sequence.copy() shares the
        # extensions list and the contains set, so reset() only rewinds seq and seen
        copy = sequence.copy(),proto.copy(),prime.copy()
        if len(prime.seq) and prime.seq in proto.seq:
            # prime lies inside proto: fold its reads into proto and carry on without it
            proto.contains.update(prime.contains.union(prime.seen))
            if self.sequitur(self.sequence,proto): return True
            # NOTE(review): every other rewind in this method uses proto.reset(copy[1]);
            # this one resets proto to the snapshot of prime - confirm that is intended
            else: self.sequence.reset(copy[0]),proto.reset(copy[2])
        for extension in extensions:
            if extension[1].seq in proto.seq:
                proto.contains.add(extension[1].seq)
                continue
            if len(prime.seq):
                if prime.seq.startswith(extension[1].seq) and\
                    proto.seq.endswith(extension[1].seq[:extension[1].seq.find(extension[0]+extension[2].seq)+len(extension[0])]):
                    # the extension read starts prime and overlaps the end of proto: try to join them
                    if prime.seq.endswith(proto.seen[0]):
                        prime.seen = prime.seen[:-1]
                        prime.seq = prime.seq[prime.seq.find(extension[2].seq):prime.seq.find(prime.seen[-1])+len(prime.seen[-1])]
                        if self.sequitur(Sequence(prime.seen[-1]),proto+prime,Sequence()): return True
                        else: self.sequence.reset(copy[0]),proto.reset(copy[1]),prime.reset(copy[2])
                    prime.seq = prime.seq[prime.seq.find(extension[0])+len(extension[0]):]
                    if self.sequitur(Sequence(prime.seen[-1]),proto+prime,Sequence()): return True
                    else: self.sequence.reset(copy[0]),proto.reset(copy[1]),prime.reset(copy[2])
                elif len(proto.contains.union(proto.seen).intersection(self.branch.__traverse__(extension[0]).reads())) > 1\
                        and extension[1].seq in prime.contains.union(prime.seen).union(proto.contains.union(proto.seen)):
                    if extensions.index(extension) == len(extensions)-1: return False
                    else: continue
                elif extension[1].seq in prime.seq: prime.contains.add(extension[1].seq)
            # the read overlaps the end of proto and is the only unused candidate below that overlap
            if proto.seq.endswith(extension[1].seq[:extension[1].seq.find(extension[0]+extension[2].seq)+len(extension[0])]) and\
               len(self.branch.__traverse__(extension[1].seq[:extension[1].seq.find(extension[0]+extension[2].seq)+len(extension[0])]).reads()\
                   .difference(proto.contains.union(proto.seen))
                   .intersection([e[1].seq for e in extensions])) == 1:
                if self.sequitur(extension[1].copy(),proto+extension[2],prime): return True
                else: self.sequence.reset(copy[0]),proto.reset(copy[1]),prime.reset(copy[2])
            # the current sequence lies inside the extension read: continue from that read
            if self.sequence.seq in extension[1].seq:
                extension[1].contains.update(self.sequence.contains.union(self.sequence.seen))
                if (self.sequence.seq == proto.seq and self.sequitur(extension[1],extension[1],prime)): return True
                if self.sequitur(extension[1],proto,prime): return True
                else: self.sequence.reset(copy[0]),proto.reset(copy[1]),prime.reset(copy[2])
        return False

    def sequitur(self,sequence,proto=None,prime=None,seed=None):
        '''
        DESCRIPTION
            a method which traverses a suffix trie and attempts to reconstruct the sequence
            from which the trie was constructed. the result is left in self.sequence.
        INPUT
            sequence | a Sequence() with a read chosen as an initus
            proto    | an optional Sequence() which has been extended but has not reached the
                     | terminus. defaults to an empty Sequence()
            prime    | an optional Sequence() which has reached the terminus. defaults to an
                     | empty Sequence()
            seed     | an optional integer to seed the randomiser. defaults to None.
        OUTPUT
            True when proto or prime accounts for every read, otherwise a false value
        '''
        import random
        # create the defaults per call so that no Sequence() is shared between calls
        if proto is None: proto = Sequence()
        if prime is None: prime = Sequence()
        if len(proto.seq) == 0: proto = sequence  # set an empty prefix to be the first extension read
        if seed is not None: random.seed(seed)
        if not len(set(self.reads).difference(proto.contains.union(proto.seen))):
            self.sequence = proto
            return True
        if not len(set(self.reads).difference(prime.contains.union(prime.seen))):
            self.sequence = prime
            return True
        elif len(sequence.seq): self.sequence = sequence
        else:
            self.sequence = Sequence(self._choose_unvisited(proto,prime))
        # start from the shortest context: the last k characters of the current sequence
        i = self.k
        context = self.sequence.seq[-i:]
        seen = self.sequence.contains\
                        .union(self.sequence.seen)\
                        .union(proto.contains\
                        .union(proto.seen))
        branch = self.branch.__traverse__(context)
        if type(branch) is Leaf:
            # nothing follows this context: restart from another read, keeping proto as prime
            if self.sequitur(Sequence(self._choose_unvisited(proto,prime)),prime=proto): return True
            else: return False
        self.sequence.extensions.clear()
        extensions = branch.extensions(seen.copy(),context,self.sequence)
        # lengthen the context while deeper branches exist and still yield extensions
        while i < len(self.sequence.seq) and not branch.__is_shallow__():
            i += 1
            context = self.sequence.seq[-i:]
            branch = self.branch.__traverse__(context)
            if type(branch) is Branch: extensions = branch.extensions(seen.copy(),context,self.sequence)
            if type(branch) is Leaf or not len(extensions):
                # one step too far: go back to the previous context
                i -= 1
                context = self.sequence.seq[-i:]
                branch = self.branch.__traverse__(context)
                extensions = branch.extensions(seen.copy(),context,self.sequence)
                break
        self.sequence.extensions += extensions
        while i > 0:
            if self.extend(self.sequence,proto,prime):
                return True
            else:
                # record the reads which proto now covers and check for completion
                for read in set(self.reads).difference(proto.contains.union(proto.seen)):
                    if read in proto.seq: proto.contains.add(read)
                if not len(set(self.reads).difference(proto.contains.union(proto.seen))):
                    self.sequence = proto
                    return True
                if not len(set(self.reads).difference(prime.contains.union(prime.seen))):
                    self.sequence = prime
                    return True
                for exclusion in set(s if context in s else None for s in seen)\
                    .difference({None})\
                        .intersection(proto.contains.union(proto.seen))\
                            .difference(self.sequence.seen):
                    # the containment test does not change inside the loop below, so it is
                    # done once here; inside the loop it used to leave t bound to a tuple
                    # and the next pass failed on t.rpartition
                    if exclusion in self.sequence.seq:
                        proto.contains.add(exclusion)
                        continue
                    t = exclusion
                    for _ in range(exclusion.count(context)):
                        t = t.rpartition(context)
                        if len(t[0]) and self.sequence.seq.endswith(t[0]+t[1]): return False
                        t = t[0]
                if i > self.k:
                    # retry with a shorter context
                    i -= 1
                    context = self.sequence.seq[-i:]
                    branch = self.branch.__traverse__(context)
                    if type(branch) is Leaf:
                        if len(prime.seq): return False
                        else: return self.sequitur(Sequence(self._choose_unvisited(proto)),Sequence(),proto)
                    extensions = branch.extensions(seen.copy(),context,self.sequence)
                    self.sequence.extensions.clear()
                    self.sequence.extensions+=extensions
                    continue
                elif len(prime.seq):
                    if len(proto.seen) == 1:
                        if len(set(self.reads).difference(prime.contains.union(prime.seen))):
                            return self.sequitur(Sequence(self._choose_unvisited(prime)),Sequence(),prime)
                        else:
                            self.sequence = prime
                            return True
                    elif len(set([a if a in proto.seq else None for a in prime.contains.union(prime.seen)]).difference({None})):
                        proto.contains.update(prime.contains.union(prime.seen))
                        return self.sequitur(Sequence(self._choose_unvisited(proto)),Sequence(),proto)
                    elif len(set(self.reads).difference(proto.contains.union(proto.seen)\
                                                        .union(prime.contains.union(prime.seen)))):
                        return self.sequitur(Sequence(self._choose_unvisited(proto,prime)))
                    # NOTE(review): when none of the three cases above applies the loop runs
                    # again without changing i - confirm that this cannot repeat forever
                else:
                    return self.sequitur(Sequence(self._choose_unvisited(proto)),Sequence(),proto)
        return False

In [7]:
# Rebuild a random 200-nucleotide genome from its reads, after dropping the reads
# that are contained in another read. To keep every read, use the result of
# generate_reads(...) directly instead of passing it through remove_containments.
seed = 5
sequence = generate_genome_sequence(n=200, seed=seed)
reads = remove_containments(
    generate_reads(sequence, k=3, min_overlap=4, max_overlap=10, seed=seed)
)
sequitur = Sequitur(reads, correct_sequence=sequence)
sequitur.sequitur(Sequence(reads[0]), seed=seed)
# Cell output: whether the reconstruction equals the genome.
sequitur.sequence.seq == sequence

In [47]:
# Repeat the reconstruction once per read, using that read as the starting point,
# and print the read's index next to whether the genome was recovered.
for i, read in enumerate(reads):
    sequitur.sequitur(Sequence(read), seed=seed)
    print(i, sequitur.sequence.seq == sequence)

: 

: 

In [None]:
# Single-seed check on the 'sea shells' sentence; the other two sentences are
# exercised in their own cells further down. The reads are indented to show
# roughly where each one sits in the sentence.
seed = 0
sequence = 'she_sells_sea_shells_on_the_sea_shore'
reads = ['she_sells_s',
               'lls_sea_shel',
                    'ea_shells_o',
                       'shells_on_the_s',
                                  'he_sea_s',
                                      'ea_shore']
sequitur = Sequitur(reads, sequence)
for read in reads:
    sequitur.sequitur(Sequence(read), seed=seed)
    rebuilt = sequitur.sequence.seq
    # starting read | reconstruction | whether it matches the sentence
    print(f'{read} | {rebuilt} | {rebuilt == sequence}')

In [7]:
# Accuracy sweep on the 'hello world' sentence: every read is tried as the
# starting point under each of n seeds. The reads are indented to show roughly
# where each one sits in the sentence.
sequence = 'you say hello world, i bellow go to hell'
reads = ['you say hel',
            ' say hello wo',
                    'lo world, i be',
                          'ld, i bellow go t',
                                    'ow go to hell']
sequitur = Sequitur(reads,sequence)
successes = 0
n = 200
for seed in range(n):
    for read in reads:
        s = 'Seed: ' + str(seed) + ' | Initus: ' + read + ' | '
        sequitur.sequitur(Sequence(read),seed=seed)
        if sequitur.sequence.seq == sequence:
            s+='SUCCESS'
            successes+=1
        else: s+='FAILURE'
        print(s + ' | ' + sequitur.sequence.seq)
        print('-----------------------------------------')
# divide by the number of runs actually made (n seeds), not a hard-coded 200
print('ACCURACY: '+str((successes/(n*len(reads)))*100)+'%')

Seed: 0 | Initus: you say hel | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus:  say hello wo | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus: lo world, i be | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus: ld, i bellow go t | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus: ow go to hell | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 1 | Initus: you say hel | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 1 | Initus:  say hello wo | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 1 | Initus: lo world, i be | SUCCESS | you say hello world, i bellow go to hell
------------------------

In [8]:
# Accuracy sweep on the 'sea shells' sentence: every read is tried as the
# starting point under each of n seeds. The reads are indented to show roughly
# where each one sits in the sentence.
sequence = 'she_sells_sea_shells_on_the_sea_shore'
reads = ['she_sells_s',
               'lls_sea_shel',
                    'ea_shells_o',
                       'shells_on_the_s',
                                  'he_sea_s',
                                      'ea_shore']
sequitur = Sequitur(reads, sequence)
n = 200
successes = 0
for seed in range(n):
    for read in reads:
        sequitur.sequitur(Sequence(read), seed=seed)
        rebuilt = sequitur.sequence.seq
        if rebuilt == sequence:
            verdict = 'SUCCESS'
            successes += 1
        else:
            verdict = 'FAILURE'
        print(f'Seed: {seed} | Initus: {read} | {verdict} | {rebuilt}')
        print('-----------------------------------------')
print(f'ACCURACY: {(successes/(n*len(reads)))*100}%')

Seed: 0 | Initus: she_sells_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: lls_sea_shel | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: ea_shells_o | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: shells_on_the_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: he_sea_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: ea_shore | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 1 | Initus: she_sells_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 1 | Initus: lls_sea_shel | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 1 | Initus: ea_she

In [9]:
# Accuracy sweep on the 'betty bought butter' sentence: every read is tried as
# the starting point under each of n seeds. The reads are indented to show
# roughly where each one sits in the sentence.
sequence = 'betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better'
reads = ['betty_bought_butter_th',
                        'tter_the_butter_was_',
                              'he_butter_was_bitter_',
                                         'as_bitter_betty_bought',
                                                     'tty_bought_better_butter_t',
                                                                     'r_butter_to_make_the_',
                                                                                   'ke_the_bitter_butter_better']
sequitur = Sequitur(reads, sequence)
n = 200
successes = 0
for seed in range(n):
    for read in reads:
        sequitur.sequitur(Sequence(read), seed=seed)
        rebuilt = sequitur.sequence.seq
        if rebuilt == sequence:
            verdict = 'SUCCESS'
            successes += 1
        else:
            verdict = 'FAILURE'
        print(f'Seed: {seed} | Initus: {read} | {verdict} | {rebuilt}')
        print('-----------------------------------------')
print(f'ACCURACY: {(successes/(n*len(reads)))*100}%')

Seed: 0 | Initus: betty_bought_butter_th | SUCCESS | betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better
-----------------------------------------
Seed: 0 | Initus: tter_the_butter_was_ | FAILURE | tty_bought_better_butter_to_make_the_bitter_butter_better_the_butter_was_bitter_betty_bought_butter_th
-----------------------------------------
Seed: 0 | Initus: he_butter_was_bitter_ | FAILURE | tty_bought_better_butter_to_make_the_bitter_butter_better_the_butter_was_bitter_betty_bought_butter_th
-----------------------------------------
Seed: 0 | Initus: as_bitter_betty_bought | FAILURE | tty_bought_better_butter_to_make_the_bitter_butter_better_the_butter_was_bitter_betty_bought_butter_th
-----------------------------------------
Seed: 0 | Initus: tty_bought_better_butter_t | SUCCESS | betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better
-----------------------------------------
Seed: 0 | I

In [14]:
# Accuracy sweep on random genomes: for each of n seeds a fresh 200-nucleotide
# genome and its reads are generated, and every read is tried as the starting point.
successes = 0
attempts = 0
n = 200
for seed in range(n):
    sequence = generate_genome_sequence(200,seed=seed)
    reads = generate_reads(sequence,3,4,10,seed=seed)
    sequitur = Sequitur(reads,sequence)
    for i, read in enumerate(reads):
        s = str(i) + ' | Seed: ' + str(seed) + ' | Initus: ' + read + ' | '
        sequitur.sequitur(Sequence(read),seed=seed)
        attempts+=1
        if sequitur.sequence.seq == sequence:
            s+='SUCCESS'
            successes+=1
        else: s+='FAILURE'
        print(s + ' | ' + sequitur.sequence.seq)
        print('-----------------------------------------')
# count the runs as they happen instead of assuming that every seed produced as
# many reads as the last one did
print('ACCURACY: '+str((successes/attempts)*100)+'%')

0 | Seed: 0 | Initus: TTAGTTGTGCCGC | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGT
-----------------------------------------
1 | Seed: 0 | Initus: TAGTTGTGCC | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGT
-----------------------------------------
2 | Seed: 0 | Initus: AGTTGTGCCGCAG | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGT
-----------------------------------------
3 | Seed: 0 | Initus: GTTGTGCCGC | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGA

: 

: 