In [56]:
import random
def generate_reads(seq, k, min_overlap, max_overlap,seed=None):
    random.seed(seed)
    reads = []
    for i in range(0, len(seq)-k+1):
        # Generate a random overlap within the specified range
        overlap = random.randint(min_overlap, max_overlap)
        start = i
        end = i + k + overlap
        reads.append(seq[start:end])
    return reads

def generate_genome_sequence(n,seed=None):
    random.seed(seed)
    nucleotides = {1:'A',2:'C',3:'G',4:'T'}
    seq = ''
    for i in range(n):
        seq += nucleotides[random.randint(1,4)]
    return seq

In [57]:
'''
DESCRIPTION
INPUT
OUTPUT
'''
class Stalk:
    def __init__(self,stalk):
        self.stalk = stalk
        if len(self.stalk) == 0: self.stalk = '$'

    def __repr__(self): return self.stalk

    def __eq__(self,other): return self[0] == other[0]

    def __hash__(self): return hash(self[0])

    def __getitem__(self,index): return self.stalk[index]

    def __len__(self):
        if self.stalk == '$' or self.stalk == '^': return 0
        return len(self.stalk)
    
    def __str__(self):
        if self.stalk =='$': return ''
        else: return self.stalk

    '''
    DESCRIPTION
    INPUT
    OUTPUT
    '''
    def common_substring(self,other):
        i = 0
        substr = ''
        if type(other) == str: other = Stalk(other)
        while i < min(len(self),len(other)) and self[i] == other[i]:
            substr += self[i]
            i += 1
        return Stalk(substr), Stalk(self[i:]), Stalk(other[i:])

In [58]:
'''
DESCRIPTION
INPUT
OUTPUT
'''
class Leaf:
    def __init__(self,left,right=''):
        if len(left) == 0:
            self.left = left
            self.right = 1
        else:
            self.left = left
            self.right = Leaf(right)
    
    def __repr__(self): return str(self.right)

    def __eq__(self,other): return self[0] == other[0]

    def __hash__(self): return hash(self.left)

    def __getitem__(self,index): return self.left[index]

    def __len__(self):
        if self.left == '$': return 0
        return len(self.left)
    
    def __is_shallow__(self): return True

In [59]:
class Sequence:
    def __init__(self,seq=None,seen=None):
        self.extensions = {}
        self.contains = set()
        if seq is None: 
            self.seq = ''
            self.seen = tuple()
        else: 
            self.seq = seq
            if seen is None: self.seen = (seq,)
            else: self.seen = seen

    '''
    DESCRIPTION
    INPUT
    OUTPUT
    '''
    def add(self,item): 
        if item not in self.seen: self.seen = tuple(list(self.seen) + [item]) 

    '''
    DESCRIPTION
    INPUT
    OUTPUT
    '''
    def __add__(self,other):
        self.seq += other.seq
        self.seen = tuple(list(self.seen)+list(other.seen))
        for a in set(self.extensions).intersection(self.seen):
            self.extensions.pop(a)
        for a in set(other.extensions).intersection(self.seen):
            other.extensions.pop(a)
        self.extensions.update(other.extensions)
        return self

    '''
    DESCRIPTION
    INPUT
    OUTPUT
    '''    
    def rollback(self,sequence):
        self.seen = list(self.seen)
        while self.seen[-1] in sequence.seq: self.contains.add(self.seen.pop())
        self.seen = tuple(list(sequence.seen)+self.seen)
        self.seq = sequence.seq[:sequence.seq.find(self.seen[0])] + self.seq[:self.seq.find(self.seen[-1])+len(self.seen[-1])]
        self.contains.update(sequence.contains)
        sequence.seq = sequence.seq[:sequence.seq.find(self.seen[0])]

        # self.seen = list(self.seen)
        # self.seen.pop()
        # self.seq = self.seq[:self.seq.rfind(self.seen[-1])+ len(self.seen[-1])]
        # i = 0
        # while i <= min(len(sequence.seq),len(self.seq)) and self.seq[:i+1] in sequence.seq: i+=1
        # self.seq = sequence.seq[:-i] + self.seq 
        # self.seen = tuple(list(sequence.seen)+self.seen)
        # self.contains.union(sequence.contains)
        # self.extensions.clear()
        
        # i = 0
        # while i <= len(prefix.seen) and prefix.seen[-i-1] in suffix.seq: i += 1
        # suffix.seq = suffix.seq[:suffix.seq.find(prefix.seen[-i])]
        # suffix.seen = list(suffix.seen)
        # while len(suffix.seen) and suffix.seen[-1] not in suffix.seq: prefix.contains.add(suffix.seen.pop())
        # suffix.seen = tuple(suffix.seen)
        # i = 0
        # while i <= min(len(prefix.seq),len(suffix.seq)) and suffix.seq[:i+1] in prefix.seq: i += 1
        # prefix.seq = prefix.seq[:-i]
        # suffix = prefix + suffix
        # prefix = Sequence()

In [60]:
'''
DESCRIPTION
INPUT
OUTPUT
    '''
class Branch:
    def __init__(self):
        self.b = {}
        self.s = {}

    def __repr__(self): return repr(self.b)

    def __str__(self):
        s = '' 
        for i in range(len(list(self.b.values()))-1):s+=str(list(self.b.values())[i])+'\n'
        return s+str(list(self.b.values())[-1])
    
    def __getitem__(self,index):
        if type(index) == str: return self.b[Stalk(index)]
        return self.b[index]

    '''
    DESCRIPTION
    INPUT
    OUTPUT
    '''
    def __is_shallow__(self):
        for a in self.b.values():
            if type(a) == Branch: return False
        return True

    '''
    DESCRIPTION
    INPUT
    OUTPUT
    '''
    def __traverse__(self,context):
        b = self[context[0]]
        s = self.s[context[0]]
        context = context[len(s[0]):]
        while len(context) > 0 and len(b) > 1:
            s = b.s[context[0]]
            b = b[context[0]]
            context = context[len(s[0]):]
        return b
    
    def __setitem__(self,index,value):
        if type(index) == str: self.b[Stalk(index)] = value
        else: self.b[index] = value

    def __contains__(self,other): 
        if type(other) == str: return Stalk(other) in self.b
        return other in self.b

    def __len__(self): return len(self.b)

    def pop(self,index): return self.b.pop(index)

    '''
    DESCRIPTION
        returns a copy of the trie with the index removed from the branches and from the stalks
    INPUT
        index | a Stalk() that exists in the trie
    OUTPUT
        br    | a copy of the Branch() with the index removed from the branches and from the stalks
    '''
    def pop_copy(self,index):
        b = self.b.copy()
        b.pop(index)
        s = self.s.copy()
        s.pop(index)
        br = Branch()
        br.b = b
        br.s = s
        return br

    '''
    DESCRIPTION
        adds a suffix to the trie
    INPUT
        stalk | a Stalk() which is a common substring of every read up to this point and beyond
        reads | a set of reads which have with the same common substring up to this point
    '''
    def add(self,stalk,reads):
        if stalk in self:
            if not len(stalk):
                self[stalk].right+=1
                self.s[stalk][1].update(reads)
                return
            if type(self[stalk]) == Leaf:
                branch = Branch()
                l1 = self.pop(stalk)
                stalk_ = list(self.s.pop(stalk))
                stalk_[0],l1.left,l2 = stalk_[0].common_substring(stalk)
                stalk_[1].update(reads)
                branch.add(l1.left,stalk_[1].copy())
                branch.add(l2,reads)
                stalk_ = tuple(stalk_)
                self[stalk_[0]] = branch
                self.s[stalk_[0]] = stalk_
            else:
                stalk_ = list(self.s.pop(stalk))
                branch = self.pop(stalk)
                stalk_[0],bstalk,stalk = stalk_[0].common_substring(stalk)
                if len(bstalk):
                    br = Branch()
                    br[bstalk] = branch 
                    br.s[bstalk] = (bstalk,stalk_[1].copy())
                    br.add(stalk,reads)
                    self[stalk_[0]] = br
                else: 
                    branch.add(stalk,reads)
                stalk_[1].update(reads)
                stalk_ = tuple(stalk_)
                if not len(bstalk): self[stalk_[0]] = branch
                self.s[stalk_[0]] = stalk_
        else:
            if type(stalk) == str: stalk = Stalk(stalk)
            self.s[stalk] = (stalk,reads)
            self[stalk] = Leaf(stalk)

    '''
    DESCRIPTION
        a method which returns all reads beyond a certain point on a branch
    INPUT
        exclude    | a list of all reads which should not be added as an extension
        context    | the path from the root up to that point in the branch
        t          | a string which holds the path to the read from the context
    OUTPUT
        extensions | a dictionary of all possible extensions indexed on the read and pointing
                   | to a 3-tuple holding the context, a Sequence() of the read along the path
                   | a Sequence() of the read after the context 
    '''
    def unpack(self,exclude,context,t=None):
        if t is None: t = ''
        extensions = {}
        for s in self.s:
            if type(self[s]) is Branch:
                for b in self[s].b:#.b.values():
                    if type(self[s].b[b]) is Branch: 
                        extensions.update(self[s].b[b].unpack(exclude,context,t=t+s.stalk+b.stalk))
                        continue
                    for read in self[s].s[self[s].b[b].left][1]:
                        if read in exclude: continue
                        extensions[read] = context,Sequence(read),\
                            Sequence(str(t) + s.stalk + (self[s].b[b].left.stalk if self[s].b[b].left.stalk != '$' else ''),(read,))
            else:
                if len(self.s[s][1].intersection(exclude)): continue
                extensions[list(self.s[s][1])[0]] = context,\
                    Sequence(list(self.s[s][1])[0]),\
                    Sequence(t + (s.stalk if s.stalk != '$' else ''),(list(self.s[s][1])[0],))
        return extensions

In [100]:
'''
DESCRIPTION
    an object which constructs a suffix trie out of fragments of a sequence and can traverse 
    the trie to resconstruct some target sequence
INPUT
    reads | a list of strings which overlap and are fragments of a longer sequence
'''
class Sequitur:
    def __init__(self,reads,correct_sequence=None):
        if correct_sequence is not None: self.correct_sequence = correct_sequence
        self.branch = Branch()
        self.reads = reads
        for read in reads: 
            for i in range(len(read)): 
                self.branch.add(Stalk(read[i:]),{read})
        self.sequence = Sequence()

    '''
    DESCRIPTION
    INPUT
    OUTPUT
    '''
    def context_depth_sort(self,e): return e[1].seq.find(e[0])

    '''
    DESCRIPTION
        method that attempts to extend a sequence or join a prefix to a suffix
    INPUT
        sequence | a Sequence() with the extension read
        prefix   | a Sequence() that has been previously extended with the prefix read
        suffix   | a Sequence() that has reached the terminus
    OUTPUT
        whether or not sequence was successfully extended
    '''
    def extend(self,sequence,prefix,suffix):
        if len(prefix.seq) == 0: prefix = sequence                                                                          # set an empty prefix to be the first extension read
        extensions = list(sequence.extensions.values())
        extensions.sort(key=self.context_depth_sort,reverse=True)
        contains = set()
        for extension in extensions:                                                                                        # try every possible extension
            if extension[1].seq in suffix.seq:
                if prefix.seq.endswith(suffix.seq[:suffix.seq.find(extension[0])+len(extension[0])]):
                    suffix.seq = suffix.seq[suffix.seq.find(extension[0])+len(extension[0]):] 
                    prefix = prefix + suffix
                    suffix.seq = prefix.seq
                    suffix.seen = prefix.seen
                    suffix.contains = prefix.contains
                    return False
                if prefix.seq in suffix.seq:
                    suffix.contains.add(prefix.seq)
                    suffix.contains.add(extension[1].seq)
                    contains.add(extension[1].seq)
                    continue
            if extension[1].seq in prefix.seq:
                prefix.contains.add(extension[1].seq)
                contains.add(extension[1].seq)
                continue
            if sequence.seq.endswith(extension[1].seq[:extension[1].seq.rfind(extension[0])+len(extension[0])]):                # if an extension is a suffix to the extension read
                self.sequitur(extension[1],prefix+extension[2],suffix)                                                        # or else extend the prefix and set the extension as the extension read
                return True                                                                                                         # and return that a successful extension was achieved
            if sequence.seq in extension[1].seq:
                extension[1].contains.add(sequence.seq)
                self.sequitur(extension[1],extension[1],suffix)
                return True
        if len(contains): return True
        return False                                                                                                        # return no successful extension was achieved
    
    '''
    DESCRIPTION
        a method which traverses a suffix trie and attempts to reconstruct the sequence from which the 
        trie was constructed
    INPUT
        sequence | a Sequence() with a read chosen as an initus
        prefix   | an optional Sequence() which has been extended but has not reached the terminus. 
                 | defaults to and empty Sequence()
        suffix   | an optional Sequence() which has reached the terminus. defaults to an empty 
                 | Sequence().
        seed     | an optional integer to seed the randomiser. defaults to None.
    '''
    def sequitur(self,sequence,prefix=Sequence(),suffix=Sequence(),seed=None):
        import random
        if seed is not None: random.seed(seed)
        self.sequence = sequence
        i = 1
        context = self.sequence.seq[-i:]
        options = set()
        while len(set(self.reads).difference(self.sequence.contains.union(self.sequence.seen))):
            branch = self.branch.__traverse__(context)
            if branch.__is_shallow__() or i == len(self.sequence.seq):                                                          # traverse the branch to the context until the branch is "shallow"
                if type(branch) is Branch:                                                                                      # if the branch is more than a single leaf
                    stalks = branch.s.copy()
                    [options.update(s[1]) for s in stalks.values()]
                if len(options.difference(self.sequence.contains.union(self.sequence.seen)\
                                          .union(prefix.contains.union(prefix.seen)))):                                         # if there are reads on the branch that have not been added
                    for s in stalks.values():
                        for x in s[1].difference(self.sequence.contains.union(self.sequence.seen)\
                                                 .union(prefix.contains.union(prefix.seen))):
                            self.sequence.extensions.update({x:(context,Sequence(x),Sequence(s[0].stalk,(x,)))})                    # add those options as possible extensions
                else:
                    options = set(self.reads)\
                                        .difference(prefix.contains.union(prefix.seen)\
                                        .union(suffix.contains.union(suffix.seen))\
                                        .union(self.sequence.contains.union(self.sequence.seen)))
                    for o in options:
                        if o in prefix.seq: prefix.contains.add(o)
                        if o in suffix.seq: suffix.contains.add(o)
                        if o in self.sequence.seq: self.sequence.contains.add(o)
                    i -= 1
                    context = self.sequence.seq[-i:]                 # select any available read as the extension read
                    branch = self.branch.__traverse__(context)                                                     # backtrack to the next shallowest branch
                    [options.update(s[1]) for s in branch.s.values()]
                    self.sequence.extensions.update(branch.unpack(self.sequence.contains.union(self.sequence.seen)\
                                                                  .union(prefix.contains.union(prefix.seen)),context))              # add reads found there as possible extensions
                    if len(self.sequence.extensions): pass
                    elif len(set(self.reads)\
                                        .difference(prefix.contains.union(prefix.seen)\
                                        .union(self.sequence.contains.union(self.sequence.seen)))):
                        if len(prefix.seen) > 1 and prefix.seen[0] in options and prefix.seen[-1].endswith(prefix.seen[0][:prefix.seen[0].find(context)+len(context)]):
                            prefix.seen = tuple([prefix.seen[-1]]+list(prefix.seen[:-1]))
                            prefix.seq =  prefix.seen[0][:-len(prefix.seen[1][:prefix.seen[1].find(context)+len(context)])] + prefix.seq[:prefix.seq.find(prefix.seen[-1])+len(prefix.seen[-1])]
                            self.sequence = Sequence(prefix.seen[-1])
                        else:
                            suffix = prefix
                            self.sequence = Sequence(random.choice(list(set(self.reads)\
                                            .difference(prefix.contains.union(prefix.seen)\
                                            .union(self.sequence.contains.union(self.sequence.seen))))))
                            prefix = Sequence()
                        i = 1
                        context = self.sequence.seq[-i:]
                        options = set()                                                                                                 # restart the loop
                        continue
                    else:
                        if not len(set(self.reads).difference(prefix.contains.union(prefix.seen))): self.sequence = prefix
                        elif not len(set(self.reads).difference(suffix.contains.union(suffix.seen))): self.sequence = suffix
                        break                                                                                               # restart the loop
                options = set()                                                                                                 # restart the loop
                if not self.extend(self.sequence,prefix,suffix): 
                    if len(suffix.seq):
                        i = 1
                        while i < min(len(prefix.seq),len(suffix.seq),max(len(prefix.seen[-1]),len(suffix.seen[0]))) and suffix.seq[-i-1:] in prefix.seq: i+=1
                        if prefix.seq.startswith(suffix.seq[-i:]): 
                            prefix.seq = prefix.seq[i:]
                            suffix = suffix + prefix
                            prefix = Sequence()
                    if len(prefix.seen) > 1 and self.sequence.seq in prefix.seen[0]:                                                # if the attempt is unsuccessful and the extension read is a prefix to the prefix
                        prefix.rollback(self.sequence)                                                                                  # rollback the prefix
                        self.sequence = Sequence(prefix.seen[-1])
                    else:     
                        options = list(set(self.reads)\
                                    .difference(suffix.contains.union(suffix.seen)\
                                    .union(prefix.contains.union(prefix.seen))\
                                    .union(self.sequence.contains.union(self.sequence.seen))))
                        for o in options:
                            if o in prefix.seq: prefix.contains.add(o)
                            if o in suffix.seq: suffix.contains.add(o)
                            if o in self.sequence.seq: self.sequence.contains.add(o)                                                                                                      # or else the prefix is actually a suffix
                        if self.sequence.seq in prefix.seq: prefix.contains.add(self.sequence.seq)
                        if self.sequence.seq in suffix.seq: suffix.contains.add(self.sequence.seq)
                        if len(prefix.seq) and len(self.sequence.contains.union(self.sequence.seen)) > 0:
                            if len(prefix.seen): suffix = prefix
                            else: suffix = self.sequence
                        options = list(set(self.reads)\
                                        .difference(suffix.contains.union(suffix.seen)\
                                        .union(prefix.contains.union(prefix.seen))\
                                        .union(self.sequence.contains.union(self.sequence.seen))))
                        if not len(options):
                            self.sequence = suffix
                            break
                        self.sequence = Sequence(random.choice(options))                   # select any available read as the extension read
                        prefix = Sequence()                                                                                             # clear the prefix
                        options = set()
                    i = 1
                    context = self.sequence.seq[-i:]
                    continue                                                                                                        # restart the loop
                if not len(set(self.reads).difference(prefix.contains.union(prefix.seen))):
                    self.sequence = prefix
                    break                                                                              # if there are no available extensions
                i = 0
            i += 1                                                                                                          
            context = self.sequence.seq[-i:]     # increase the context for the depth of the trie traversal

In [101]:
seed = 5
sequence = generate_genome_sequence(200,seed=seed)
reads = generate_reads(sequence,3,4,10,seed=seed)
sequitur = Sequitur(reads,sequence)

In [103]:
# i = 0
# for read in reads:
sequitur.sequitur(Sequence(reads[47]),seed=seed)
print(47,sequitur.sequence.seq ==  sequence)
    # i+=1

KeyboardInterrupt: 

In [None]:
sequence = 'you say hello world, i bellow go to hell'
reads = ['you say hel',
            ' say hello wo',
                    'lo world, i be',
                          'ld, i bellow go t',
                                    'ow go to hell']
sequitur = Sequitur(reads)
successes = 0
n = 200
for seed in range(n):    
    for read in reads:
        s = 'Seed: ' + str(seed) + ' | Initus: ' + read + ' | '
        sequitur.sequitur(Sequence(read),seed=seed)
        if sequitur.sequence.seq == sequence: 
            s+='SUCCESS'
            successes+=1
        else: s+='FAILURE'
        print(s + ' | ' + sequitur.sequence.seq)
        print('-----------------------------------------')
print('ACCURACY: '+str((successes/(200*len(reads)))*100)+'%')

Seed: 0 | Initus: you say hel | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus:  say hello wo | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus: lo world, i be | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus: ld, i bellow go t | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 0 | Initus: ow go to hell | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 1 | Initus: you say hel | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 1 | Initus:  say hello wo | SUCCESS | you say hello world, i bellow go to hell
-----------------------------------------
Seed: 1 | Initus: lo world, i be | SUCCESS | you say hello world, i bellow go to hell
------------------------

In [None]:
sequence = 'she_sells_sea_shells_on_the_sea_shore'
reads = ['she_sells_s',
               'lls_sea_shel',
                    'ea_shells_o',
                       'shells_on_the_s',
                                  'he_sea_s',
                                      'ea_shore']
sequitur = Sequitur(reads)
successes = 0
n = 200
for seed in range(n):    
    for read in reads:
        s = 'Seed: ' + str(seed) + ' | Initus: ' + read + ' | '
        sequitur.sequitur(Sequence(read),seed=seed)
        if sequitur.sequence.seq == sequence: 
            s+='SUCCESS'
            successes+=1
        else: s+='FAILURE'
        print(s + ' | ' + sequitur.sequence.seq)
        print('-----------------------------------------')
print('ACCURACY: '+str((successes/(n*len(reads)))*100)+'%')

Seed: 0 | Initus: she_sells_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: lls_sea_shel | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: ea_shells_o | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: shells_on_the_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: he_sea_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 0 | Initus: ea_shore | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 1 | Initus: she_sells_s | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 1 | Initus: lls_sea_shel | SUCCESS | she_sells_sea_shells_on_the_sea_shore
-----------------------------------------
Seed: 1 | Initus: ea_she

In [None]:
sequence = 'betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better'
reads = ['betty_bought_butter_th',
                        'tter_the_butter_was_',
                              'he_butter_was_bitter_',
                                         'as_bitter_betty_bought',
                                                     'tty_bought_better_butter_t',
                                                                     'r_butter_to_make_the_',
                                                                                   'ke_the_bitter_butter_better']
sequitur = Sequitur(reads)
successes = 0
n = 200
for seed in range(n):    
    for read in reads:
        s = 'Seed: ' + str(seed) + ' | Initus: ' + read + ' | '
        sequitur.sequitur(Sequence(read),seed=seed)
        if sequitur.sequence.seq == sequence: 
            s+='SUCCESS'
            successes+=1
        else: s+='FAILURE'
        print(s + ' | ' + sequitur.sequence.seq)
        print('-----------------------------------------')
print('ACCURACY: '+str((successes/(n*len(reads)))*100)+'%')

Seed: 0 | Initus: betty_bought_butter_th | SUCCESS | betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better
-----------------------------------------
Seed: 0 | Initus: tter_the_butter_was_ | SUCCESS | betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better
-----------------------------------------
Seed: 0 | Initus: he_butter_was_bitter_ | SUCCESS | betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better
-----------------------------------------
Seed: 0 | Initus: as_bitter_betty_bought | SUCCESS | betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better
-----------------------------------------
Seed: 0 | Initus: tty_bought_better_butter_t | SUCCESS | betty_bought_butter_the_butter_was_bitter_betty_bought_better_butter_to_make_the_bitter_butter_better
-----------------------------------------
Seed: 0 | Init

In [None]:
successes = 0
n = 200
for seed in range(n):   
    sequence = generate_genome_sequence(200,seed=seed)
    reads = generate_reads(sequence,3,4,10,seed=seed)
    sequitur = Sequitur(reads,sequence)
    for read in reads:
        s = 'Seed: ' + str(seed) + ' | Initus: ' + read + ' | '
        sequitur.sequitur(Sequence(read),seed=seed)
        if sequitur.sequence.seq == sequence: 
            s+='SUCCESS'
            successes+=1
        else: s+='FAILURE'
        print(s + ' | ' + sequitur.sequence.seq)
        print('-----------------------------------------')
print('ACCURACY: '+str((successes/(n*len(reads)))*100)+'%')

Seed: 0 | Initus: TTAGTTGTGCCGC | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGT
-----------------------------------------
Seed: 0 | Initus: TAGTTGTGCC | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGT
-----------------------------------------
Seed: 0 | Initus: AGTTGTGCCGCAG | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGT
-----------------------------------------
Seed: 0 | Initus: GTTGTGCCGC | SUCCESS | TTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACC

IndexError: tuple index out of range

In [None]:
sequitur.sequence.seq
