### Tasks
- sprout_trie
    - builds from root
    - follows highest context path
    - sprout_trie again
- change_direction()
    - if there is no solid growth option in direction 0, switch directions to possibly eliminate options
- splice()
    - if there are no solid options in either direction, separately build a candidate to possibly eliminate options
- join()
    - join 2 candidates
    - maybe restart building process with candidates as new reads

In [8]:
''' 
Each read is held by a leaf. The Read object preserves the read's uniqueness and ensures it is added to the sequence at most once.
'''
class Read:
    """One input read plus a flag recording whether it has been sequenced."""

    def __init__(self, read):
        self.read = read
        # Flipped to True by the trie code once this read has been consumed.
        self.is_sequenced = False

    def partition(self, root, dir):
        """Split the read around the first occurrence of ``root``.

        Returns a ``(context, information)`` pair. When ``dir`` is truthy
        the text before ``root`` becomes the context ``Node`` and the text
        after it is the information; when ``dir`` is falsy the roles swap.
        """
        before, _, after = self.read.partition(root)
        if dir:
            return Node(before, dir), after
        return Node(after, dir), before

In [9]:
'''
A leaf is the end point of a branch and can only hold the gained information.
Every Trie must have a leaf for every read containing the root
Leaves can become branches
'''
class Leaf:
    """End point of a branch: a context, the information gained, and its read."""

    def __init__(self, context, information, read):
        self.context = context
        # An empty information string is stored as the '$' marker.
        self.information = information if len(information) != 0 else '$'
        self.read = read

    def __repr__(self):
        return str(self.information)

    def branch(self, context, dir):
        """Return the leading characters shared by ``context`` and this leaf's context.

        Characters are compared position by position until the first
        mismatch or until the shorter stalk is exhausted. ``dir`` is
        accepted but not used.
        """
        own = self.context
        limit = min(len(context.stalk), len(own.stalk))
        shared = []
        for pos in range(limit):
            if not (context[pos] == own[pos]):
                break
            shared.append(context[pos])
        return "".join(shared)

In [10]:
'''
The node is a convenience class for accessing the branch
'''
class Node:
    """Wrapper around a stalk string, used as a dictionary key in the trie.

    Equality and hashing look only at the first character of the stalk,
    so two nodes whose stalks start with the same character are the same key.
    """

    def __init__(self, stalk, dir):
        # An empty stalk is represented by the '^' placeholder.
        self.stalk = stalk if len(stalk) > 0 else "^"
        self.reversed = bool(dir)
        if self.reversed:
            # A truthy direction stores the stalk back to front.
            self.stalk = ''.join(reversed(self.stalk))

    def __eq__(self, other):
        return self.stalk[0] == other.stalk[0]

    def __hash__(self):
        return hash(self.stalk[0])

    def __getitem__(self, index):
        return self.stalk[index]

    def __repr__(self):
        return self.stalk

    def __len__(self):
        # The '^' placeholder counts as an empty stalk.
        return 0 if self.stalk == '^' else len(self.stalk)

In [11]:
'''
A branch has either a collection of branches or a collection of leaves
Every branch must have at least 1 leaf
'''
class Branch:
    """Interior part of a trie: a set of child branches plus a set of leaves.

    ``branches`` and ``leaves`` are dictionaries keyed by the context object
    of the child they hold.
    """

    def __init__(self,root,network,node=None):
        self.branches = {}      # context -> child Branch
        self.leaves = {}        # context -> Leaf
        self.root = root        # root value shared by every branch of one trie
        self.network = network  # network used to look up other roots
        self.node = node        # node naming this branch (defaults to None)

    def grow(self,leaf,dir):
        """Split this branch to make room for ``leaf``.

        A child branch is created under the part of the leaf's context stalk
        that extends past this branch's current stalk, every existing leaf is
        moved onto that child, and ``leaf`` is stored on this branch.
        """
        # TODO: grow leaf on branch
        # if the leaf has a common stalk with the main branch on the context-gaining end, split the stalk along the context
            # the node should be updated to only the common stalk
            # a new branch will be created
                # all existing leaves would be added to the new branch
            # the new leaf will be added as a leaf
        # if the branch has a branch with a stalk common to the leaf, grow the leaf on that branch
        # else add the leaf to current branch
        node = Node(leaf.context.stalk[len(self.node.stalk):],dir)
        # NOTE(review): this stores the leaf's full stalk, not only the common
        # part described in the TODO above - confirm which is intended.
        self.node.stalk = leaf.context.stalk
        child = Branch(self.root,self.network,node)
        self.branches[node] = child
        # Iterate over a snapshot because the loop removes entries from self.leaves.
        for moved in list(self.leaves.values()):
            child.leaves[moved.context] = self.leaves.pop(moved.context)
        self.leaves[leaf.context] = leaf

    def climb(self,sequence,dir):
        """Extend ``sequence`` by the information of one matching leaf.

        The context is the part of ``sequence`` on the far side of this
        branch's anchor (the root value for a ``Root``, otherwise the node
        stalk): the text before it when ``dir`` is truthy, the text after it
        when ``dir`` is falsy. If a child branch matches the context the call
        is delegated to it. Otherwise an unsequenced leaf matching the
        context is used, falling back to a leaf keyed by the '^' placeholder.

        Returns the extended sequence. The chosen leaf's read is marked as
        sequenced. When no leaf applies, the sequence is returned with '$'
        appended (``dir`` truthy) or '^' prepended (``dir`` falsy).
        """
        # isinstance rather than an exact type comparison, so subclasses of
        # Root also anchor on self.root instead of their (None) node.
        if isinstance(self, Root):
            if dir:
                # With a truthy direction the root is taken from the END of
                # the sequence (see RootNetwork.get_root), so anchor on the
                # last occurrence; the first occurrence would truncate the
                # context whenever the same text also appears earlier.
                head, found, _ = sequence.rpartition(self.root)
                # If the root is absent, keep the whole sequence as context,
                # which is what str.partition yields in that case.
                context = head if found else sequence
            else:
                context = sequence.partition(self.root)[2]
        else:
            parts = sequence.partition(self.node.stalk)
            context = parts[0] if dir else parts[2]
        context = Node(context,dir)

        if context in self.branches:
            return self.branches[context].climb(sequence,dir)

        if context in self.leaves and not self.leaves[context].read.is_sequenced:
            leaf = self.leaves[context]
        else:
            # Fall back to the leaf stored under the empty-context placeholder.
            carat = Node('^',dir)
            if carat not in self.leaves:
                # Nothing left to add in this direction: mark the end.
                return sequence + '$' if dir else '^' + sequence
            leaf = self.leaves[carat]

        if dir: sequence += leaf.information
        else: sequence = leaf.information + sequence
        leaf.read.is_sequenced = True
        return sequence

    def is_dead(self):
        """Return True when every leaf on this branch has a sequenced read."""
        return all(leaf.read.is_sequenced for leaf in self.leaves.values())

In [12]:
'''
A root is a branch with a connection to the network and a list of the reads that contain it.
It can have a collection of branches and leaves.
'''
class Root(Branch):
    """Top of one trie: a Branch that also queues the reads still to be placed.

    ``self.reads`` is a list of read objects waiting to be turned into leaves;
    ``grow`` and ``sprout`` consume it from the end with ``list.pop()``.
    """
    def __init__(self,root,network):
        # No node is passed, so the inherited ``node`` attribute keeps its default.
        super().__init__(root,network)
        self.reads = []

    def __repr__(self):
        return self.root
        
    def add_read(self,read):
        """Queue ``read`` for placement in this trie."""
        self.reads += [read]

    # BUG: when two reads have the same initial kmer, the second entry replaces the first (apparent dependence on starting order)
    # TODO: resolving the path when two entries share an initial kmer: plan()
        # the plan routine will build out each kmer with matching initial kmers with the same direction
            # if a route joins with an already-sequenced read, it's eliminated
            # if a route finds the end, that sequence is added to the whole sequence
    def grow(self,sequence,dir,initial=False):
        """Place queued reads into the trie, then extend ``sequence`` through it.

        While reads are queued, each call places at most one unsequenced read
        and then recurses (without forwarding ``initial``). Once the queue is
        empty the sequence is printed, extended with ``climb``, and handed to
        the next root obtained from ``self.network.get_root``.

        Returns the finished sequence once it ends with '$'.
        """
        if len(self.reads) > 0: 
            # The very first placement goes through sprout(), which honours ``initial``.
            if len(self.leaves) == 0: self.sprout(dir,initial)
            else: 
                read = self.reads.pop()
                # Skip reads that were already consumed elsewhere.
                while read.is_sequenced and len(self.reads) > 0: read = self.reads.pop()
                if not read.is_sequenced:
                    context,information = read.partition(self.root,dir)
                    if context in self.leaves: 
                        # Key collision with an existing leaf: move both reads onto a new
                        # branch keyed by the stalk they have in common.
                        node = Node(self.leaves[context].branch(context,dir),dir)
                        self.branches[node] = Branch(self.root,self.network,node)
                        # Re-split the existing leaf's stalk around the shared stalk; which
                        # side becomes context and which information depends on ``dir``.
                        if dir: self.leaves[context].context,_,self.leaves[context].information = self.leaves[context].context.stalk.partition(node.stalk)
                        else: self.leaves[context].information,_,self.leaves[context].context = self.leaves[context].context.stalk.partition(node.stalk)
                        self.leaves[context].context = Node(self.leaves[context].context,dir)
                        self.branches[node].leaves[self.leaves[context].context] = Leaf(self.leaves[context].context,self.leaves[context].information,self.leaves[context].read)
                        # The old leaf now lives on the new branch only.
                        self.leaves.pop(context)
                        # Re-split the new read's context the same way and add it as a sibling leaf.
                        if dir: context,_,_ = context.stalk.partition(node.stalk)
                        else: _,_,context = context.stalk.partition(node.stalk)
                        context = Node(context,dir)
                        self.branches[node].leaves[context] = Leaf(context,information,read)
                    else: self.leaves[context] = Leaf(context,information,read)
            return self.grow(sequence,dir)
        else: 
            # Emits the sequence built so far on every step.
            print(sequence)
            # if sequence[0] == '^':
            #     if dir: return sequence
            #     return self.network.get_root(sequence,1).grow(sequence,1)
            sequence = self.climb(sequence,dir) 
            # A leading '^' while dir is falsy: continue from the root for direction 1.
            if sequence[0] == '^' and not dir: return self.network.get_root(sequence,1).grow(sequence,1)
            # A trailing '$' ends the build.
            elif sequence[-1] == '$': return sequence
            # Otherwise keep going in the same direction from the next root.
            return self.network.get_root(sequence,dir).grow(sequence,dir)

    # def change_direction(self,sequence,dir,)

    def sprout(self,dir,initial=False):
        """Take one unsequenced read from the queue and attach it to the trie.

        When ``initial`` is truthy the read is marked as sequenced. The read
        is passed to a matching child branch's ``grow`` if one exists;
        otherwise it is stored as a leaf on this root.
        """
        read = self.reads.pop()
        # Skip reads that were already consumed elsewhere.
        while read.is_sequenced and len(self.reads) > 0: read = self.reads.pop()
        if not read.is_sequenced:
            context,information = read.partition(self.root,dir)
            if initial: read.is_sequenced = True
            if len(self.branches) > 0 and context in self.branches: self.branches[context].grow(Leaf(context,information,read),dir)
            else: self.leaves[context] = Leaf(context,information,read)

In [13]:
class RootNetwork:
    """Registry of tries, indexed by each trie's ``root`` value.

    ``k`` is the number of characters taken from a sequence when looking up
    the trie to continue from.
    """

    def __init__(self, k):
        self.roots = {}
        self.k = k

    def __getitem__(self, key):
        return self.roots[key]

    def __contains__(self, key):
        return key in self.roots

    # dir = 1, context gain towards prefix
    # dir = 0, context gain towards suffix
    def build(self, sequence, dir=0, initial=True):
        """Start growing ``sequence`` from the trie selected by ``get_root``."""
        start = self.get_root(sequence, dir)
        return start.grow(sequence, dir, initial)

    def plant_trie(self, trie):
        """Register ``trie`` under its ``root`` attribute."""
        self.roots[trie.root] = trie

    def get_root(self, sequence, dir):
        """Return the trie keyed by the last ``k`` characters of ``sequence``
        when ``dir`` is truthy, otherwise by the first ``k`` characters."""
        key = sequence[-self.k:] if dir else sequence[:self.k]
        return self[key]

In [14]:
# Demo: index every k-mer of every read, then build starting from one read.
k = 3
r = RootNetwork(k)
reads = {}
# The literals are indented to show where each read overlaps the previous one.
for read in ['you say hel',
                ' say hello wo',
                        'lo world, i be',
                              'ld, i bellow go t',
                                        'ow go to hell']:
    entry = reads[read] = Read(read)
    for i in range(len(read) - k + 1):
        kmer = read[i:i + k]
        # Create the trie for this k-mer the first time it is seen.
        if kmer not in r:
            r.plant_trie(Root(kmer, r))
        r[kmer].add_read(entry)
r.build('ld, i bellow go t')

ld, i bellow go t
lo world, i bellow go t
 say hello world, i bellow go t
you say hello world, i bellow go t
^you say hello world, i bellow go t
^you say hello world, i bellow go to hell


'^you say hello world, i bellow go to hell$'

In [15]:
# Demo: same reads as the previous cell, but the build starts from a different read.
k = 3
r = RootNetwork(k)
reads = {}
# The literals are indented to show where each read overlaps the previous one.
for read in ['you say hel',
                ' say hello wo',
                        'lo world, i be',#'ld, i bellow go t',
                              'ld, i bellow go t',
                                        'ow go to hell']:
    entry = reads[read] = Read(read)
    for i in range(len(read) - k + 1):
        kmer = read[i:i + k]
        # Create the trie for this k-mer the first time it is seen.
        if kmer not in r:
            r.plant_trie(Root(kmer, r))
        r[kmer].add_read(entry)
r.build(' say hello wo')

 say hello wo
you say hello wo
^you say hello wo
^you say hello world, i be
^you say hello world, i bellow go t
^you say hello world, i bellow go to hell


'^you say hello world, i bellow go to hell$'

In [16]:
# Demo: reads in which the same k-mers occur more than once.
k = 3
r = RootNetwork(k)
reads = {}
# The literals are indented to show where each read sits in the full text.
for read in ['she_sells_s',
                   'lls_sea_shel',
                        'ea_shells_o',
                           'shells_on_the_s',
                                      'he_sea_s',
                                          'ea_shore']:
    entry = reads[read] = Read(read)
    for i in range(len(read) - k + 1):
        kmer = read[i:i + k]
        # Create the trie for this k-mer the first time it is seen.
        if kmer not in r:
            r.plant_trie(Root(kmer, r))
        r[kmer].add_read(entry)
r.build('shells_on_the_s')

shells_on_the_s
lls_sea_shells_on_the_s
she_sells_sea_shells_on_the_s
^she_sells_sea_shells_on_the_s
^she_sells_sea_shells_on_the_sea_s
^she_sells_sea_shells_on_the_sea_shells_o


'^she_sells_sea_shells_on_the_sea_shells_o$'