### Tasks
- sprout_trie
    - builds from root
    - follows highest context path
    - sprout_trie again
- change_direction()
    - if there is no solid growth option in direction 0, switch directions to possibly eliminate options
- splice()
    - if there are no solid options in either direction, separately build a candidate to possibly eliminate options
- join()
    - join 2 candidates
    - maybe restart building process with candidates as new reads

In [2]:
''' 
The read is held by a leaf and sustains their uniqueness and ensures they're added to the sequence at most once.
'''
class Read:
    def __init__(self,read):
        self.read = read
        self.sequenced = False

    def partition(self,root,dir):
        if dir: return Node(self.read.partition(root)[0],dir), self.read.partition(root)[2]
        else: return Node(self.read.partition(root)[2],dir), self.read.partition(root)[0]

In [3]:
'''
A leaf is the end point of a branch and can only the gained information. 
Every Trie must have a leaf for every read containing the root
Leaves can become branches
'''
class Leaf:
    def __init__(self,context,information,read):
        self.context = context
        self.information = information
        self.read = read
    
    def branch(self,context,dir):
        stalk = ""
        i = 0
        while i < min(len(context.stalk),len(self.context.stalk)) and context[i] == self.context[i]: 
            stalk += context[i]
            i += 1
        return stalk

In [4]:
'''
The node is a convenience class for accessing the branch
'''
class Node:
    def __init__(self,stalk,dir):
        if len(stalk) > 0: self.stalk = stalk
        else: self.stalk = "^"
        self.reversed = False
        if dir: 
            self.stalk = ''.join(reversed(self.stalk))
            self.reversed = True

    def __eq__(self,other): return self.stalk[0] == other.stalk[0]

    def __hash__(self): return hash(self.stalk[0])

    def __getitem__(self,index): return self.stalk[index]

In [5]:
'''
A branch has either a collection of branches or a collection of leaves
Every branch must have at least 1 leaf
'''
class Branch:
    def __init__(self,root,network,node=None):
        self.branches = {}
        self.leaves = {}
        self.root = root
        self.network = network
        self.node = node

    def grow(self,sequence,dir,initial=False):
        if type(self) == Root: 
            if len(self.reads) > 0: 
                if len(self.leaves) == 0:
                    self.sprout(dir,initial)
                else: 
                    # traverse tree/find leave with best context
                    read = self.reads.pop()
                    context,information = read.partition(self.root,dir)
                    if self.leaves[context]: 
                        node = Node(self.leaves[context].branch(context,dir),dir)
                        # BUG: wrong node information
                        self.branches[node] = Branch(self.root,self.network,node)
                        if dir: self.leaves[context].context,_,_ = self.leaves[context].context.stalk.partition(node.stalk)
                        else: _,_,self.leaves[context].context = self.leaves[context].context.stalk.partition(node.stalk)
                        self.leaves[context].context = Node(self.leaves[context].context,dir)
                        self.branches[node].leaves[self.leaves[context].context] = Leaf(context,information,self.leaves[context].read)
                        self.leaves.pop(context)
                        if dir: context,_,_ = context.stalk.partition(node.stalk)
                        else: _,_,context = context.stalk.partition(node.stalk)
                        context = Node(context,dir)
                        self.branches[node].leaves[context] = Leaf(context,information,read)
                    else: self.leaves[context] = Leaf(context,information,read)
                self.grow(sequence,dir)
            # once there are no reads to add, we climb the structure
            else: self.climb(sequence,dir)
        else:
            pass # grow branch

    def climb(self,sequence,dir):
        if type(self) == Root:
            if dir: context,_,_ = sequence.partition(self.root)
            else: _,_,context = sequence.partition(self.root)
        else:
            if dir: context,_,_ = sequence.partition(self.node.stalk)
            else: _,_,context = sequence.partition(self.node.stalk)
        context = Node(context,dir)
        # 1. check for matching entries amongst the branches
        if context in self.branches: self.branches[context].climb(sequence,dir)
        else: 
            if not self.leaves[context].read.sequenced: 
                if dir: sequence += self.leaves[context].information
                else: sequence = self.leaves[context].information + sequence
                self.leaves[context].read.sequenced = True
                print(sequence)
                self.network.get_root(sequence,dir).grow(sequence,dir)
            else:
                carat = Node('^',dir) 
                if dir: sequence += self.leaves[carat].information
                else: sequence = self.leaves[carat].information + sequence
                self.leaves[carat].read.sequenced = True
                print(sequence)
                self.network.get_root(sequence,dir).grow(sequence,dir)
        # add information to sequence
        # start building with updated sequence
        # stopping condition?? 

In [6]:
'''
A root is a branch with a connection to the network and a list of the reads it comprises of.
It can have a collection of branches and leaves.
'''
class Root(Branch):
    def __init__(self,root,network):
        super().__init__(root,network)
        self.reads = []

    def add_read(self,read):
        self.reads += [read]

    def sprout(self,dir,initial=False):
        read = self.reads.pop()
        context,information = read.partition(self.root,dir)
        if initial: read.sequenced = True
        self.leaves[context] = Leaf(context,information,read)

In [7]:
class RootNetwork:
    def __init__(self,k):
        self.roots = {}
        self.k = k

    def __getitem__(self,key):
        return self.roots[key]

    def __contains__(self, key):
        return key in self.roots

    # dir = 1, context gain towards prefix
    # dir = 0, context gain towards suffix
    def build(self,sequence,dir=0,initial=True):
        self.get_root(sequence,dir).grow(sequence,dir,initial)

    def plant_trie(self,trie):
        self.roots[trie.root] = trie

    def get_root(self,sequence,dir):
        if dir: return self[sequence[-self.k:]]
        else: return self[sequence[:self.k]]

In [8]:
k = 3
r = RootNetwork(k)
reads = {}
for read in ['you say hel',
                ' say hello wo',
                        'lo world, i be',#'ld, i bellow go t',
                              'ld, i bellow go t',
                                        'ow go to hell']:
    reads[read] = Read(read)
    for i in range(len(read)-k+1):
        if read[i:i+k] not in r: r.plant_trie(Root(read[i:i+k],r))
        r[read[i:i+k]].add_read(reads[read])

In [9]:
r.build('ld, i bellow go t')

lo world, i bellow go t
 say hello world, i bellow go t
you say hello world, i bellow go t


KeyError: <__main__.Node object at 0x00000188C7ACA460>