In [160]:
import numpy as np
import math
import itertools
from functools import cache

In [161]:
class SequenceTreeNode:
    """Node of a binary prefix tree (trie) that stores a set of 0/1 sequences.

    Each node has one child slot per symbol (0 and 1). A sequence is stored as
    the path of its symbols starting at the root. The tree is meant for
    sequences that all share the same length: a proper prefix of a stored
    sequence is reported as already present by ``has_sequence``.

    Attributes:
        children: object array of length 2; ``children[c]`` is the subtree
            reached by symbol ``c`` or ``None`` when that branch is absent.
        char: symbol on the edge leading into this node (``-1`` for a node
            that was never attached as a child, e.g. the root).
        number_of_leafs: number of inserted sequences whose path passes
            through (or ends at) this node.
        contains_0_vector: set when a suffix inserted through this node
            consisted only of zeros.
        contains_1_vector: set when a suffix inserted through this node
            consisted only of ones.
    """

    def __init__(self):
        self.children = np.array([None, None])
        self.char = -1
        self.number_of_leafs = 0
        self.contains_0_vector = False
        self.contains_1_vector = False

    def has_sequence(self, x):
        """Return True if the full path spelled by ``x`` exists below this node.

        Every symbol of ``x`` is followed, including the last one. For an
        empty ``x`` the answer is True only once something has been inserted
        at this node, so an empty sequence is counted exactly once.
        """
        node = self
        for c in x:
            child = node.children[c]
            if child is None:
                return False
            node = child
        # Nodes are only created by insertions, so a non-root node always has
        # a positive count; the check matters for the root and an empty x.
        return node.number_of_leafs > 0

    def insert_sequence(self, x):
        """Insert the numpy sequence ``x`` unless it is already stored."""
        if self.has_sequence(x):
            return

        self.__insert_sequence_internal(x)

    def __insert_sequence_internal(self, x):
        """Add ``x`` below this node, updating counters and flags on the way."""
        self.number_of_leafs += 1
        # NOTE(review): np.all of an empty array is True, so the node where a
        # sequence ends (empty remaining suffix) gets contains_0_vector set
        # and, because of the elif, never contains_1_vector. Kept as is since
        # code outside this view may read these flags; confirm the intent.
        if np.all(x == 0):
            self.contains_0_vector = True
        elif np.all(x == 1):
            self.contains_1_vector = True

        if x.shape[0] == 0:
            return

        letter = x[0]
        if self.children[letter] is None:
            self.children[letter] = SequenceTreeNode()

        self.children[letter].char = letter
        self.children[letter].__insert_sequence_internal(x[1:])

    def print_tree(self):
        """Print the statistics of every node in depth-first order."""
        self.__print_tree(0)

    def __print_tree(self, depth):
        """Print this node's statistics, then recurse into existing children."""
        print(f'({depth}) - char {self.char} has {self.number_of_leafs} sequences.\n\t\
            contains the 0 vector: {self.contains_0_vector}\n\t\
            contains the 1 vector: {self.contains_1_vector}')
        for child in self.children:
            if child is not None:
                child.__print_tree(depth + 1)

    def print_all_sequences(self):
        """Print the symbol path of every leaf below this node."""
        self.__print_all_sequences(np.array([], dtype=np.ubyte))

    def __print_all_sequences(self, path):
        """Print ``path`` if this node is a leaf, else extend it per child."""
        if all(child is None for child in self.children):
            print(path)
        for child in self.children:
            if child is None:
                continue
            child.__print_all_sequences(np.append(path, child.char))


In [162]:
@cache
def get_maximal_deletion_ball_size(n, t, q=2):
    """Return the maximal size of a t-deletion ball of a length-n sequence.

    Uses Levenshtein's recurrence over the alphabet size ``q``:

        D_q(n, t) = sum_{i=0..t} C(n - t, i) * D_{q-1}(t, t - i),   D_1(n, t) = 1

    For the default binary alphabet this reduces to sum_{i=0..t} C(n - t, i).

    Args:
        n: sequence length.
        t: number of deletions.
        q: alphabet size (defaults to 2, the binary case).

    Returns:
        The maximal number of distinct subsequences of length ``n - t``,
        or 0 when ``n < t``, ``t < 0`` or ``q < 1``.
    """
    if n < t or t < 0 or q < 1:
        return 0
    if q == 1:
        # A unary sequence has exactly one subsequence of each length.
        return 1

    # The alphabet size must shrink on every level; recursing with the same
    # q would call f(t, t) from itself and never terminate.
    return sum(
        math.comb(n - t, i) * get_maximal_deletion_ball_size(t, t - i, q - 1)
        for i in range(t + 1)
    )

@cache
def get_maximal_number_of_common_subsequences(n, t):
    """Combine three deletion-ball sizes into the common-subsequence count.

    Evaluates ``D(n, t) - D(n - 1, t) + D(n - 2, t - 1)`` where ``D`` is
    ``get_maximal_deletion_ball_size``. Returns 0 when ``t <= 0`` or
    ``n <= t``.
    """
    if t <= 0 or n <= t:
        return 0

    ball_size = get_maximal_deletion_ball_size
    full_ball = ball_size(n, t)
    shorter_ball = ball_size(n - 1, t)
    inner_ball = ball_size(n - 2, t - 1)
    return full_ball - shorter_ball + inner_ball

In [163]:
def get_ordering(U):
    """Order the children of ``U`` by how many sequences each one holds.

    A missing child (``None``) counts as holding 0 sequences instead of
    raising ``AttributeError``.

    Args:
        U: object with a ``children`` sequence whose entries are either
            ``None`` or expose ``number_of_leafs``.

    Returns:
        (t, c): ``t`` is the array of child indices sorted by descending
        count (ties keep their original order), ``c`` is the integer array of
        the counts in that order.
    """
    k = np.array(
        [0 if child is None else child.number_of_leafs for child in U.children],
        dtype=np.int64,
    )
    t = np.argsort(-k, kind='stable')  # -k so it will be in descending order
    c = k[t]
    return t, c

In [164]:
def get_deletion_ball_tree(x, t, N=-1):
    """Collect the subsequences of ``x`` left after ``t`` deletions in a tree.

    Every choice of ``x.size - t`` positions of ``x`` (order preserved) is
    handed to ``SequenceTreeNode.insert_sequence``. Enumeration stops early
    once the root's ``number_of_leafs`` equals ``N``; with the default
    ``N=-1`` that never happens and all choices are visited.

    Returns:
        The root ``SequenceTreeNode`` of the resulting tree.
    """
    tree = SequenceTreeNode()
    kept_length = x.size - t
    for subsequence in itertools.combinations(x, kept_length):
        tree.insert_sequence(np.array(subsequence))
        if tree.number_of_leafs == N:
            # Requested number of stored subsequences reached.
            break

    return tree

In [165]:
# Demo parameters: sequence length, number of deletions, alphabet size.
n = 4
t = 1
q = 2
# Draw a random length-n sequence with symbols in {0, ..., q-1}.
# NOTE(review): no seed is set in the visible cells, so x (and the output
# recorded below) changes between runs.
x=np.random.randint(0,q,(n),dtype=np.ubyte)
# Build the tree of subsequences obtained from x by t deletions.
D_x = get_deletion_ball_tree(x, t)
print(f'x = {x}')
print()
# List the stored subsequences, then dump the per-node statistics.
D_x.print_all_sequences()
D_x.print_tree()

x = [0 1 1 1]

[0 1 1]
[1 1 1]
(0) - char -1 has 2 sequences.
	            contains the 0 vector: False
	            contains the 1 vector: True
(1) - char 0 has 1 sequences.
	            contains the 0 vector: False
	            contains the 1 vector: True
(2) - char 1 has 1 sequences.
	            contains the 0 vector: False
	            contains the 1 vector: True
(3) - char 1 has 1 sequences.
	            contains the 0 vector: True
	            contains the 1 vector: False
(1) - char 1 has 1 sequences.
	            contains the 0 vector: False
	            contains the 1 vector: True
(2) - char 1 has 1 sequences.
	            contains the 0 vector: False
	            contains the 1 vector: True
(3) - char 1 has 1 sequences.
	            contains the 0 vector: True
	            contains the 1 vector: False
