In [36]:
import numpy as np
import math
import itertools
from functools import cache

In [37]:
# Parameters of the small demo: vector length n and alphabet size q.
n, q = 10, 2
# Random length-n vector whose entries are drawn from {0, ..., q-1}.
x = np.random.randint(low=0, high=q, size=n, dtype=np.uint8)

In [38]:
def get_deletion_ball(x, t):
    """Return the distinct subsequences of ``x`` obtained by deleting ``t`` entries.

    Args:
        x: 1-D numpy array (its ``.size`` attribute is used).
        t: number of entries to delete.

    Returns:
        A list of tuples, each of length ``x.size - t``, with duplicates
        removed. The order of the list is whatever the intermediate set yields.
    """
    kept_length = x.size - t
    # Every way of keeping kept_length positions (in order) is one subsequence;
    # collecting them in a set removes repeated subsequences.
    distinct = {subsequence for subsequence in itertools.combinations(x, kept_length)}
    return list(distinct)


In [39]:
# Build the radius-r deletion ball of the demo vector x (all distinct
# subsequences with r entries removed) and report how many it contains.
# Relies on x, n and q defined in an earlier cell.
r=1
D_x_1 = get_deletion_ball(x,r)
print(f'For {x} size {n} vector of alphabet {q} we get a radius {r} deletion ball of size {len(D_x_1)}')

For [1 1 0 1 0 1 1 1 0 0] size 10 vector of alphabet 2 we get a radius 1 deletion ball of size 6


In [40]:
def get_composition_vector(x, alphabet_size=None):
    """Count how many times each symbol occurs in ``x``.

    Args:
        x: iterable of integer symbols, each usable as an index into a vector
            of length ``alphabet_size``.
        alphabet_size: number of symbols in the alphabet. When omitted, the
            notebook-level global ``q`` is used, which keeps existing
            one-argument calls working.

    Returns:
        An integer numpy array of length ``alphabet_size`` whose entry ``s`` is
        the number of occurrences of symbol ``s`` in ``x``.

    Raises:
        IndexError: if a symbol is not a valid index for the count vector.
    """
    # Only touch the global q when the caller did not supply the size, so the
    # function can be used (and tested) without any notebook state.
    size = q if alphabet_size is None else alphabet_size
    k = np.zeros(size, dtype=int)
    for symbol in x:
        k[symbol] += 1
    return k

def get_ordering_permutation(x):
    """Return the symbols of ``x`` ordered from most to least frequent.

    The result is the index array that sorts the composition vector of ``x``
    in descending order; the stable sort keeps equally frequent symbols in
    increasing symbol order.
    """
    counts = get_composition_vector(x)
    # Sorting the negated counts ascending is the same as sorting the counts descending.
    descending_key = np.negative(counts)
    return np.argsort(descending_key, kind='stable')

def get_order_composition(x):
    """Return the symbol counts of ``x`` sorted in descending order.

    This is the composition vector of ``x`` rearranged by the stable
    descending-count ordering of its symbols.
    """
    counts = get_composition_vector(x)
    # Negate so that an ascending stable argsort yields a descending order.
    by_frequency = np.argsort(np.negative(counts), kind='stable')
    return counts[by_frequency]

def number_of_runs(x):
    """Count the maximal blocks of equal consecutive symbols in ``x``.

    Args:
        x: a sized, sliceable sequence (list, tuple, string or 1-D numpy array).

    Returns:
        The number of runs as an ``int``; ``0`` for an empty sequence.
    """
    # An empty sequence has no runs; indexing x[0] would raise IndexError.
    if len(x) == 0:
        return 0
    # A new run starts at every position whose symbol differs from its predecessor.
    return 1 + sum(1 for previous, current in zip(x, x[1:]) if current != previous)

@cache
def get_maximal_deletion_ball_size(n, t, q=2):
    """Evaluate the recurrence D_q(n, t) = sum_{i=0..t} C(n-t, i) * D_{q-1}(t, t-i).

    Base cases: the value is 0 when ``n < t`` or ``t < 0``, and 1 when ``q == 1``.
    By its name this is the largest possible size of a radius-``t`` deletion
    ball around a length-``n`` word over ``q`` symbols; the code itself only
    establishes the recurrence above.

    Args:
        n: word length.
        t: number of deletions.
        q: alphabet size; must be at least 1 whenever ``0 <= t <= n``.

    Returns:
        The value of the recurrence as an ``int``. Results are memoized.

    Raises:
        ValueError: if ``q < 1`` for an otherwise valid ``(n, t)``. Without
            this guard the recursion on ``q - 1`` never reaches the ``q == 1``
            base case and ends in ``RecursionError``.
    """
    if n < t or t < 0:
        return 0
    if q < 1:
        raise ValueError(f'alphabet size q must be at least 1, got {q}')
    if q == 1:
        return 1

    return sum(
        math.comb(n - t, i) * get_maximal_deletion_ball_size(t, t - i, q - 1)
        for i in range(t + 1)
    )

@cache
def get_maximal_insertion_ball_size(n, t, q=2):
    """Return sum_{i=0..t} C(n+t, i) * (q-1)**i as an exact integer.

    By its name this is the size of a radius-``t`` insertion ball around a
    length-``n`` word over ``q`` symbols; the code itself only establishes the
    sum above.

    Args:
        n: word length.
        t: number of insertions.
        q: alphabet size.

    Returns:
        The sum as an ``int``; 0 when ``n < 0`` or ``t < 0``. Results are memoized.
    """
    if n < 0 or t < 0:
        return 0
    # Integer exponentiation keeps the result exact. math.pow goes through a
    # float, which loses precision once (q-1)**i exceeds 2**53.
    return sum(math.comb(n + t, i) * (q - 1) ** i for i in range(t + 1))

@cache
def get_maximal_number_of_common_subsequences(n, t, q=2):
    """Return D(n, t) - D(n-1, t) + D(n-2, t-1), with D = get_maximal_deletion_ball_size.

    The result is 0 unless ``1 <= t < n``. By its name this is the largest
    number of length-``n - t`` subsequences two distinct length-``n`` words can
    share; the code itself only establishes the formula above.
    """
    if t < 1 or t >= n:
        return 0
    full_ball = get_maximal_deletion_ball_size(n, t, q)
    shorter_word_ball = get_maximal_deletion_ball_size(n - 1, t, q)
    correction = get_maximal_deletion_ball_size(n - 2, t - 1, q)
    return full_ball - shorter_word_ball + correction

In [41]:
# Exercise the helpers on the demo vector x (uses x and n from earlier cells).
k_x = get_composition_vector(x)     # per-symbol counts
t_x = get_ordering_permutation(x)   # symbols ordered by descending count
l_x = get_order_composition(x)      # counts in descending order
print(k_x)
print(t_x)
print(l_x)
print(x, number_of_runs(x))
# Spot checks of the combinatorial bounds for small parameters.
print(get_maximal_deletion_ball_size(n,1))
print(get_maximal_insertion_ball_size(n,1))
print(get_maximal_number_of_common_subsequences(n,1))
print(get_maximal_number_of_common_subsequences(n,2))
print(get_maximal_number_of_common_subsequences(7,3, 3))

[4 6]
[1 0]
[6 4]
[1 1 0 1 0 1 1 1 0 0] 6
10
12
2
16
24


In [42]:
def get_threshold(n, t, order_comp, q=2):
    """Find the first rank whose count exceeds the common-subsequence bound.

    For ``i = 0, 1, ..., q-1`` this compares
    ``get_maximal_number_of_common_subsequences(n-i-1, t-i, q)`` with
    ``order_comp[i]`` and returns the smallest ``i`` for which the bound is
    strictly smaller than the count.

    Args:
        n: current word length.
        t: current number of deletions.
        order_comp: symbol counts sorted in descending order (indexable by ``i``).
        q: alphabet size; also the number of ranks examined.

    Returns:
        The smallest qualifying index, or ``None`` when no rank qualifies.
    """
    qualifying_ranks = (
        i
        for i in range(q)
        if get_maximal_number_of_common_subsequences(n - i - 1, t - i, q) < order_comp[i]
    )
    return next(qualifying_ranks, None)

def get_u_a_i(subsequences, a, i=1):
    """Keep the columns whose first entry equals ``a`` and drop their first row.

    Args:
        subsequences: 2-D numpy array; each column is one subsequence.
        a: symbol the first row is compared against.
        i: not used by the implementation.

    Returns:
        A 2-D array with one row fewer, holding only the selected columns.
    """
    starts_with_a = subsequences[0, :] == a
    selected_columns = subsequences[:, starts_with_a]
    return selected_columns[1:, :]


def reconstruct_x_from_subsequences(n, subsequences, q=2, print_steps=False):
    """Rebuild a length-``n`` sequence from a matrix of its subsequences.

    Each column of ``subsequences`` is one subsequence, and the number of
    deletions ``t`` is taken as ``n - subsequences.shape[0]``. While
    ``t >= 1`` every iteration:

    1. counts the first symbols of the current columns and orders the symbols
       by descending count (stable sort);
    2. asks ``get_threshold`` for an index ``j`` and appends the ``j + 1`` most
       frequent first symbols to the reconstruction;
    3. reduces ``n`` by ``j + 1`` and ``t`` by ``j``, keeps only the columns
       that start with the symbol of rank ``j``, drops their first row, and
       truncates to ``get_maximal_number_of_common_subsequences(n, t, q) + 1``
       columns.

    Once ``t`` is below 1 the first remaining column is appended as the tail.

    Args:
        n: length of the original sequence.
        subsequences: 2-D integer numpy array of shape ``(n - t, count)``.
        q: alphabet size. It is used for the symbol counts as well, so the
            function no longer depends on a notebook-level ``q``.
        print_steps: when true, print ``j`` and the partial reconstruction
            after every iteration.

    Returns:
        1-D numpy array holding the reconstructed sequence.

    Raises:
        ValueError: if a first symbol is ``>= q``, or if ``get_threshold``
            finds no usable index (previously an opaque ``TypeError``).
    """
    t = n - subsequences.shape[0]
    reconstruction = np.array([], dtype=int)
    while t >= 1:
        # Count the first symbols once, with the caller-supplied alphabet
        # size, and derive both the ordering and the ordered counts from it.
        counts = np.bincount(subsequences[0], minlength=q)
        if counts.size > q:
            raise ValueError(f'subsequences contain a symbol outside the alphabet of size {q}')
        order_perm = np.argsort(-counts, kind='stable')  # -counts gives descending order
        order_comp = counts[order_perm]

        j = get_threshold(n, t, order_comp, q)
        if j is None:
            raise ValueError('no threshold found: not enough subsequences to reconstruct the sequence')
        reconstruction = np.concatenate((reconstruction, order_perm[:j + 1]))
        if print_steps:
            print(j, reconstruction)

        n = n - j - 1
        t = t - j
        N = get_maximal_number_of_common_subsequences(n, t, q) + 1
        subsequences = get_u_a_i(subsequences, order_perm[j])
        subsequences = subsequences[:, :N]

    return np.concatenate((reconstruction, subsequences.T[0]))
            
    

In [43]:
# Experiment parameters: word length, number of deletions, alphabet size.
n, t, q = 100, 4, 2

# Number of distinct subsequences the experiment requires before reconstructing.
N = get_maximal_number_of_common_subsequences(n, t, q) + 1

# Draw random words until one has a deletion ball with at least N members;
# '1' is printed once for every rejected draw.
while True:
    x = np.random.randint(low=0, high=q, size=n, dtype=np.ubyte)
    D_x = get_deletion_ball(x, t)
    if len(D_x) >= N:
        break
    print('1')

1
1


In [44]:
# Stack the deletion ball into a matrix with one subsequence per column,
# reconstruct x from it, and check the result against the original word.
subsequences = np.array(D_x).T
print(subsequences.shape)
# NOTE(review): the two commented-out calls below reference get_next_subsequnces,
# which is not defined in the visible cells — leftover debugging; consider deleting.
# print(get_next_subsequnces(subsequences,0).shape)
# print(get_next_subsequnces(subsequences,0))
reconstructed_x = reconstruct_x_from_subsequences(n, subsequences, q)
print(f'Reconstructed {reconstructed_x}')
print(f'From {x}')
print(np.array_equal(x, reconstructed_x))

(96, 390244)
Reconstructed [1 0 1 1 1 1 1 0 0 1 0 1 0 1 0 0 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1 1 1 0 1
 0 1 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 1 0 0 1 1 1 1
 1 0 1 0 1 1 0 1 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1]
From [1 0 1 1 1 1 1 0 0 1 0 1 0 1 0 0 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1 1 1 0 1
 0 1 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 0 1 0 1 1 0 0 1 1 1 1
 1 0 1 0 1 1 0 1 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1]
True
