In [7]:
def burrows_wheeler_transform(text):
    """Compute the Burrows-Wheeler transform of ``text`` from its sorted rotations.

    Every cyclic rotation of ``text`` is materialised and sorted, so time and
    memory grow quadratically with ``len(text)``; intended for short inputs.

    Args:
        text: The string to transform.

    Returns:
        A ``(bwt, idx)`` tuple. ``bwt`` is the string formed by the last
        character of each sorted rotation; ``idx`` is the position of the
        first sorted rotation equal to ``text``. For an empty ``text`` the
        result is ``('', 0)``.
    """
    # An empty string has no rotations, so looking up its row would raise
    # ValueError; return the trivial transform instead.
    if not text:
        return '', 0

    sorted_rotations = sorted(text[i:] + text[:i] for i in range(len(text)))
    bwt = ''.join(rotation[-1] for rotation in sorted_rotations)
    # index() returns the first match, which matters only when text is
    # periodic and several rotations are identical.
    idx = sorted_rotations.index(text)
    return bwt, idx

def inverse_burrows_wheeler_transform(bwt, idx):
    """Rebuild the original text from a Burrows-Wheeler transform.

    The table of sorted rotations is reconstructed column by column: on each
    pass every character of ``bwt`` is prepended to the corresponding row and
    the rows are re-sorted. After ``len(bwt)`` passes each row is a full
    rotation, and row ``idx`` is returned.

    Args:
        bwt: The transformed string (last column of the sorted rotations).
        idx: Row of the original text within the sorted rotations.

    Returns:
        The row at position ``idx`` of the rebuilt table, or ``''`` when
        ``bwt`` is empty.

    Raises:
        IndexError: If ``bwt`` is non-empty and ``idx`` is outside the table.
    """
    # With no characters there is no table row to index into.
    if not bwt:
        return ''

    size = len(bwt)
    table = [''] * size
    # The pass counter itself is unused; only the number of passes matters.
    for _ in range(size):
        table = sorted(bwt[row] + table[row] for row in range(size))
    return table[idx]

# Example usage: transform a '$'-terminated string, then invert the result
# and print both so the round trip can be checked by eye.
text = "hello_world$"
bwt_result, idx = burrows_wheeler_transform(text)
print("BWT:", bwt_result)
original_text = inverse_burrows_wheeler_transform(bwt_result, idx)
print("Original text:", original_text)

BWT: dolh$rellwo_
Original text: hello_world$


In [6]:
def average_repeated_sequence_length(s):
    """Measure the runs of consecutive identical characters in ``s``.

    Only runs of length two or more are counted.

    Args:
        s: The string to scan.

    Returns:
        A ``(average_length, result)`` tuple of floats:

        * ``average_length`` - total length of all counted runs divided by
          the number of counted runs.
        * ``result`` - ``(total_length - 2 * run_count) / len(s)``.

        When ``s`` contains no run of length two or more (including the
        empty string) the result is ``(0.0, 0.0)``, so callers can always
        unpack two values.
    """
    total_length = 0
    repeated_sequences_count = 0

    n = len(s)
    i = 0
    while i < n:
        # Advance run_end to the first position that no longer matches s[i].
        run_end = i + 1
        while run_end < n and s[run_end] == s[i]:
            run_end += 1

        sequence_length = run_end - i
        if sequence_length > 1:
            total_length += sequence_length
            repeated_sequences_count += 1

        i = run_end

    # Keep the return shape identical to the normal case; this also avoids
    # dividing by zero for an empty string.
    if repeated_sequences_count == 0:
        return 0.0, 0.0

    average_length = total_length / repeated_sequences_count
    result = (total_length - 2 * repeated_sequences_count) / n

    return average_length, result

# Example usage: six runs of three identical characters followed by a lone 'j'.
s = "aaabbbcccdddeeefffj"
avg_length, result = average_repeated_sequence_length(s)
print("Average length of repeated sequences:", avg_length)
print("Result:", result)

Average length of repeated sequences: 3.0
Result: 0.3157894736842105


In [2]:
def naive_suffix_array(text):
    """Return the start positions of the suffixes of ``text`` in sorted order.

    Each position is ordered by comparing the suffix slice that begins there,
    so a fresh slice is built for every position.

    Args:
        text: The sequence whose suffixes are ordered.

    Returns:
        A list of integer start positions, one per suffix, arranged so that
        the corresponding suffixes are in ascending order.
    """
    start_positions = list(range(len(text)))
    start_positions.sort(key=lambda start: text[start:])
    return start_positions

# Example usage: build and print the suffix array of a '$'-terminated string.
text = "banana$"
suffix_array = naive_suffix_array(text)
print("Suffix array for text", text, ":", suffix_array)

Suffix array for text banana$ : [6, 5, 3, 1, 0, 4, 2]


In [3]:
def bwt_last_column(text, suffix_array):
    """Build a string from the character preceding each suffix start.

    Args:
        text: The source string.
        suffix_array: Iterable of integer positions into ``text``, visited in
            the order given.

    Returns:
        A string with one character per entry of ``suffix_array``: the
        character of ``text`` immediately before that position.
    """
    # For position 0 the index is -1, which Python resolves to the final
    # character of text, i.e. the lookup wraps around cyclically.
    return ''.join(text[i - 1] for i in suffix_array)

# Example usage. naive_suffix_array is defined in a separate cell and must
# already have been run before this one.
text = "banana"
suffix_array = naive_suffix_array(text)
last_column = bwt_last_column(text, suffix_array)
print("Last column of Burrows-Wheeler matrix for text", text, ":", last_column)

Last column of Burrows-Wheeler matrix for text banana : nnbaaa


In [13]:
def suffix_types(text):
    """Label every position of ``text`` with a suffix type, 'S' or 'L'.

    The last position is always labelled 'S'. Every other position is
    labelled by comparing its character with the one that follows it:
    smaller gives 'S', larger gives 'L', and equal characters inherit the
    label of the following position.

    Args:
        text: The string to classify.

    Returns:
        A string of 'S'/'L' labels with the same length as ``text``; ``''``
        when ``text`` is empty.
    """
    n = len(text)
    # Nothing to classify; without this guard the assignment to the last
    # position below would raise IndexError.
    if n == 0:
        return ''

    types = [''] * n

    # The final position (the '$' sentinel in the usual input) is 'S'.
    types[n - 1] = 'S'

    # Scan right to left so the label of position i + 1 is already known.
    for i in range(n - 2, -1, -1):
        # Current character smaller than the next one: type 'S'.
        if text[i] < text[i + 1]:
            types[i] = 'S'
        # Equal characters: same type as the following suffix.
        elif text[i] == text[i + 1]:
            types[i] = types[i + 1]
        # Otherwise the current character is larger: type 'L'.
        else:
            types[i] = 'L'

    return ''.join(types)

# Example usage: print the type labels for a '$'-terminated string.
text = "banana$"
types = suffix_types(text)
print("Suffix types for text", text, ":", types)

Suffix types for text banana$ : LSLSLLS
