In [1]:
### Global utils
class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

## ES3

### Write a program in a programming language of your choice (Python, for instance), to compute the Burrows- Wheeler Transform of a text T in two different ways:
1. by sorting all the cyclic rotations of T
2. by using appending \$ to T and then applying the SA-IS algorithms to sort the suffixes of T$

### The algorithm should output also the number of the equal-letter runs denoted by r produced by the BWT(T) and r$ if produced by BWT(T$)
### Test your program on:
- a random word
- a Fibonacci word of a given order.
- all the words of a given length and report the words on which the number of equal-letter runs is maximal.

### Compare the two measures when applied to Fibonacci words of even order with the last symbol removed. Let us recall Fibonacci words are defined as follows:
- 𝑓 0 = 𝑏, 𝑓 1 = 𝑎, 𝑓 𝑖 = 𝑓 𝑖−1 𝑓 𝑖−2, 𝑖 ≥ 2.
### Example: 𝑓 5 = 𝑎𝑏𝑎𝑎𝑏𝑎𝑏𝑎 , 𝑓 6 = 𝑎𝑏𝑎𝑎𝑏𝑎𝑏𝑎𝑎𝑏𝑎𝑎𝑏

In [2]:
from collections import Counter


class BWT:
    def __init__(self, text):
        if not text:
            raise ValueError("Input text cannot be empty.")
        self.text = text

    def find_equal_letter_runs(self, bwt_result: str):
        """Identify and return the equal-letter runs in the BWT result"""
        runs = []
        n = len(bwt_result)

        if n == 0:
            return runs

        current_char = bwt_result[0]
        run_start = 0

        for i in range(1, n):
            if bwt_result[i] != current_char:
                # Append the run as a tuple (character, start_index, length)
                #if i - run_start > 1:
                runs.append((current_char, run_start, i - run_start))
                current_char = bwt_result[i]
                run_start = i

        # Append the last run
        #if n - run_start > 1:
        runs.append((current_char, run_start, n - run_start))

        return runs

    def bwt_via_cyclic_rotations(self, log = True):
        """Compute BWT using sorting of cyclic rotations"""
        T = self.text
        n = len(T)

        rotations = [T[i:] + T[:i] for i in range(n)]
        rotations_sorted = sorted(rotations)

        for r in rotations_sorted:
            if log:
                print(f"{r[:-1]}{bcolors.OKGREEN}{r[-1]}{f' {bcolors.FAIL}<- I' if r == T else ''}{bcolors.ENDC}")

        I = rotations_sorted.index(T)
        # compute BWT by taking last character from each sorted rotation
        bwt_result = "".join(rotation[-1] for rotation in rotations_sorted)
        return bwt_result, I

    def __get_last_avilable_position(self, l: list):
        """Return the last element equal to -1 from a list l"""
        for i in range(len(l) - 1, -1, -1):
            if l[i] == -1:
                return i
        return None

    def __find_lms_types(self, t: list, T: list):
        """Function that use T char and t type lists to find letfmost S-types
           A suffix Ti (with i>1) is LMS-type (leftmost S-type or S*) if Ti is S-type and Ti-1 is L-type."""
        for i in range(len(T) - 1, -1, -1):
            t[i] = "S*" if t[i] == "S" and t[i - 1] == "L" else t[i]
        return t
    
    def __get_lms_substrings(self, type_list: list, text_list: list):
        """Get all the substring that starts from a leftmost S-type to the next one until the $ character
        Also return if a duplicate is found"""
        substr_dict = {}
        start = None
        duplicate_lms = {}
        is_repeated = False

        for i, marker in enumerate(type_list):
            if marker == "S*":
                if start is None:
                    start = i
                else:
                    end = i + 1
                    substr = "".join(text_list[start:end])
                    # Check for duplicates
                    if substr in duplicate_lms.keys():
                        duplicate_lms[substr] += 1
                        is_repeated = True
                    else:
                        duplicate_lms[substr] = 0
                        
                    substr_dict[start] = substr
                    start = i

        # Finally add also $ as LMS substring
        substr_dict[len(text_list) - 1] = text_list[-1]
        return substr_dict, is_repeated

    def __run_l_s_type_classification(self, T: list):
        """Start from the char list and create the classification type t list based on the following rules:
           - S type if char is less than or equal to its next one
           - L type otherwise (if char is grater than its next one)
        """
        t = ["" for _ in T]
        # By definition the last char ($) is S type
        t[-1] = "S"
        for i in range(len(T) - 2, -1, -1):
            t[i] = "S" if T[i] <= T[i + 1] else "L"
        return t

    def __init_buckets(self, t: list, T: list):
        """Initialize suffix array with buckets and utils pointers head and tail
           then insert all LMS starting from tail
           SA example is:
               {'$': {'bucket': [-1], 'head': 0, 'tail': 1},
                'A': {'bucket': [-1], 'head': 0, 'tail': 1},
                'B': {'bucket': [-1, -1], 'head': 0, 'tail': 2},
                'C': {'bucket': [-1], 'head': 0, 'tail': 1}}
        """
        SA = {}
        for char, count in Counter(sorted(T)).items():
            SA[char] = {"bucket": [-1 for _ in range(count)], "head": 0, "tail": count}

        # After the SA structure is initialized, inserts all LMS-positions at the end of their respective buckets
        for i in range(len(T) - 1, -1, -1):
            if t[i] == "S*":
                la_position = self.__get_last_avilable_position(SA[T[i]]["bucket"])
                SA[T[i]]["bucket"][la_position] = i
                # print(SA[T[i]])

        return SA

    def __run_l_to_r_scan(self, t: list, T: list, SA: dict):
        """Perform left to right scan and populate buckets from its start (using head indices)"""
        for value in SA.values():
            for el in value["bucket"]:
                if t[el - 1] == "L" and el != -1 and el - 1 > 0:
                    # print(el-1, T[el-1])
                    fa_position = SA[T[el - 1]]["head"]
                    SA[T[el - 1]]["bucket"][fa_position] = el - 1
                    SA[T[el - 1]]["head"] += 1
        return SA

    def __run_r_to_l_scan(self, t: list, T: list, SA: dict):
        """Perform right to left scan and populate buckets from its end (using tail indices)"""
        for value in reversed(SA.values()):
            for el in reversed(value["bucket"]):
                if t[el - 1].startswith("S") and el - 1 >= 0:
                    # print(el, T[el - 1])
                    fa_position = SA[T[el - 1]]["tail"] - 1
                    SA[T[el - 1]]["bucket"][fa_position] = el - 1
                    SA[T[el - 1]]["tail"] -= 1
        return SA

    def __construct_name_array(self, flatten_buckets: list, substr: dict):
        """Construct names array structure by reading Suffix Array
           and by assigning new symbols for each different leftmost S-type substrings"""
        prev_lms = None
        names = [-1] * len(flatten_buckets)
        curr_name = "A" # In this implementation the new symbols for LMS substr strart from 'A'
        for i in range(len(flatten_buckets)):
            # Check if number in bucket is a LMS substring
            if flatten_buckets[i] in substr.keys():
                current_lms = substr[flatten_buckets[i]]
                # print("found", flatten_buckets[i], current_lms)
                # Only for the first element ($)
                if prev_lms is None:
                    names[flatten_buckets[i]] = "$"
                # If the next substr in the sorted data structure is same as previous
                elif prev_lms == current_lms:
                    names[flatten_buckets[i]] = chr(ord(curr_name) - 1)
                # If the next substr in the sorted data structure is different
                else:
                    names[flatten_buckets[i]] = curr_name
                    curr_name = chr(ord(curr_name) + 1)

                prev_lms = current_lms

        return names

    def __sais(self, text: str):
        """Custom implementation of SA-IS sorting algorithm"""
        T = [char for char in text]

        # L-Type and S-Type classification
        t = self.__run_l_s_type_classification(T)
        t = self.__find_lms_types(t, T)
        print(f"{'I':<3}:", " ".join(f"{i:<2}" for i in list(range(len(T)))))
        print(f"{'T':<3}:", " ".join(f"{t:<2}" for t in T))
        print(f"{'t':<3}:", end=" ")
        for c in t:
            color = bcolors.OKBLUE if c.startswith("S") else bcolors.WARNING
            print(f"{color}{c:<2}{bcolors.ENDC}", end=" ")
        print()

        # LMS substring strarts from LMS-type to the next LMS-type
        substr, is_repeted = self.__get_lms_substrings(t, T)
        print("LMS subtrings:", substr)
        print(
            f"{bcolors.OKBLUE}",
            "With" if is_repeted else "Without",
            f"repetitions.{bcolors.ENDC}\n",
        )

        # Init buckets and Suffix Array
        SA = self.__init_buckets(t, T)
        print(
            f"{'After insert LMS  ':<2}:",
            " ".join(f"{el:<3}" for bucket in SA.values() for el in bucket["bucket"]),
        )

        # Then add the L-type positions in a left-to-right scan of SA
        SA = self.__run_l_to_r_scan(t, T, SA)
        print(
            f"{'After L-to-R scan ':<3}:",
            " ".join(f"{el:<3}" for bucket in SA.values() for el in bucket["bucket"]),
        )

        # Finally, add the S-type positions in a right-to-left scan of SA
        # possibly overwriting previously inserted positions.
        SA = self.__run_r_to_l_scan(t, T, SA)
        print(
            f"{'After R-to-L scan ':<3}:",
            " ".join(f"{el:<3}" for bucket in SA.values() for el in bucket["bucket"]),
        )

        # Contruct name array
        flatten_buckets = [el for bucket in SA.values() for el in bucket["bucket"]]
        names = self.__construct_name_array(flatten_buckets, substr)

        # Collapse original string into a new one called T1
        T1 = "".join([str(n) if n != -1 else "" for n in names])
        print(f"{bcolors.OKGREEN}New compact string: {T1}{bcolors.ENDC}")

        return T1, not is_repeted, flatten_buckets

    def bwt_via_suffix_array(self):
        """Perform Burrows-Wheeler transform by sorting suffixes with SA-IS algorithm"""
        T = self.text + "$"
        end = False
        i = 0
        while not end:
            if i == 100:
                raise RuntimeError("Error, max iteration reached")
            print(f"#### SA-IS iter {i} ####")
            T, end, SA = self.__sais(T)
            i += 1
            
            print("SA:", SA, "\n")
        
        # SA: [8, 7, 2, 5, 0, 3, 6, 1, 4]
        # T: AB$
        # text: abaababa

        # Costruzione del BWT
        T = self.text + "$"
        bwt = ""
        for i in range(len(SA)):
            if SA[i] == 0:
                bwt += T[-1]  # Se la posizione è 0, prendi l'ultimo carattere
            else:
                bwt += T[SA[i] - 1]  # Altrimenti prendi il carattere precedente

        return bwt
    

### Utils

In [3]:
from itertools import product


def get_fibonacci_words(i: int) -> str:
    """Return the sequence of Fibonacci words up to the ith order"""
    if i <= 0:
        return []
    elif i == 1:
        return ["a"]
    elif i == 2:
        return ["a", "ab"]

    words = ["a", "ab"]
    for j in range(2, i):
        words.append(words[j - 1] + words[j - 2])

    return words[-1]


assert get_fibonacci_words(5) == "abaababa", "Error, wrong fibonacci implementation"
assert (
    get_fibonacci_words(6) == "abaababaabaab"
), "Error, wrong fibonacci implementation"


def generate_words(s: str, n: int):
    """
    Generate all words of length n using symbols from s.

    :param s: A string containing the allowed symbols.
    :param n: An integer representing the length of the words.
    :return: A list of generated words.
    """
    return ["".join(p) for p in product(s, repeat=n)]

### Analysis

#### Burrows-wheeler trasform via cyclic rotations

In [4]:
te = "abaaba$"
bwt = BWT(te)
print("-" * (len(te) + 12))
print("Input text:", te)
bwt_result, I = bwt.bwt_via_cyclic_rotations()
print(f"{bcolors.OKCYAN}BWT (rotations): {bwt_result}{bcolors.ENDC}")
print("With I:", I, "\n")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")


-------------------
Input text: abaaba$
$abaab[92ma[0m
a$abaa[92mb[0m
aaba$a[92mb[0m
aba$ab[92ma[0m
abaaba[92m$ [91m<- I[0m
ba$aba[92ma[0m
baaba$[92ma[0m
[96mBWT (rotations): abba$aa[0m
With I: 4 

Equal letter runs: 5
Character: a, Start Index: 0, Length: 1
Character: b, Start Index: 1, Length: 2
Character: a, Start Index: 3, Length: 1
Character: $, Start Index: 4, Length: 1
Character: a, Start Index: 5, Length: 2


In [5]:
te = "".join(get_fibonacci_words(5))
bwt = BWT(te)
print("-" * (len(te) + 12))
print("Input text:", te)
bwt_result, I = bwt.bwt_via_cyclic_rotations()
print(f"{bcolors.OKCYAN}BWT (rotations): {bwt_result}{bcolors.ENDC}")
print("With I:", I, "\n")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")


--------------------
Input text: abaababa
aabaaba[92mb[0m
aababaa[92mb[0m
abaabaa[92mb[0m
abaabab[92ma [91m<- I[0m
ababaab[92ma[0m
baabaab[92ma[0m
baababa[92ma[0m
babaaba[92ma[0m
[96mBWT (rotations): bbbaaaaa[0m
With I: 3 

Equal letter runs: 2
Character: b, Start Index: 0, Length: 3
Character: a, Start Index: 3, Length: 5


In [6]:
text = "".join(get_fibonacci_words(6))
bwt = BWT(te)
print("-" * (len(te) + 12))
print("Input text:", te)
bwt_result, I = bwt.bwt_via_cyclic_rotations()
print(f"{bcolors.OKCYAN}BWT (rotations): {bwt_result}{bcolors.ENDC}")
print("With I:", I, "\n")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")


--------------------
Input text: abaababa
aabaaba[92mb[0m
aababaa[92mb[0m
abaabaa[92mb[0m
abaabab[92ma [91m<- I[0m
ababaab[92ma[0m
baabaab[92ma[0m
baababa[92ma[0m
babaaba[92ma[0m
[96mBWT (rotations): bbbaaaaa[0m
With I: 3 

Equal letter runs: 2
Character: b, Start Index: 0, Length: 3
Character: a, Start Index: 3, Length: 5


In [7]:
words = generate_words("abc", 3)

max_equal_runs = 0
max_word = []
max_bwt_result = []
for w in words:
    bwt = BWT(w)
    bwt_result, I = bwt.bwt_via_cyclic_rotations(log=False)
    runs = bwt.find_equal_letter_runs(bwt_result)
    if len(runs) > max_equal_runs:
        max_equal_runs = len(runs)
        max_word.clear()
        max_bwt_result.clear()
    if len(runs) == max_equal_runs:
        max_word.append(w)
        max_bwt_result.append(bwt_result)

print("Max equal runs:", max_equal_runs)
for i, w in enumerate(max_word):
    print(f"Word: {w}, BWT: {max_bwt_result[i]}")

Max equal runs: 3
Word: abc, BWT: cab
Word: acb, BWT: bca
Word: bac, BWT: bca
Word: bca, BWT: cab
Word: cab, BWT: cab
Word: cba, BWT: bca


In [8]:
text = "".join(get_fibonacci_words(6))[:-1]
bwt = BWT(te)
print("-" * (len(te) + 12))
print("Input text:", te)
bwt_result, I = bwt.bwt_via_cyclic_rotations()
print(f"{bcolors.OKCYAN}BWT (rotations): {bwt_result}{bcolors.ENDC}")
print("With I:", I, "\n")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")


--------------------
Input text: abaababa
aabaaba[92mb[0m
aababaa[92mb[0m
abaabaa[92mb[0m
abaabab[92ma [91m<- I[0m
ababaab[92ma[0m
baabaab[92ma[0m
baababa[92ma[0m
babaaba[92ma[0m
[96mBWT (rotations): bbbaaaaa[0m
With I: 3 

Equal letter runs: 2
Character: b, Start Index: 0, Length: 3
Character: a, Start Index: 3, Length: 5


#### Burrows-Wheeleer transform via SA-IS sorting

In [9]:
text = [
    "abaaba",
    "".join(get_fibonacci_words(5)),
    "".join(get_fibonacci_words(6)),
    "".join(get_fibonacci_words(6))[:-1],
    "dabracadabrac",
]

for te in text:
    bwt = BWT(te)
    print("-" * (len(te) + 12))
    print("Input text:", te + "\n")
    bwt_result = bwt.bwt_via_suffix_array()
    print(f"{bcolors.OKCYAN}BWT (suffix array): {bwt_result}{bcolors.ENDC}")
    runs = bwt.find_equal_letter_runs(bwt_result)
    if runs:
        print("Equal letter runs:", len(runs))
    for char, start, length in runs:
        print(f"Character: {char}, Start Index: {start}, Length: {length}")
    print()

------------------
Input text: abaaba

#### SA-IS iter 0 ####
I  : 0  1  2  3  4  5  6 
T  : a  b  a  a  b  a  $ 
t  : [94mS [0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [93mL [0m [94mS*[0m 
LMS subtrings: {2: 'aaba$', 6: '$'}
[94m Without repetitions.[0m

After insert LMS  : 6   -1  -1  -1  2   -1  -1 
After L-to-R scan : 6   5   -1  -1  2   4   1  
After R-to-L scan : 6   5   2   3   0   4   1  
[92mNew compact string: A$[0m
SA: [6, 5, 2, 3, 0, 4, 1] 

[96mBWT (suffix array): abba$aa[0m
Equal letter runs: 5
Character: a, Start Index: 0, Length: 1
Character: b, Start Index: 1, Length: 2
Character: a, Start Index: 3, Length: 1
Character: $, Start Index: 4, Length: 1
Character: a, Start Index: 5, Length: 2

--------------------
Input text: abaababa

#### SA-IS iter 0 ####
I  : 0  1  2  3  4  5  6  7  8 
T  : a  b  a  a  b  a  b  a  $ 
t  : [94mS [0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [94mS*[0m [93mL [0m [93mL [0m [94mS*[0m 
LMS subtrings: {2: 'a

In [10]:
text = "abaaba"

bwt = BWT(text)
print("-" * (len(text) + 12))
print("Input text:", text + "\n")
bwt_result = bwt.bwt_via_suffix_array()
print(f"{bcolors.OKCYAN}BWT (suffix array): {bwt_result}{bcolors.ENDC}")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")

------------------
Input text: abaaba

#### SA-IS iter 0 ####
I  : 0  1  2  3  4  5  6 
T  : a  b  a  a  b  a  $ 
t  : [94mS [0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [93mL [0m [94mS*[0m 
LMS subtrings: {2: 'aaba$', 6: '$'}
[94m Without repetitions.[0m

After insert LMS  : 6   -1  -1  -1  2   -1  -1 
After L-to-R scan : 6   5   -1  -1  2   4   1  
After R-to-L scan : 6   5   2   3   0   4   1  
[92mNew compact string: A$[0m
SA: [6, 5, 2, 3, 0, 4, 1] 

[96mBWT (suffix array): abba$aa[0m
Equal letter runs: 5
Character: a, Start Index: 0, Length: 1
Character: b, Start Index: 1, Length: 2
Character: a, Start Index: 3, Length: 1
Character: $, Start Index: 4, Length: 1
Character: a, Start Index: 5, Length: 2


In [11]:
text = "".join(get_fibonacci_words(5))

bwt = BWT(text)
print("-" * (len(text) + 12))
print("Input text:", text + "\n")
bwt_result = bwt.bwt_via_suffix_array()
print(f"{bcolors.OKCYAN}BWT (suffix array): {bwt_result}{bcolors.ENDC}")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")

--------------------
Input text: abaababa

#### SA-IS iter 0 ####
I  : 0  1  2  3  4  5  6  7  8 
T  : a  b  a  a  b  a  b  a  $ 
t  : [94mS [0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [94mS*[0m [93mL [0m [93mL [0m [94mS*[0m 
LMS subtrings: {2: 'aaba', 5: 'aba$', 8: '$'}
[94m Without repetitions.[0m

After insert LMS  : 8   -1  -1  -1  2   5   -1  -1  -1 
After L-to-R scan : 8   7   -1  -1  2   5   6   1   4  
After R-to-L scan : 8   7   2   5   0   3   6   1   4  
[92mNew compact string: AB$[0m
SA: [8, 7, 2, 5, 0, 3, 6, 1, 4] 

[96mBWT (suffix array): abbb$aaaa[0m
Equal letter runs: 4
Character: a, Start Index: 0, Length: 1
Character: b, Start Index: 1, Length: 3
Character: $, Start Index: 4, Length: 1
Character: a, Start Index: 5, Length: 4


In [12]:
text = "".join(get_fibonacci_words(6))

bwt = BWT(text)
print("-" * (len(text) + 12))
print("Input text:", text + "\n")
bwt_result = bwt.bwt_via_suffix_array()
print(f"{bcolors.OKCYAN}BWT (suffix array): {bwt_result}{bcolors.ENDC}")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")

-------------------------
Input text: abaababaabaab

#### SA-IS iter 0 ####
I  : 0  1  2  3  4  5  6  7  8  9  10 11 12 13
T  : a  b  a  a  b  a  b  a  a  b  a  a  b  $ 
t  : [94mS [0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [94mS*[0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [94mS*[0m 
LMS subtrings: {2: 'aaba', 5: 'aba', 7: 'aaba', 10: 'aab$', 13: '$'}
[94m With repetitions.[0m

After insert LMS  : 13  -1  -1  -1  -1  2   5   7   10  -1  -1  -1  -1  -1 
After L-to-R scan : 13  -1  -1  -1  -1  2   5   7   10  12  1   4   6   9  
After R-to-L scan : 13  10  2   7   11  0   3   5   8   12  1   4   6   9  
[92mNew compact string: BCBA$[0m
SA: [13, 10, 2, 7, 11, 0, 3, 5, 8, 12, 1, 4, 6, 9] 

#### SA-IS iter 1 ####
I  : 0  1  2  3  4 
T  : B  C  B  A  $ 
t  : [94mS [0m [93mL [0m [93mL [0m [93mL [0m [94mS*[0m 
LMS subtrings: {4: '$'}
[94m Without repetitions.[0m

After insert LMS  : 4   -1  -1  -1  -1 
After L-to-R scan

In [13]:
text = "".join(get_fibonacci_words(6))[:-1]

bwt = BWT(text)
print("-" * (len(text) + 12))
print("Input text:", text + "\n")
bwt_result = bwt.bwt_via_suffix_array()
print(f"{bcolors.OKCYAN}BWT (suffix array): {bwt_result}{bcolors.ENDC}")
runs = bwt.find_equal_letter_runs(bwt_result)
if runs:
    print("Equal letter runs:", len(runs))
for char, start, length in runs:
    print(f"Character: {char}, Start Index: {start}, Length: {length}")

------------------------
Input text: abaababaabaa

#### SA-IS iter 0 ####
I  : 0  1  2  3  4  5  6  7  8  9  10 11 12
T  : a  b  a  a  b  a  b  a  a  b  a  a  $ 
t  : [94mS [0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [94mS*[0m [93mL [0m [94mS*[0m [94mS [0m [93mL [0m [94mS*[0m [93mL [0m [94mS*[0m 
LMS subtrings: {2: 'aaba', 5: 'aba', 7: 'aaba', 10: 'aa$', 12: '$'}
[94m With repetitions.[0m

After insert LMS  : 12  -1  -1  -1  -1  2   5   7   10  -1  -1  -1  -1 
After L-to-R scan : 12  11  -1  -1  -1  2   5   7   10  1   4   6   9  
After R-to-L scan : 12  11  10  2   7   0   3   5   8   1   4   6   9  
[92mNew compact string: BCBA$[0m
SA: [12, 11, 10, 2, 7, 0, 3, 5, 8, 1, 4, 6, 9] 

#### SA-IS iter 1 ####
I  : 0  1  2  3  4 
T  : B  C  B  A  $ 
t  : [94mS [0m [93mL [0m [93mL [0m [93mL [0m [94mS*[0m 
LMS subtrings: {4: '$'}
[94m Without repetitions.[0m

After insert LMS  : 4   -1  -1  -1  -1 
After L-to-R scan : 4   3   2   -1  1  
After R-to-L s