In [4]:
import numpy as np
from copy import deepcopy

In [5]:
def FMIndex(bwt):
    """Build the occurrence table and offset table for a BWT string.

    Parameters
    ----------
    bwt : str
        The Burrows-Wheeler transformed text, one symbol per character.

    Returns
    -------
    matrix : list of dict
        ``len(bwt) + 1`` rows. ``matrix[i][c]`` is how many times symbol
        ``c`` occurs in ``bwt[:i]``; row 0 is all zeros and the last row
        holds the total count of every symbol.
    Offset : dict
        ``Offset[c]`` is the total number of symbols in ``bwt`` that sort
        before ``c``.
    """
    # A plain set keeps the keys as built-in ``str`` objects (np.unique would
    # turn them into numpy scalars, which changes how the dicts print).
    symbols = sorted(set(bwt))

    matrix = [dict.fromkeys(symbols, 0)]
    for symb in bwt:
        # Rows are flat {symbol: int} dicts, so a shallow copy is enough.
        row = dict(matrix[-1])
        row[symb] += 1
        matrix.append(row)

    # Running total of the final counts, visited in sorted symbol order.
    Offset = {}
    total = 0
    for symb in symbols:
        Offset[symb] = total
        total += matrix[-1][symb]
    return matrix, Offset

In [9]:
# Build the FM-index tables for a sample BWT string and print them as a table.
bwt = "annb$aa"
FM, Offset = FMIndex(bwt)

# One column per symbol, so the table adapts to whatever alphabet bwt contains
# instead of assuming exactly four symbols.
symbols = sorted(Offset)
print(",".join("%2s" % symbol for symbol in symbols))
for row in FM:
    print(",".join("%2d" % row[symbol] for symbol in symbols))
print('\n')
print(Offset)

 $, a, b, n
 0, 0, 0, 0
 0, 1, 0, 0
 0, 1, 0, 1
 0, 1, 0, 2
 0, 1, 1, 2
 1, 1, 1, 2
 1, 2, 1, 2
 1, 3, 1, 2


{'$': 0, 'a': 1, 'b': 4, 'n': 5}


In [10]:
def findBWT(s, FM, Offset):
    """Backward-search pattern ``s`` with the tables produced by ``FMIndex``.

    Parameters
    ----------
    s : str
        Pattern to look for.
    FM : list of dict
        Occurrence table; ``FM[i][c]`` is the count of ``c`` in the first
        ``i`` symbols of the BWT.
    Offset : dict
        For each symbol, the number of BWT symbols that sort before it.

    Returns
    -------
    (lo, hi) : tuple of int
        Half-open range; ``hi - lo`` is the number of matches and
        ``lo == hi`` means the pattern does not occur. An empty pattern
        yields the full range ``(0, len(FM) - 1)``. A pattern containing a
        symbol that is not in the index yields the empty range ``(0, 0)``.
    """
    lo = 0
    # Index of the last row, written explicitly so that an empty pattern
    # returns a real upper bound rather than the -1 sentinel.
    hi = len(FM) - 1
    # The pattern is consumed from its last symbol to its first.
    for symb in reversed(s):
        if symb not in Offset:
            # Symbol never appears in the indexed text: no possible match.
            return 0, 0
        lo = Offset[symb] + FM[lo][symb]
        hi = Offset[symb] + FM[hi][symb]
    return lo, hi

In [11]:
# Print the (lo, hi) range returned for a few sample patterns, one per line.
for pattern in ("ana", "ban", "ann"):
    match_range = findBWT(pattern, FM, Offset)
    print(match_range)

(2, 4)
(4, 5)
(4, 4)


In [12]:
def inverseBWT(s):
    """Invert a Burrows-Wheeler transform by rebuilding the rotation table.

    Starting from the BWT symbols, the rows are repeatedly sorted and each
    BWT symbol is prepended to the row at the same position, until every row
    is as long as the input.

    Parameters
    ----------
    s : str
        BWT string that uses ``'$'`` as the end-of-text marker.

    Returns
    -------
    str or None
        The first rebuilt row that ends with ``'$'``, or ``None`` when no
        row does (for example an empty input or an input without ``'$'``).
    """
    column = list(s)
    # Rows start at length 1 and grow by one symbol per pass, so len(s) - 1
    # passes are needed; for empty input the loop simply does not run.
    for _ in range(len(s) - 1):
        column = [symb + row for symb, row in zip(s, sorted(column))]
    for row in column:
        if row[-1] == '$':
            return row
    return None

In [13]:
# Decode several sample BWT strings and print each recovered text.
sample_transforms = (
    "ltherea$",
    "amnnn$lcpmnapaaaaaaala",
    "annb$aa",
    "nn$bnbaaaaa",
)
for transformed in sample_transforms:
    print(inverseBWT(transformed))

tarheel$
amanaplanacanalpanama$
banana$
abananaban$
