# Construct the Burrows-Wheeler Transform of a String

In [7]:
def BWT(text):
    """Return the Burrows-Wheeler transform of ``text``.

    All cyclic rotations of ``text`` are ranked in sorted order, and the
    final character of each rotation, read in that order, forms the result.
    An empty ``text`` yields an empty string.
    """
    size = len(text)
    # In the doubled string, every window of length `size` that starts in the
    # first copy is exactly one cyclic rotation of `text`.
    doubled = text * 2
    ranked_starts = sorted(
        range(size),
        key=lambda start: doubled[start:start + size],
    )
    # The last character of the rotation starting at `start` sits at
    # offset `start + size - 1` of the doubled string.
    return ''.join(doubled[start + size - 1] for start in ranked_starts)

In [14]:
# Sample run: print the transform of a '$'-terminated string.
print(BWT('GCGTGCCTGGTCA$'))

ACTGGCT$TGCGGC


# Generate the Last-to-First Mapping of a String

In [20]:
from collections import defaultdict    

In [24]:
def last_to_first(text, k):
    """Return the first-column position matching position ``k`` of ``text``.

    ``text`` is read as the last column and its sorted characters as the
    first column. Every character is tagged with its 1-based occurrence
    number within its own column; the result is the first-column index
    carrying the same ``(character, occurrence)`` tag as index ``k`` of
    ``text``.

    Raises:
        KeyError: If ``k`` is not an index in ``range(len(text))``.
    """
    def tagged(column):
        # Pair each symbol with the number of times it has appeared so far.
        tally = defaultdict(int)
        for symbol in column:
            tally[symbol] += 1
            yield symbol, tally[symbol]

    tag_at = dict(enumerate(tagged(text)))
    sorted_column = ''.join(sorted(text))
    row_of = {tag: row for row, tag in enumerate(tagged(sorted_column))}
    return row_of[tag_at[k]]

In [25]:
# Sample run: first-column position matching index 3 of the last column.
last_to_first('T$GACCA',3)

1

# Inverse Burrows-Wheeler Transform Problem

In [27]:
def build_two_way_map(text):
    """Build the lookup tables linking the last and first columns.

    ``text`` is read as the last column and its sorted characters as the
    first column. Each character is tagged with its 1-based occurrence
    number within its own column.

    Returns:
        A ``(first_column, last_column)`` pair of dictionaries:
        ``last_column`` maps every index of ``text`` to its
        ``(character, occurrence)`` tag, and ``first_column`` maps every
        such tag to its index in the sorted characters of ``text``.
    """
    def number_occurrences(symbols):
        # Tag each symbol with how many times it has been seen so far.
        seen = defaultdict(int)
        numbered = []
        for symbol in symbols:
            seen[symbol] += 1
            numbered.append((symbol, seen[symbol]))
        return numbered

    last_column = dict(enumerate(number_occurrences(text)))
    ordered = ''.join(sorted(text))
    first_column = {
        tag: row for row, tag in enumerate(number_occurrences(ordered))
    }
    return first_column, last_column
    

In [33]:
def inverse_BWT(text):
    """Reconstruct a string from its Burrows-Wheeler transform.

    The walk starts at the first-column row of the '$' end marker. At each
    step it reads the last-column symbol of the current row, records it, and
    jumps to the first-column row holding that same symbol occurrence. The
    symbols are therefore collected back to front and reversed on return.

    Args:
        text: Burrows-Wheeler transform containing the '$' end marker.

    Returns:
        The reconstructed string, ending with '$'. An empty ``text`` gives
        an empty string.

    Raises:
        KeyError: If a non-empty ``text`` contains no '$'.
    """
    if not text:
        # Nothing to walk; without this guard the '$' lookup below would
        # raise KeyError for empty input.
        return ''
    first_column, last_column = build_two_way_map(text)
    row = first_column[('$', 1)]
    # Collect symbols in a list and join once instead of growing a string.
    collected = ['$']
    while len(collected) < len(text):
        symbol, occurrence = last_column[row]
        collected.append(symbol)
        row = first_column[(symbol, occurrence)]
    return ''.join(reversed(collected))
    

In [34]:
# Sample run: print the string reconstructed from a transform.
print(inverse_BWT('TTCCTAACG$A'))

TACATCACGT$
