In [3]:
from burrows_wheeler import *

# (Forward) Burrows-Wheeler Transform

## Example 1

Computing the Burrows-Wheeler Transform of the string `banana`, one step at a time.


In [12]:
# Wrap the example word in a BWT object, then show the raw input next to its
# processed form (the same text with a '$' end marker appended).
banana = BWT('banana')
print(banana.string, banana.processed_string, sep='\n')

banana
banana$


In [13]:
# Rotation matrix of the processed string, left in the order the rotations
# were generated (no sorting applied).
unsorted_rotations = banana.permute_matrix(lexical_sort=False)
print(unsorted_rotations)

[['anana$b']
 ['nana$ba']
 ['ana$ban']
 ['na$bana']
 ['a$banan']
 ['$banana']
 ['banana$']]


In [15]:
# Same rotation matrix, this time with the rows arranged in lexicographic
# order; the '$' marker sorts ahead of the letters.
sorted_rotations = banana.permute_matrix(lexical_sort=True)
print(sorted_rotations)

[['$banana']
 ['a$banan']
 ['ana$ban']
 ['anana$b']
 ['banana$']
 ['na$bana']
 ['nana$ba']]


In [17]:
# Compute the transform itself. The printed result matches the last column
# (final character of every row) of the sorted rotation matrix printed above.
banana_bwt = banana.bwt()
print(banana_bwt)

annb$aa


## Example 2

In [18]:
# Second example: a sequence over the DNA alphabet (A, C, G, T) with long
# runs of repeated characters.
dna = BWT('AAAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT')

In [19]:
# Sorted rotation matrix for the DNA sequence. Left as a bare expression (no
# print), so the notebook displays the returned object's repr.
dna.permute_matrix(lexical_sort = True)

array([['$AAAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT'],
       ['AAAAAAAAGGCGCGCGCGCGCGGCTTTT$AAAG'],
       ['AAAAAAAGGCGCGCGCGCGCGGCTTTT$AAAGA'],
       ['AAAAAAGGCGCGCGCGCGCGGCTTTT$AAAGAA'],
       ['AAAAAGGCGCGCGCGCGCGGCTTTT$AAAGAAA'],
       ['AAAAGGCGCGCGCGCGCGGCTTTT$AAAGAAAA'],
       ['AAAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT$'],
       ['AAAGGCGCGCGCGCGCGGCTTTT$AAAGAAAAA'],
       ['AAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT$A'],
       ['AAGGCGCGCGCGCGCGGCTTTT$AAAGAAAAAA'],
       ['AGAAAAAAAAGGCGCGCGCGCGCGGCTTTT$AA'],
       ['AGGCGCGCGCGCGCGGCTTTT$AAAGAAAAAAA'],
       ['CGCGCGCGCGCGGCTTTT$AAAGAAAAAAAAGG'],
       ['CGCGCGCGCGGCTTTT$AAAGAAAAAAAAGGCG'],
       ['CGCGCGCGGCTTTT$AAAGAAAAAAAAGGCGCG'],
       ['CGCGCGGCTTTT$AAAGAAAAAAAAGGCGCGCG'],
       ['CGCGGCTTTT$AAAGAAAAAAAAGGCGCGCGCG'],
       ['CGGCTTTT$AAAGAAAAAAAAGGCGCGCGCGCG'],
       ['CTTTT$AAAGAAAAAAAAGGCGCGCGCGCGCGG'],
       ['GAAAAAAAAGGCGCGCGCGCGCGGCTTTT$AAA'],
       ['GCGCGCGCGCGCGGCTTTT$AAAGAAAAAAAAG'],
       ['GCGCGCGCGCGGCTTTT$AAAGAAA

In [21]:
# Keep the transform in dna_bwt: the inverse-transform cells further down
# start from this value.
dna_bwt = dna.bwt()
print(dna_bwt)

TGAAAA$AAAAAGGGGGGGAGCCCCCGACTTTC


# (Inverse) Burrows-Wheeler Transform

In [25]:
# Starting point for the inverse transform: the BWT of `dna` saved earlier.
# Show the original sequence and its transform side by side; the goal of the
# following cells is to get from the second back to the first.
for label, text in (
    ('Original string:', dna.string),
    ('BWT of this string:', dna_bwt),
):
    print(label, text)

Original string: AAAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT
BWT of this string: TGAAAA$AAAAAGGGGGGGAGCCCCCGACTTTC


In [29]:
# Recover the original sequence starting only from dna_bwt.

# Step 1: wrap the transformed string in a new BWT object and rebuild the
# rotation matrix from it. The final string is read off with ibwt() in a
# later cell.
dnabwt = BWT(dna_bwt)
ibwt_dna_bwt_mat = dnabwt.ibwt_matrix()

In [32]:
# The rebuilt matrix should match the sorted rotation matrix produced earlier
# by dna.permute_matrix(lexical_sort = True).
print(ibwt_dna_bwt_mat)

[['$AAAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT']
 ['AAAAAAAAGGCGCGCGCGCGCGGCTTTT$AAAG']
 ['AAAAAAAGGCGCGCGCGCGCGGCTTTT$AAAGA']
 ['AAAAAAGGCGCGCGCGCGCGGCTTTT$AAAGAA']
 ['AAAAAGGCGCGCGCGCGCGGCTTTT$AAAGAAA']
 ['AAAAGGCGCGCGCGCGCGGCTTTT$AAAGAAAA']
 ['AAAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT$']
 ['AAAGGCGCGCGCGCGCGGCTTTT$AAAGAAAAA']
 ['AAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT$A']
 ['AAGGCGCGCGCGCGCGGCTTTT$AAAGAAAAAA']
 ['AGAAAAAAAAGGCGCGCGCGCGCGGCTTTT$AA']
 ['AGGCGCGCGCGCGCGGCTTTT$AAAGAAAAAAA']
 ['CGCGCGCGCGCGGCTTTT$AAAGAAAAAAAAGG']
 ['CGCGCGCGCGGCTTTT$AAAGAAAAAAAAGGCG']
 ['CGCGCGCGGCTTTT$AAAGAAAAAAAAGGCGCG']
 ['CGCGCGGCTTTT$AAAGAAAAAAAAGGCGCGCG']
 ['CGCGGCTTTT$AAAGAAAAAAAAGGCGCGCGCG']
 ['CGGCTTTT$AAAGAAAAAAAAGGCGCGCGCGCG']
 ['CTTTT$AAAGAAAAAAAAGGCGCGCGCGCGCGG']
 ['GAAAAAAAAGGCGCGCGCGCGCGGCTTTT$AAA']
 ['GCGCGCGCGCGCGGCTTTT$AAAGAAAAAAAAG']
 ['GCGCGCGCGCGGCTTTT$AAAGAAAAAAAAGGC']
 ['GCGCGCGCGGCTTTT$AAAGAAAAAAAAGGCGC']
 ['GCGCGCGGCTTTT$AAAGAAAAAAAAGGCGCGC']
 ['GCGCGGCTTTT$AAAGAAAAAAAAGGCGCGCGC']
 ['GCGGCTTTT$AAAGAAAAAAAA

In [35]:
# Step 2: read the original sequence back out of the transformed string.
ibwt_dnabwt = dnabwt.ibwt()

# The recovered text carries the '$' end marker, so it is the original
# sequence plus that marker. Check the round trip explicitly rather than
# relying on a visual comparison of the printout.
assert str(ibwt_dnabwt) == str(dna.string) + '$', \
    'inverse BWT did not recover the original sequence'

print(ibwt_dnabwt)  # original sequence followed by the '$' terminator

AAAGAAAAAAAAGGCGCGCGCGCGCGGCTTTT$


## Example 3

In [41]:
# Third example: forward transform of a word with many repeated letters.
miss = BWT('MISSISSIPPI')
miss_bwt = miss.bwt()

# Report the input word and its transform, one labelled line each.
for caption, shown in (('New word:', miss.string), ('BWT of new word:', miss_bwt)):
    print(caption, shown)

New word: MISSISSIPPI
BWT of new word: IPSSM$PISSII


In [43]:
# Inverse BWT of the transform computed in the previous cell
# (miss_bwt, i.e. 'IPSSM$PISSII'). Reusing the variable instead of retyping
# the literal keeps this cell correct if the example word is changed.
miss_ibwt = BWT(miss_bwt).ibwt()

# The inverse returns the original word followed by the '$' end marker;
# verify the round trip so a regression fails loudly on re-run.
assert str(miss_ibwt) == str(miss.string) + '$', \
    'inverse BWT did not recover the original word'

print('Inverse Burrows Wheeler Transform:',miss_ibwt)

Inverse Burrows Wheeler Transform: MISSISSIPPI$
