In [0]:
import numpy as np
from collections import defaultdict

# Construct a Trie from a Collection of Patterns

In [0]:
class Trie:
  """Trie over the DNA alphabet (A, C, G, T) that records each edge it creates.

  Nodes are numbered in creation order and the root is node 0. Child links
  are kept in ``self.next``, an ``(n_rows, 4)`` integer table where -1 means
  "no child". The table grows on demand, so the number of nodes is not
  limited by the initial capacity.

  NOTE: a later cell of this notebook defines another class that is also
  named ``Trie`` (the matching variant). Re-run this cell before using the
  edge list functionality if that later cell was executed last.
  """

  # Column of the child table used for each nucleotide.
  _SYMBOL_INDEX = {'A':0, 'C':1, 'G':2, 'T':3}

  def __init__(self, capacity=10000):
    """Create an empty trie.

    capacity: number of node rows to pre-allocate (at least 1 is used).
    """
    self.next = np.full((max(int(capacity), 1),4),-1,dtype=int)
    self.edge = []
    self.node_idx = 0

  def get_edgelist(self):
    """Return the list of ``(parent, child, symbol)`` edges in creation order."""
    return self.edge

  def _grow(self):
    """Double the child table so that newly created node ids stay in range."""
    spare = np.full(self.next.shape, -1, dtype=int)
    self.next = np.concatenate((self.next, spare))

  def insert(self, word):
    """Add ``word`` to the trie, creating nodes and edges where needed.

    Raises KeyError if ``word`` contains a symbol other than A, C, G or T.
    """
    root = 0
    for ch in word:
      key = self._SYMBOL_INDEX[ch]
      if self.next[root, key] == -1:
        self.node_idx += 1
        # The new node will be used as a row index on the next step.
        if self.node_idx >= len(self.next):
          self._grow()
        self.edge.append((root, self.node_idx, ch))
        self.next[root, key] = self.node_idx
      # Convert to a plain int so edge tuples never contain numpy scalars.
      root = int(self.next[root, key])

In [0]:
def trie_construct(patterns):
  """Build a trie from ``patterns`` and return its edge list.

  Every pattern is inserted in the given order; the result is whatever
  ``Trie.get_edgelist()`` returns afterwards. This relies on the ``Trie``
  class that provides ``get_edgelist`` being the one currently defined.
  """
  builder = Trie()
  for pattern in patterns:
    builder.insert(pattern)
  edges = builder.get_edgelist()
  return edges

# Implement TrieMatching

In [0]:
class Trie:
  """Trie over A/C/G/T with end-of-pattern flags, used for multi-pattern matching.

  Nodes are numbered in creation order and the root is node 0.
  ``self.nxt`` is an ``(n_rows, 4)`` child table (-1 means "no child") and
  ``self.end[node]`` is 1 when some inserted pattern ends at ``node``.
  Both arrays grow on demand.
  """

  # Column of the child table used for each nucleotide.
  _SYMBOL_INDEX = {'A':0,'C':1,'G':2,'T':3}

  def __init__(self, capacity=100000):
    """Create an empty trie.

    capacity: number of node rows to pre-allocate (at least 1 is used).
    """
    capacity = max(int(capacity), 1)
    self.nxt = np.full((capacity,4),-1,dtype=int)
    self.end = np.zeros(capacity, dtype=int)
    self.node_idx = 0

  def _grow(self):
    """Double both node arrays so that newly created node ids stay in range."""
    self.nxt = np.concatenate((self.nxt, np.full(self.nxt.shape, -1, dtype=int)))
    self.end = np.concatenate((self.end, np.zeros(self.end.shape, dtype=int)))

  def insert(self, word):
    """Add ``word`` as a pattern and flag the node where it ends.

    Raises KeyError if ``word`` contains a symbol other than A, C, G or T.
    """
    root = 0
    for ch in word:
      key = self._SYMBOL_INDEX[ch]
      if self.nxt[root, key] == -1:
        self.node_idx += 1
        # The new node will be used as a row index on the next step.
        if self.node_idx >= len(self.nxt):
          self._grow()
        self.nxt[root, key] = self.node_idx
      root = int(self.nxt[root, key])
    self.end[root] = 1

  def search(self, text):
    """Return the sorted start indices in ``text`` where some pattern matches.

    Each index is reported once, even if several patterns (one being a
    prefix of another) match there. Symbols outside A/C/G/T cannot be part
    of any stored pattern, so they simply end the current match attempt.
    The end flag is only checked after consuming a symbol, so an empty
    pattern is never reported.
    """
    n , position = len(text), []
    for idx in range(n):
      root = 0
      for j in range(idx,n):
        key = self._SYMBOL_INDEX.get(text[j])
        if key is None or self.nxt[root, key] == -1:
          break
        root = int(self.nxt[root, key])
        if self.end[root]:
          position.append(idx)
          # One hit is enough for this start index; avoid duplicates.
          break
    return position

In [0]:
def implement_trie_matching(text, patterns):
  """Print the start positions in ``text`` where any of ``patterns`` matches.

  All patterns are loaded into a ``Trie`` and the list returned by its
  ``search`` method is printed space-separated on one line. Nothing is
  returned. This relies on the ``Trie`` class that provides ``search``
  being the one currently defined.
  """
  matcher = Trie()
  for pattern in patterns:
    matcher.insert(pattern)
  print(*matcher.search(text))

In [0]:
# Load the BA9B dataset: the first line is the text, every following line is a pattern.
with open('rosalind_ba9b.txt', 'r') as file:
  data = file.read().splitlines()

# The file is only needed for reading, so the matching runs after it is closed.
text = data[0]
patterns = data[1:]
implement_trie_matching(text, patterns)

31 38 52 183 190 197 211 218 231 240 247 254 310 320 332 404 444 451 461 503 619 650 657 669 683 698 710 717 736 743 775


In [0]:
def suffix_array(text):
  """Return the start indices of all suffixes of ``text`` in lexicographic order.

  Each index is ranked by the suffix slice ``text[i:]`` itself, so a full
  copy of every suffix is built as a sort key.
  """
  order = list(range(len(text)))
  order.sort(key=lambda start: text[start:])
  return order

In [0]:
# Demo: suffix array of a '$'-terminated example string (index 15, the '$' suffix, comes first).
suffix_array('AACGATAGCGGTAGA$')

[15, 14, 0, 1, 12, 6, 4, 2, 8, 13, 3, 7, 9, 10, 11, 5]

In [0]:
# Demo: list every suffix of the example string and print them in sorted order,
# one per line, to cross-check the suffix array above by eye.
string = 'AACGATAGCGGTAGA$'
suf = [string[offset:] for offset, _ in enumerate(string)]
for s in sorted(suf):
  print(s)

In [0]:
def bwt(text):
  """Return the Burrows-Wheeler transform of ``text``.

  The transform is the last column of the sorted cyclic rotations of
  ``text``. Rotations (not plain suffixes) are used as sort keys so the
  result is also correct when ``text`` does not end in a unique smallest
  end marker; for '$'-terminated input both orderings coincide.
  Returns '' for empty input.
  """
  n = len(text)
  # Rotation starting at i is the length-n window of text+text at offset i.
  doubled = text + text
  order = sorted(range(n), key=lambda i: doubled[i:i + n])
  # text[k-1] is the symbol preceding rotation k; k == 0 wraps to the last symbol.
  return ''.join(text[k-1] for k in order)

In [53]:
# Demo: Burrows-Wheeler transform of a short '$'-terminated string.
bwt('monisaha$')

'ahsan$omi'

In [0]:
def generate_two_way_map(text):
  """Build the two lookup tables linking ``text`` to its sorted form.

  Each symbol is tagged with its occurrence number (1-based count of that
  symbol seen so far). Returns ``(fmap, rmap)`` where

  * ``rmap[i]`` is the ``(symbol, occurrence)`` tag of position ``i`` in ``text``;
  * ``fmap[(symbol, occurrence)]`` is the position of that tag in ``sorted(text)``.
  """
  def tagged(symbols):
    # Yield (position, (symbol, running occurrence count)) for each symbol.
    seen = defaultdict(int)
    for pos, sym in enumerate(symbols):
      seen[sym] += 1
      yield pos, (sym, seen[sym])

  rmap = dict(tagged(text))
  fmap = {tag: pos for pos, tag in tagged(sorted(text))}
  return fmap, rmap

In [0]:
def inv_bwt(text):
  """Reconstruct the original string from its Burrows-Wheeler transform.

  ``text`` is the transform (last column of the sorted rotations) and must
  contain the '$' end marker. The original string, ending in '$', is
  returned. Empty input yields ''.

  Raises ValueError if ``text`` is non-empty but has no '$'.
  """
  if not text:
    return ''
  if '$' not in text:
    raise ValueError("inv_bwt expects a transform containing the '$' end marker")
  fmap,rmap = generate_two_way_map(text)
  # Start at the row whose first-column symbol is '$' (row 0 when '$' sorts first).
  row = fmap[('$', 1)]
  # Symbols are recovered back to front, so collect them reversed and join once.
  rev_chars = ['$']
  while len(rev_chars) < len(text):
    # Last-column symbol of this row, then jump to the row where that same
    # occurrence sits in the first column.
    ch, rank = rmap[row]
    rev_chars.append(ch)
    row = fmap[(ch, rank)]
  return ''.join(reversed(rev_chars))

In [0]:
# Read the BA9J input with plain Python rather than a shell escape: this works
# outside IPython and on any OS, and a missing file raises FileNotFoundError
# instead of silently storing the shell's error message in `x`.
with open('rosalind_ba9j.txt', 'r') as file:
  x = file.read().splitlines()

In [0]:
# Decode the first input line before opening the output file, so a failure in
# inv_bwt does not leave behind an empty, truncated out_i.txt.
decoded = inv_bwt(x[0])
with open('out_i.txt','w') as file:
  file.write(decoded)