In [178]:
import numpy as np
from numpy.random import randint

In [200]:
def RunRandomizedMotifSearch(Dna, k, t, runs=1000):
  """Repeat RandomizedMotifSearch and keep the lowest-scoring motif set.

  Args:
    Dna: collection of DNA strings to search.
    k: motif (k-mer) length.
    t: passed through unchanged to RandomizedMotifSearch.
    runs: number of independent restarts. Defaults to 1000, the value that
      was previously hard-coded.

  Returns:
    The motif list with the smallest score seen over all runs, or an empty
    list when no run is performed (runs <= 0).
  """
  best_motifs = []
  best_score = None  # None means "no run has finished yet"
  for _ in range(runs):
    motifs, motifs_score = RandomizedMotifSearch(Dna, k, t)
    # Strict '<' keeps the earliest run on ties, as before.
    if best_score is None or motifs_score < best_score:
      best_motifs = motifs
      best_score = motifs_score
  return best_motifs

Since a single run of RANDOMIZEDMOTIFSEARCH may generate a rather poor set of
motifs, bioinformaticians usually run this algorithm thousands of times. On each run,
they begin from a new randomly selected set of k-mers, selecting the best set of k-mers
found in all these runs.


In [180]:
def RandomizedMotifSearch(Dna, k, t):
  """Run one randomized motif search from a random starting set of k-mers.

  Starting from one random k-mer per string, the loop builds a profile from
  the current motifs, re-selects the most probable k-mer of every string
  under that profile, and stops as soon as the score no longer improves.

  Args:
    Dna: collection of DNA strings.
    k: motif (k-mer) length.
    t: passed through unchanged to SelectRandomKmers.

  Returns:
    A two-element list [best_motifs, best_score].
  """
  best_motifs = SelectRandomKmers(Dna, k, t)
  # Score each motif set once and remember it, instead of re-scoring the
  # unchanged best set on every iteration and again on return.
  best_score = Score(best_motifs, k)
  motifs = best_motifs
  while True:
    profile = GenerateProfile(motifs, k)
    motifs = Motifs(profile, Dna, k)
    motifs_score = Score(motifs, k)
    if motifs_score >= best_score:
      return [best_motifs, best_score]
    best_motifs, best_score = motifs, motifs_score

In [229]:
def SelectRandomKmers(Dna, k, t):
  """Pick one uniformly random k-mer from every string in Dna.

  Args:
    Dna: collection of DNA strings.
    k: k-mer length.
    t: unused; kept so the call signature stays the same.

  Returns:
    A list with one k-mer per input string, in input order.

  Raises:
    ValueError: raised by randint when a string is shorter than k, because
      there is then no valid start position.
  """
  random_kmers = []
  for dna_string in Dna:
    # randint excludes the upper bound, so start is in [0, len - k].
    start = randint(0, len(dna_string) - k + 1)
    # Slice directly instead of materialising every k-mer to pick one.
    random_kmers.append(dna_string[start:start + k])
  return random_kmers

In [202]:
def GenerateProfile(motifs, k):
  """Build a 4 x k profile matrix with add-one pseudocounts.

  Rows are ordered A, C, G, T. Entry (r, i) is
  (count of nucleotide r in column i + 1) / (number of motifs + 4).

  Args:
    motifs: equal-length motif strings (converted with DnaToArray).
    k: number of leading columns to profile.

  Returns:
    A float numpy array of shape (4, k).

  Raises:
    ValueError: if a column contains a symbol other than A, C, G or T.
  """
  row_of = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
  motifs_array = DnaToArray(motifs)
  counts = np.zeros((4, k))
  for i in range(k):
    # One np.unique call per column yields both the symbols and their counts.
    symbols, occurrences = np.unique(motifs_array[:, i], return_counts=True)
    for symbol, occurrence in zip(symbols, occurrences):
      if symbol not in row_of:
        raise ValueError(
            'unexpected symbol %r in motif column %d' % (str(symbol), i))
      counts[row_of[symbol], i] = occurrence
  # Add-one pseudocounts: +1 per cell, +4 in the column total.
  return (counts + 1) / (len(motifs_array) + 4)

In [203]:
def Motifs(profile, Dna, k):
  """Select the profile-most-probable k-mer from every string in Dna.

  Args:
    profile: 4 x k profile matrix, as consumed by KmerProbability.
    Dna: collection of DNA strings.
    k: k-mer length.

  Returns:
    A list with one k-mer per input string. On ties the left-most k-mer
    wins, because np.argmax returns the first maximum.
  """
  most_probable_kmers = []
  for dna_string in Dna:
    # Enumerate the k-mers once and reuse the list for scoring and lookup.
    kmers = FindAllKmers(dna_string, k)
    probabilities = [KmerProbability(profile, kmer) for kmer in kmers]
    most_probable_kmers.append(kmers[np.argmax(probabilities)])
  return most_probable_kmers

In [204]:
def KmerProbability(profile, kmer):
  """Return the probability of kmer under profile.

  The result is the product, over positions, of the profile entry in the
  row given by NucleotideIndex for that nucleotide and the column equal to
  the position. An empty kmer yields 1.
  """
  result = 1
  for position, symbol in enumerate(kmer):
    row = NucleotideIndex(symbol)
    result = result * profile[row][position]
  return result

In [205]:
def NucleotideIndex(nucleotide):
  """Map a nucleotide to its profile row: 'A' -> 0, 'C' -> 1, 'G' -> 2.

  Any other value (including 'T') maps to 3.
  """
  for row, symbol in enumerate('ACG'):
    if nucleotide == symbol:
      return row
  return 3

In [206]:
def IndexNucleotide(index):
  """Map a profile row index to its nucleotide: 0 -> 'A', 1 -> 'C', 2 -> 'G'.

  Any other value maps to 'T'.
  """
  return next(
      (symbol for row, symbol in enumerate('ACG') if index == row),
      'T')

In [207]:
def FindAllKmers(dna_string, k):
  """Return every length-k substring of dna_string, left to right.

  The list is empty when k exceeds the length of dna_string.
  """
  window_count = len(dna_string) - k + 1
  return [dna_string[start:start + k] for start in range(window_count)]

In [208]:
def Score(motifs, k):
  """Return the total Hamming distance from each motif to the consensus.

  The consensus string is derived from the profile of motifs; lower scores
  mean the motifs agree more closely.
  """
  consensus = Consensus(GenerateProfile(motifs, k), k)
  return sum(HammingDistance(consensus, motif) for motif in motifs)

In [209]:
def Consensus(profile, k):
  """Return the length-k consensus string of profile.

  For each of the first k columns, the nucleotide whose row holds the
  largest value is chosen (first row wins on ties, per np.argmax).
  """
  return ''.join(
      IndexNucleotide(np.argmax(profile[:, column])) for column in range(k))

In [210]:
def HammingDistance(string1, string2):
  """Count the positions at which string1 and string2 differ.

  Only the overlapping prefix is compared position by position; every
  surplus character of the longer string counts as one extra mismatch.
  """
  mismatches = sum(1 for left, right in zip(string1, string2) if left != right)
  return mismatches + abs(len(string1) - len(string2))

In [211]:
def DnaToArray(Dna):
  """Convert equal-length strings into a 2-D array of single characters.

  Row r holds the characters of Dna[r]; the column count is taken from the
  length of the first entry.
  """
  row_count = len(Dna)
  column_count = len(Dna[0])
  characters = np.zeros((row_count, column_count), dtype='str')
  for row, dna_string in enumerate(Dna):
    characters[row, :] = np.asarray(list(dna_string), dtype='str')
  return characters

https://www.google.com/search?q=random+dna+strings&rlz=1C1AVFC_enHR898HR898&oq=random+dna+strings&aqs=chrome..69i57j0i546l4.2474j0j7&sourceid=chrome&ie=UTF-8 --> za random dna sekvence

In [168]:
Dna_random = ['gtcaatggcg', 'gaatgttccg', 'tcgttttttg', 'aggaagttac', 'tccgcctacc']

In [109]:
# Upper-case every sequence in place: NucleotideIndex only matches the
# upper-case letters 'A', 'C' and 'G'.
for i in range(len(Dna_random)):
  Dna_random[i] = Dna_random[i].upper()

In [110]:
Dna_random

['GTCAATGGCG', 'GAATGTTCCG', 'TCGTTTTTTG', 'AGGAAGTTAC', 'TCCGCCTACC']

In [111]:
Dna_random_array = DnaToArray(Dna_random)

In [112]:
Dna_random_array

array([['G', 'T', 'C', 'A', 'A', 'T', 'G', 'G', 'C', 'G'],
       ['G', 'A', 'A', 'T', 'G', 'T', 'T', 'C', 'C', 'G'],
       ['T', 'C', 'G', 'T', 'T', 'T', 'T', 'T', 'T', 'G'],
       ['A', 'G', 'G', 'A', 'A', 'G', 'T', 'T', 'A', 'C'],
       ['T', 'C', 'C', 'G', 'C', 'C', 'T', 'A', 'C', 'C']], dtype='<U1')

In [113]:
GenerateProfile(Dna_random_array, len(Dna_random_array))

array([[0.2, 0.2, 0.2, 0.4, 0.4],
       [0. , 0.4, 0.4, 0. , 0.2],
       [0.4, 0.2, 0.4, 0.2, 0.2],
       [0.4, 0.2, 0. , 0.4, 0.2]])

In [114]:
Dna_random2 = ['ctcacatgca', 'attgctctca', 'ctttaccaac', 'tcctcgccgg', 'caccccgggc']

In [115]:
# Upper-case every sequence in place: NucleotideIndex only matches the
# upper-case letters 'A', 'C' and 'G'.
for i in range(len(Dna_random2)):
  Dna_random2[i] = Dna_random2[i].upper()

In [116]:
Dna_random2

['CTCACATGCA', 'ATTGCTCTCA', 'CTTTACCAAC', 'TCCTCGCCGG', 'CACCCCGGGC']

In [117]:
Dna_random_array2 = DnaToArray(Dna_random2)

In [118]:
Dna_random_array2

array([['C', 'T', 'C', 'A', 'C', 'A', 'T', 'G', 'C', 'A'],
       ['A', 'T', 'T', 'G', 'C', 'T', 'C', 'T', 'C', 'A'],
       ['C', 'T', 'T', 'T', 'A', 'C', 'C', 'A', 'A', 'C'],
       ['T', 'C', 'C', 'T', 'C', 'G', 'C', 'C', 'G', 'G'],
       ['C', 'A', 'C', 'C', 'C', 'C', 'G', 'G', 'G', 'C']], dtype='<U1')

In [119]:
GenerateProfile(Dna_random_array2, len(Dna_random_array2))

array([[0.2, 0.2, 0. , 0.2, 0.2],
       [0.6, 0.2, 0.6, 0.2, 0.8],
       [0. , 0. , 0. , 0.2, 0. ],
       [0.2, 0.6, 0.4, 0.4, 0. ]])

In [120]:
GenerateProfile(Dna_random_array, len(Dna_random_array)) == GenerateProfile(Dna_random_array2, len(Dna_random_array2))

array([[ True,  True, False, False, False],
       [False, False, False, False, False],
       [False, False, False,  True, False],
       [False, False, False,  True, False]])

In [121]:
Dna_random3 = ['tgtgaccccc', 'gttaggggtt', 'tgtgcggcat', 'tagtaacagc', 'atcttcgaaa']

In [122]:
# Upper-case every sequence in place: NucleotideIndex only matches the
# upper-case letters 'A', 'C' and 'G'.
for i in range(len(Dna_random3)):
  Dna_random3[i] = Dna_random3[i].upper()

In [123]:
Dna_random3

['TGTGACCCCC', 'GTTAGGGGTT', 'TGTGCGGCAT', 'TAGTAACAGC', 'ATCTTCGAAA']

In [124]:
Dna_random_array3 = DnaToArray(Dna_random3)

In [125]:
Dna_random_array3

array([['T', 'G', 'T', 'G', 'A', 'C', 'C', 'C', 'C', 'C'],
       ['G', 'T', 'T', 'A', 'G', 'G', 'G', 'G', 'T', 'T'],
       ['T', 'G', 'T', 'G', 'C', 'G', 'G', 'C', 'A', 'T'],
       ['T', 'A', 'G', 'T', 'A', 'A', 'C', 'A', 'G', 'C'],
       ['A', 'T', 'C', 'T', 'T', 'C', 'G', 'A', 'A', 'A']], dtype='<U1')

In [126]:
GenerateProfile(Dna_random_array3, len(Dna_random_array3))

array([[0.2, 0.2, 0. , 0.2, 0.4],
       [0. , 0. , 0.2, 0. , 0.2],
       [0.2, 0.4, 0.2, 0.4, 0.2],
       [0.6, 0.4, 0.6, 0.4, 0.2]])

In [127]:
GenerateProfile(Dna_random_array3, len(Dna_random_array3)) == GenerateProfile(Dna_random_array2, len(Dna_random_array2))

array([[ True,  True,  True,  True, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False,  True, False]])

In [135]:
GenerateProfile(Dna_random_array3, len(Dna_random_array3)) == GenerateProfile(Dna_random_array, len(Dna_random_array))

array([[ True,  True, False, False,  True],
       [ True, False, False,  True,  True],
       [False, False, False, False,  True],
       [False, False, False,  True,  True]])

https://birc.au.dk/~palle/php/fabox/random_sequence_generator.php

In [219]:
Dna_random4 = ['tgactgtcat', 'cggttatcat', 'gtacatttgc', 'tcgtatctag', 'atttgcgtac']
Dna_random4

['tgactgtcat', 'cggttatcat', 'gtacatttgc', 'tcgtatctag', 'atttgcgtac']

In [221]:
# Upper-case every sequence in place: NucleotideIndex only matches the
# upper-case letters 'A', 'C' and 'G'.
for i in range(len(Dna_random4)):
  Dna_random4[i] = Dna_random4[i].upper()

In [222]:
Dna_random4

['TGACTGTCAT', 'CGGTTATCAT', 'GTACATTTGC', 'TCGTATCTAG', 'ATTTGCGTAC']

In [131]:
Dna_random_array4 = DnaToArray(Dna_random4)

In [132]:
Dna_random_array4

array([['T', 'G', 'A', 'C', 'T', 'G', 'T', 'C', 'A', 'T'],
       ['C', 'G', 'G', 'T', 'T', 'A', 'T', 'C', 'A', 'T'],
       ['G', 'T', 'A', 'C', 'A', 'T', 'T', 'T', 'G', 'C'],
       ['T', 'C', 'G', 'T', 'A', 'T', 'C', 'T', 'A', 'G'],
       ['A', 'T', 'T', 'T', 'G', 'C', 'G', 'T', 'A', 'C']], dtype='<U1')

In [133]:
GenerateProfile(Dna_random_array4, len(Dna_random_array4))

array([[0.2, 0. , 0.4, 0. , 0.4],
       [0.2, 0.2, 0. , 0.4, 0. ],
       [0.2, 0.4, 0.4, 0. , 0.2],
       [0.4, 0.4, 0.2, 0.6, 0.4]])

In [134]:
GenerateProfile(Dna_random_array4, len(Dna_random_array4)) == GenerateProfile(Dna_random_array3, len(Dna_random_array3)) 

array([[ True, False, False, False,  True],
       [False, False, False, False, False],
       [ True,  True, False, False,  True],
       [False,  True, False, False, False]])

In [136]:
GenerateProfile(Dna_random_array4, len(Dna_random_array4)) == GenerateProfile(Dna_random_array2, len(Dna_random_array2)) 

array([[ True, False, False, False, False],
       [False,  True, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False]])

In [137]:
GenerateProfile(Dna_random_array4, len(Dna_random_array4)) == GenerateProfile(Dna_random_array, len(Dna_random_array)) 

array([[ True, False, False, False,  True],
       [False, False, False, False, False],
       [False, False,  True, False,  True],
       [ True, False, False, False, False]])

Budući da su dna stringovi nasumični, odnosno budući da ne sadrže uzorak u sebi, nema smisla gledati koji je kmer više vjerojatan od drugih kmera 

If the strings in Dna were truly random, then we would expect that all nucleotides in the selected k-mers would be equally likely, resulting in an expected Profile in which every entry is approximately 0.25 --> ako su stringovi u Dna nasumični, onda svaki nukleotid ima istu vjerojatnost pojavljivanja jer bi inače određeni stringovi imali veću vjerojatnost pojavljivanja --> ako su svi stringovi jednako vjerojatni, onda svaki element Profile matrice je 0.25 --> da svi stringovi nisu jednako vjerojatni, onda bi u Profile matrici mogli pronaći one vjerojatnosti koje, kada se pomnože, daju najveću vjerojatnost pojavljivanja stringa --> taj bi string imao najveću frekvenciju pojavljivanja u Dna

A: 0.25 0.25 0.25 0.25

C: 0.25 0.25 0.25 0.25

G: 0.25 0.25 0.25 0.25

T: 0.25 0.25 0.25 0.25

Primjer

Dna 

ttACCT**taac**

gAT**GTct**gtc

**ccgG**CGTtag

c**acta**ACGAg

cgtcag**AGGT**


Motifs PROFILE(Motifs)

A: 0.4 0.2 0.2 0.2

C: 0.2 0.4 0.2 0.2

G: 0.2 0.2 0.4 0.2

T: 0.2 0.2 0.2 0.4


We can now compute the probabilities of every 4-mer in Dna based on this profile
matrix. For example, the probability of the first 4-mer in the first string of Dna is PR(ttAC|Profile) = 0.2 · 0.2 · 0.2 · 0.2 = 0.0016. 

ttAC tACC ACCT CCTt CTta Ttaa taac

.0016 .0016 .0128 .0064 .0016 .0016 .0016

gATG ATGT TGTc GTct Tctg ctgt tgtc

.0016 .0128 .0016 .0032 .0032 .0064 .0016

ccgG cgGC gGCG GCGT CGTt GTta Ttag

.0064 .0032 .0016 .0128 .0032 .0016 .0016

cact acta ctaA taAC aACG ACGA CGAg

.0032 .0064 .0016 .0016 .0032 .0128 .0016

cgtc gtca tcag cagA agAG gAGG AGGT

.0016 .0016 .0032 .0032 .0032 .0032 .0128

Određeni kmeri imaju dosta veću vjerojatnost od drugih kmera, npr. kmeri ACCT, ATGT, GCGT, ACGA, AGGT --> If the strings in Dna were truly random, then we would expect that all nucleotides in the selected k-mers would be equally likely, resulting in an expected Profile in which every entry is approximately 0.25 --> ako su svi nukleotidi u svakom stupcu Profile matrice jednako vjerojatni, onda su svi kmeri jednako vjerojatni --> budući da određeni kmeri imaju dosta veću vjerojatnost od drugih kmera, svi nukleotidi u svim stupcima nisu jednako vjerojatni --> iz Profile matrice vidimo da najveću vjerojatnost pojavljivanja ima kmer ACGT --> ako svaki stupac Profile matrice gledamo kao da bacamo 4-sided kocku, onda je vjerojatnost da ćemo u npr. 4 uzastopna bacanja 4 različite 4-sided kocke dobiti ACCT, ATGT, GCGT, ACGA, AGGT dosta veća od vjerojatnosti za ostale kmere --> bacanje svake od 4 4-sided kocke je slučajni pokus, a kada promatramo ishod 4 bacanja 4 različite kocke to je složeni pokus --> ako su kocke biased onda imamo veću vjerojatnost pojavljivanja određenog nukleotida na određenoj poziciji --> jesu li kocke biased ili ne ovisi o tome koje kmere smo uzeli da konstruiramo Profile matricu i koliko slabo očuvanih (weakly conserved) i jako očuvanih pozicija (strongly conserved) ima motiv

Such a uniform profile is essentially useless for motif finding because no string is more probable than any other according to this profile and because it does not provide any clues on what an implanted motif looks like. --> uniformna Profile matrica je beskorisna jer nijedan string nije više vjerojatan od drugih stringova pa ne znamo kako izgleda motiv

We have already noticed that if the strings in Dna were random, then
RANDOMIZEDMOTIFSEARCH would start from a nearly uniform profile, and there
would be nothing to work with. However, the key observation is that the strings in Dna are not random because they include the implanted motif! --> stringovi u Dna nisu random jer sadržavaju motiv, odnosno da su random ne bi mogli objasniti povećanu ekspresiju određenog gena jer bi se transcription factor vezao na bilo koji kmer, tj. bilo koji kmer bi bio transcription factor binding site te ne bismo mogli objasniti zašto ekspresija određenih gena slijedi cirkadijarni ritam. --> jesu li kmeri u Dna random ako odabiranjem kmera ne uhvatimo motiv ni u jednom Dna stringu? --> ako odabiranjem odaberemo npr. 5 kmera koji su svi jednako vjerojatni i od njih napravimo Profile matricu, Profile matrica bi trebala biti uniformna jer su svi stringovi jednako vjerojatni (nijedan string nije više vjerojatan od drugih stringova što nam ne govori kako motiv izgleda)

**These multiple occurrences of the same motif may create a bias in the profile matrix, directing it away from the uniform profile and toward the implanted motif**. --> ako motiv ima dosta jako očuvanih pozicija (strongly conserved positions) i malo slabo očuvanih pozicija (weakly conserved positions) onda višestruke pojave motiva unose bias u Profile matricu

In [252]:
Dna = ['CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA', 'GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG', 'TAGTACCGAGACCGAAAGAAGTATACAGGCGT', 'TAGATCAAGTTTCAGGTGCACGTCGGTGAACC', 'AATCCACCAGCTCCACGTGCAATGTTGGCCTA']

In [253]:
k = 8

In [254]:
t = 5

In [255]:
Dna

['CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA',
 'GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG',
 'TAGTACCGAGACCGAAAGAAGTATACAGGCGT',
 'TAGATCAAGTTTCAGGTGCACGTCGGTGAACC',
 'AATCCACCAGCTCCACGTGCAATGTTGGCCTA']

In [256]:
RunRandomizedMotifSearch(Dna, k, t)

['TCTCGGGG', 'CCAAGGTG', 'TACAGGCG', 'TTCAGGTG', 'TCCACGTG']

In [239]:
k = 15

In [240]:
t = 20

In [241]:
# Read one entry per line of the task file, dropping trailing whitespace
# (including the newline) from each line.
with open('/content/rosalind_ba2f.txt') as task_file:
  Dna = [line.rstrip() for line in task_file]

In [242]:
Dna

['CCCCTATGGTACGCGCTAGTCAACTATGGTGTATGGCGAATGCGGTCGCCCCCAACTTTGAGTGATCTGGGAAAGGCATGACCGTGGAATTATACGTTATCCCTCAGATAATTACTAATAAAGCCGTGTTTACGGAAGGTCACATCGTGCACATAGTAAGGACCAGTTTGACGTACTATCCAGTGCCCCTATGGTACGCG',
 'CTAGTCAACTATGGTGTATGGCGAATGCGGTCGCCCCCAACTTTGAGTGATCTGGGAAAGGCATGACCGTGGAATTATACGTTATCCAGGGCCGCACTTGGGCTCAGATAATTACTAATAAAGCCGTGTTTACGGAAGGTCACATCGTGCACATAGTAAGGACCAGTTTGACGTACTATCCAGTGCCCCTATGGTACGCG',
 'TGCAGGCTAGCCTCGGGTAAAACGGGGTTCCGACCCTCGCTTGTTGATAGGTGAAAGCGCCGTTACGTAGGAGGATCTTGTCTTACTGCTTCGACTACTAGCGCTGGTGTGCCTTGTAAACGGGACATGACTATCCGATTTACGCAGGTTTCATGCACTTGGGGTACTTTTCGCTGAAGGAATTAACAGGCCCACCTCAA',
 'GACTTTTTTACTCTCACCAGGCGGCGCGCTCCACCACAACCCAACCGCTATTCTCCATCCAGAGGCATTCCCTGCAAGTTCAGACAGGCATGCACTGAAGGTCAAAGAAAGCTAGACTAACCCCACTGCTTGAGCAGTATAGCCGCTTTGTTGCCCGACTGGGGGGACAGAAGAATAAAGGTATGACTGTCAAGTAAAGT',
 'CGGCTAAGGCAAATACTTGGGGTTAAAACAGCTAACTTCGCGAGCGCATTGGTCTCCACTTGTCCATGACAAGAGCTTCAACGATGCAGGGAAATGTTGGCGCACAGGTCACCATTAGCCCCTCCGGGCCAGCCGGACAGCACTGGCACTTGCTTGGGAGGGCCGAGCTGTTTATGAT

In [243]:
solutions_list = RunRandomizedMotifSearch(Dna, k, t)

In [244]:
for solution in solutions_list:
  print(solution)

AGGCATGACCGTGGA
AGGGCCGCACTTGGG
TTTCATGCACTTGGG
AGGCATGCACTGAAG
AGGCAAATACTTGGG
AGGCATGCACTTCCA
AGGTGGGCACTTGGG
AGGCATGCACGGAGG
GGGCATGCACTTGCC
AGGCGAACACTTGGG
AGAGCTGCACTTGGG
AGGCATATTCTTGGG
AATGATGCACTTGGG
AGGCATGCAACGGGG
AGGCATGTGTTTGGG
AGGCAGCGACTTGGG
TAGCATGCACTTGGC
AGGCATGCCTCTGGG
AGGCTAACACTTGGG
AGGCATAATCTTGGG


In [245]:
k = 15

In [246]:
t = 20

In [247]:
# Read one entry per line of the task file, dropping trailing whitespace
# (including the newline) from each line.
with open('/content/rosalind_ba2f.txt') as task_file:
  Dna = [line.rstrip() for line in task_file]

In [248]:
Dna

['GCCTCTCTTTTGCTCCAACTTCAGAATGGTAAATAGGAGCCCGTCGGAATGACTTTTCTAGTAAGGTTTTTACAAGAGATCGAACGCCATCCTGTTTAGTCGAGATTATCTAAGTGGCCAAATAAACGGTGCACGAAGAACGCCTCCATGTGTTACTTCGGACTGCTGGTCCTGAAAGACTTTGACATTCGCAGCCTCTCTTTTGCTC',
 'CAACTTCAGAATGGTAAATAGGAGCCCGTCGGAATGACTTTTCTAGTAAGGTTTTTACAAGAGATCGAACGCCATCCTGTTTAGTCGAGATTATCTAAGTGGCCAAATAAACGGTGCACGAAGAACGCCTCCATGTGTTACTTCGGACTGCTGGTCCTGAAAGACTTTGAAACGGACCTACGAAGCATTCGCAGCCTCTCTTTTGCTC',
 'CAGCTGTTTTGCTACCACCCCTTCGTTAAGTGAATCAACTCTGGTACGAAGGTGGACGATAGCAGACCAAGCAACAGATGAGGAGCCCGAATTGAACGATTCCTGGGACACCCACGATGCCCAGCTCACGAGTCCGAGAAAACCACATTCCTCCTTGCCACTTCCACATGTGGCGGGTCCCTTCCTGGTAATCTCTACCATCCTTCAG',
 'CATCCCGTGCTGGTTGCCTTTCTAGTTATCCCTCTTAACCAAAGTCGCGAGCCATCGCGCAGTAGGTTGTGCCGAGCTTGTGAAAGTGAAATTGATGGTCAGAACACTCCGTTGTGATTCGGATCCTTTCGTTGCCATGTCCAGTGCAGAATTCGTGATAGGTACTGCCCTCCCCTACGAAATCGTGGACGAAGAGCTAGATACTCTA',
 'GGCTTAGAACTCCCCTACTTCGGTCTCTGTATTATTTTGAAGCTGCGTATTGGGAATAAGATGTGAATCAATTCGAGCGCCCCGACACGCTCTGGCTGCGAACGGGATCGCTCGGATAAGTTAAAATATTCACCGTCCGTCGGGAA

In [249]:
solutions_list = RunRandomizedMotifSearch(Dna, k, t)

In [250]:
for solution in solutions_list:
  print(solution)

GACTTTTCTAGTAAG
AACGGACCTACGAAG
AACTCTGGTACGAAG
CCCTCCCCTACGAAA
AACTCCCCTACTTCG
AACTCCCCACTGAAG
CGATCCCCTACGAAG
ACGGCCCCTACGAAG
AACTCCCCTAGACAG
AACTCCCCTCTTAAG
AAGCACCCTACGAAG
AACTCTAGTACGAAG
AACTCCGGAACGAAG
CACTCCCCTACGACT
AACTTGTCTACGAAG
AACTCCCCTACGCGA
AACTATGCTACGAAG
AACTCCTAGACGAAG
AACAAGCCTACGAAG
AACTCCCACGCGAAG
